import gradio as gr
from huggingface_hub import InferenceClient
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, util

# Minimum cosine similarity between the user query and a dataset instruction
# for the stored answer to be returned. Adjust based on experimentation.
SIMILARITY_THRESHOLD = 0.7

# Reply sent when no dataset instruction is similar enough to the query.
NO_MATCH_MESSAGE = "No legal information available for this query."

# Load the Indian law dataset from Hugging Face
dataset = load_dataset("viber1/indian-law-dataset")['train']

# Initialize a sentence transformer for semantic similarity matching
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Precompute embeddings for all instructions (legal queries) in the dataset.
# They are requested as a tensor so they are produced the same way as the
# query embedding in `respond`; mixing a NumPy (CPU) corpus with a tensor
# query can fail with a device mismatch when the model runs on a GPU.
dataset_embeddings = model.encode(
    [entry['Instruction'] for entry in dataset],
    convert_to_tensor=True,
)

# Initialize the inference client with the fallback model (if needed).
# NOTE(review): `client` is not used anywhere in the code visible here.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Answer a chat message by retrieving the closest dataset entry.

    The message is embedded with the sentence transformer and compared by
    cosine similarity against the precomputed instruction embeddings. If the
    best score reaches ``SIMILARITY_THRESHOLD`` the matching entry's
    ``'Response'`` field is yielded; otherwise a fixed refusal is yielded.

    Args:
        message: The user's latest chat message.
        history: Previous chat turns. Not used by the lookup.
        system_message: Accepted for interface compatibility; not used.
        max_tokens: Accepted for interface compatibility; not used.
        temperature: Accepted for interface compatibility; not used.
        top_p: Accepted for interface compatibility; not used.

    Yields:
        A single string: the retrieved answer or the refusal message.
    """
    # An empty or whitespace-only query cannot match anything meaningful, so
    # skip the encoder and refuse straight away.
    if not message or (isinstance(message, str) and not message.strip()):
        yield NO_MATCH_MESSAGE
        return

    # Encode the user message
    query_embedding = model.encode(message, convert_to_tensor=True)

    # Cosine similarity between the query and every dataset instruction;
    # the result has one row (the query) and one column per instruction.
    similarities = util.pytorch_cos_sim(query_embedding, dataset_embeddings)

    # With a single row, the flattened argmax is the column index of the
    # best-matching instruction.
    best_idx = similarities.argmax().item()
    best_score = similarities[0, best_idx].item()

    if best_score >= SIMILARITY_THRESHOLD:
        # Similar enough: return the stored legal response
        yield dataset[best_idx]['Response']
    else:
        # No sufficiently close legal match: polite refusal
        yield NO_MATCH_MESSAGE
# Extra controls handed to the chat interface as `additional_inputs`.
# They are built up front, in the same order the original inline list used.
_system_message_box = gr.Textbox(
    value="You are an AI Legal Assistant for Indian law.",
    label="System message",
)
_max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=2048,
    value=512,
    step=1,
    label="Max new tokens",
)
_temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=4.0,
    value=0.7,
    step=0.1,
    label="Temperature",
)
_top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

# Chat UI that routes each user message to `respond`.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
    title="AI Legal Assistant for Indian Law",
    description="Ask legal questions related to Indian laws and get precise answers. If the query is not related to legal matters, no information will be provided.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()