Spaces:

ruslanmv
/

Medical-Llama3-v2

Paused

App Files Files Community

ruslanmv committed 20 days ago

Commit

77854f6

verified ·

1 Parent(s): 672d64d

Update app.py

Browse files

Files changed (1) hide show

app.py +217 -84

app.py CHANGED Viewed

@@ -1,94 +1,227 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import torch
-import spaces
-import os
 IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"  # True only when SPACES_ZERO_GPU is exactly "1"
 IS_SPACE = os.environ.get("SPACE_ID", None) is not None  # True whenever SPACE_ID is set, whatever its value
-device = "cuda" if torch.cuda.is_available() else "cpu"  # string form; rebound to a torch.device further down
-LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"  # only printed in this version; nothing else reads it
-print(f"Using device: {device}")
-print(f"low memory: {LOW_MEMORY}")
-# 4-bit NF4 quantization settings with float16 compute
-bnb_config = BitsAndBytesConfig(load_in_4bit=True,
-                                bnb_4bit_quant_type="nf4",
-                                bnb_4bit_compute_dtype=torch.float16)
-# Checkpoint id shared by the tokenizer and the model
-model_name = "ruslanmv/Medical-Llama3-v2"
-# Load tokenizer and model eagerly at import time
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, bnb_config=bnb_config)  # NOTE(review): bnb_config is handed to the tokenizer as an extra keyword — confirm it has any effect
-model = AutoModelForCausalLM.from_pretrained(model_name, config=bnb_config)  # NOTE(review): quantization settings go in via config=; the replacement code passes quantization_config= instead
-# Recompute the device as a torch.device and move the model onto it
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model.to(device)
-@spaces.GPU
-# Chat callback: rebuilds the conversation from history, generates a reply with the module-level model, and returns the decoded text
-def respond(
-    message,
-    history: list[tuple[str, str]],  # (user, assistant) pairs; empty entries are skipped below
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})  # the new user turn always goes last
-    # Format the conversation as a single string for the model
-    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True, max_length=512)  # prompt is capped at 512 tokens
-    # Move inputs to device
-    input_ids = inputs['input_ids'].to(device)
-    attention_mask = inputs['attention_mask'].to(device)
-    # Generate the response
-    with torch.no_grad():
-        outputs = model.generate(
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            max_length=max_tokens,  # NOTE(review): the slider feeding this is labelled "Max new tokens" but the value is passed as max_length — confirm intended
-            temperature=temperature,  # NOTE(review): no do_sample argument is passed; check whether temperature/top_p take effect
-            top_p=top_p,
-            use_cache=True
         )
-    # Decode the first (only) sequence in the batch
-    response_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
-    # Remove the prompt and system message from the response by plain string replacement
-    response_text = response_text.replace(system_message, '').strip()
-    response_text = response_text.replace(f"Human: {message}\n\nAssistant: ", '').strip()  # NOTE(review): assumes the decoded text contains this exact "Human:/Assistant:" layout — verify against the chat template
-    return response_text
-# Create the Gradio chat interface around respond
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[  # listed in the same order as respond's extra parameters: system_message, max_tokens, temperature, top_p
-        gr.Textbox(value="You are a Medical AI Assistant. Please be thorough and provide an informative answer. If you don't know the answer to a specific medical inquiry, advise seeking professional help.", label="System message", lines=3),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.8, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-p (nucleus sampling)"),
-    ],
-    title="Medical AI Assistant",
-    description="Give me your symptoms and ask me a health problem. The AI will provide informative answers. If the AI doesn't know the answer, it will advise seeking professional help.",
-    examples=[["I'm a 35-year-old male and for the past few months, I've been experiencing fatigue, increased sensitivity to cold, and dry, itchy skin. Could these symptoms be related to hypothyroidism?"], ["I have a headache and a fever. What should I do?"], ["How can I improve my sleep?"]],
 )
 if __name__ == "__main__":  # start the UI only when the file is run as a script
-    demo.launch()

+import os
 import gradio as gr
 import torch
+import spaces
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+model_name = "ruslanmv/Medical-Llama3-8B"  # checkpoint id used for both the tokenizer and the model
 IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"  # True only when SPACES_ZERO_GPU is exactly "1"
 IS_SPACE = os.environ.get("SPACE_ID", None) is not None  # True whenever SPACE_ID is set, whatever its value
+print(f"Running in Hugging Face Space: {IS_SPACE}")
+print(f"Running with ZeroGPU: {IS_SPACES_ZERO}")
+print(f"CUDA available: {torch.cuda.is_available()}")
+tokenizer = AutoTokenizer.from_pretrained(  # loaded eagerly at import time, unlike the model
+    model_name,
+    trust_remote_code=True,
+)
+if tokenizer.pad_token is None:  # reuse EOS as the pad token when none is defined (askme tokenizes with padding=True)
+    tokenizer.pad_token = tokenizer.eos_token
+model = None  # filled in on first use by load_model()
+def load_model():
+    """Return the shared causal-LM instance, creating it on the first call.
+
+    The loaded model is cached in the module-level ``model`` variable, so
+    later calls return that same object without loading again. When CUDA is
+    available the checkpoint is loaded with 4-bit NF4 quantization (double
+    quantization, bfloat16 compute) and ``device_map="auto"``; otherwise it
+    is loaded in float32 with no quantization.
+
+    Returns:
+        The model, already switched to evaluation mode.
+    """
+    global model
+    if model is None:
+        print("Loading model...")
+        load_kwargs = {"trust_remote_code": True}
+        if not torch.cuda.is_available():
+            # CPU path: full precision, no quantization.
+            load_kwargs["torch_dtype"] = torch.float32
+        else:
+            # GPU path: 4-bit weights, placement decided by device_map.
+            load_kwargs["torch_dtype"] = torch.bfloat16
+            load_kwargs["device_map"] = "auto"
+            load_kwargs["quantization_config"] = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_quant_type="nf4",
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_compute_dtype=torch.bfloat16,
+            )
+        model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)
+        model.eval()
+        print("Model loaded.")
+    return model
+@spaces.GPU(duration=120)
+def askme(symptoms, question):
+    """Generate an answer to a medical question given free-text symptoms.
+
+    Both inputs are stripped; if both end up empty a prompt-for-input
+    message is returned without loading the model. Otherwise the inputs are
+    combined into one user turn, rendered through the tokenizer's chat
+    template together with a fixed system message, and up to 300 new tokens
+    are sampled from the model.
+
+    Args:
+        symptoms: Symptom description typed by the user (may be empty/None).
+        question: The user's question (may be empty/None).
+
+    Returns:
+        The decoded answer text, a fallback message when nothing was
+        generated, or an ``"Error: <type>: <message>"`` string if any
+        exception occurs (exceptions are reported as text, never raised).
+    """
+    try:
+        symptoms = symptoms.strip() if symptoms else ""
+        question = question.strip() if question else ""
+        # Validate before touching the model so an empty submission does not
+        # trigger (or wait on) a model load.
+        if not symptoms and not question:
+            return "Please enter your symptoms and/or medical question."
+        current_model = load_model()
+        sys_message = """
+You are an AI Medical Assistant trained on a vast dataset of health information.
+Please be thorough and provide an informative answer.
+If you don't know the answer to a specific medical inquiry, advise seeking professional help.
+Always remind users that your answer is not a substitute for professional medical advice.
+"""
+        content = f"Symptoms: {symptoms}\n\nQuestion: {question}"
+        messages = [
+            {"role": "system", "content": sys_message},
+            {"role": "user", "content": content},
+        ]
+        prompt = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True,
+        )
+        # The chat template has already rendered the special tokens into the
+        # prompt text, so the tokenizer must not add its own (this avoids a
+        # duplicated BOS token at the start of the sequence).
+        inputs = tokenizer(
+            prompt,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=2048,
+            add_special_tokens=False,
         )
+        # Move every input tensor to wherever the model lives.
+        inputs = {
+            key: value.to(current_model.device)
+            for key, value in inputs.items()
+        }
+        with torch.no_grad():
+            outputs = current_model.generate(
+                **inputs,
+                max_new_tokens=300,
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.9,
+                repetition_penalty=1.1,
+                pad_token_id=tokenizer.eos_token_id,
+                eos_token_id=tokenizer.eos_token_id,
+                use_cache=True,
+            )
+        # Drop the prompt tokens so only the newly generated part is decoded.
+        generated_ids = outputs[0][inputs["input_ids"].shape[-1]:]
+        answer = tokenizer.decode(
+            generated_ids,
+            skip_special_tokens=True,
+        ).strip()
+        if not answer:
+            answer = "I could not generate a response. Please try rephrasing your question."
+        return answer
+    except Exception as e:
+        # UI boundary: surface the failure as text in the answer box.
+        return f"Error: {type(e).__name__}: {str(e)}"
+symptoms_example = """
+I'm a 35-year-old male and for the past few months, I've been experiencing fatigue,
+increased sensitivity to cold, and dry, itchy skin.
+"""  # leading/trailing newlines come from the quote placement; askme strips its inputs
+question_example = """
+Could these symptoms be related to hypothyroidism?
+If so, what steps should I take to get a proper diagnosis and discuss treatment options?
+"""  # paired with symptoms_example in the single example row below
+examples = [  # one row of [symptoms, question], matching the two Interface inputs
+    [symptoms_example, question_example]
+]
+css = """
+.gradio-container {
+    font-family: "IBM Plex Sans", sans-serif;
+    background-color: #212529;
+    color: #fff;
+    background-image: url("https://huggingface.co/spaces/ruslanmv/AI-Medical-Chatbot/resolve/main/notebook/local/img/background.jpg");
+    background-size: cover;
+    background-position: center;
+}
+.gr-button {
+    color: white;
+    background: #007bff;
+    white-space: nowrap;
+    border: none;
+    padding: 10px 20px;
+    border-radius: 8px;
+    cursor: pointer;
+}
+.gr-button:hover {
+    background-color: #0056b3;
+}
+.gradio-textbox textarea {
+    background-color: #343a40;
+    color: #fff;
+    border-color: #343a40;
+    border-radius: 8px;
+}
+"""  # NOTE(review): selectors such as .gr-button / .gradio-textbox are assumed to match Gradio's generated class names — verify against the installed Gradio version
+welcome_message = """
+# AI Medical Llama 3 Chatbot
+Ask any medical question by first giving your symptoms.
+Developed by Ruslan Magana. Visit [https://ruslanmv.com/](https://ruslanmv.com/) for more information.
+**Disclaimer:** This chatbot is for educational purposes only and is not a substitute for professional medical advice, diagnosis, or treatment.
+"""  # Markdown text passed to the Interface as its description
+symptoms_input = gr.Textbox(  # first input; becomes askme's symptoms argument
+    label="Symptoms",
+    placeholder="Enter your symptoms here...",
+    lines=6,
 )
+question_input = gr.Textbox(  # second input; becomes askme's question argument
+    label="Question",
+    placeholder="Enter your medical question here...",
+    lines=4,
+)
+answer_output = gr.Textbox(  # shows the string returned by askme, including its "Error: ..." strings
+    label="Answer",
+    lines=12,
+)
+iface = gr.Interface(
+    fn=askme,
+    inputs=[symptoms_input, question_input],  # order must match askme(symptoms, question)
+    outputs=answer_output,
+    examples=examples,
+    cache_examples=False,  # examples are not precomputed
+    css=css,
+    title="AI Medical Llama 3 Chatbot",
+    description=welcome_message,
+)
 if __name__ == "__main__":  # start the UI only when the file is run as a script
+    iface.launch()