SachithRKA
/

ASL_AI_model

Model card Files Files and versions

ASL_AI_model / run.py

SachithRKA's picture

Upload 2 files

f853f39 verified about 2 years ago

History Blame Contribute Delete

3.58 kB

	import gradio as gr
	import google.generativeai as genai
	import os
	import markdown
	import cv2
	import numpy as np
	from tensorflow.keras.models import load_model
	import mediapipe as mp
	from dotenv import load_dotenv

	# Populate the process environment via python-dotenv before API_KEY is read.
	load_dotenv()
	genai.configure(api_key=os.environ.get("API_KEY"))

	# Sampling settings handed to the Gemini chat model.
	generation_config = {
	    "temperature": 1,
	    "top_p": 0.95,
	    "top_k": 0,
	    "max_output_tokens": 8192,
	}

	# Every harm category uses the same blocking threshold.
	safety_settings = [
	    {"category": category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
	    for category in (
	        "HARM_CATEGORY_HARASSMENT",
	        "HARM_CATEGORY_HATE_SPEECH",
	        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
	        "HARM_CATEGORY_DANGEROUS_CONTENT",
	    )
	]

	# The Gemini model gets its own name so it is not silently clobbered by the
	# Keras classifier that is bound to `model` further down.
	chat_model = genai.GenerativeModel(
	    model_name="gemini-1.5-pro-latest",
	    generation_config=generation_config,
	    safety_settings=safety_settings,
	)

	# Chat session that greet() sends its prompt to.
	convo = chat_model.start_chat(history=[])

	# Keras classifier; asl_video() reads it through the module-level name `model`.
	model = load_model('asl_landmark_mine_model_one.h5')


	def preprocess_image(img, target_size=(64, 64)):
	    """Turn a frame into a scaled single-image batch.

	    The image is mirrored horizontally, passed through
	    ``cv2.COLOR_BGR2RGB``, resized to ``target_size``, given a new leading
	    axis and finally divided by 255.0.

	    Args:
	        img: Image array accepted by ``cv2.flip`` / ``cv2.cvtColor``.
	        target_size: Size tuple forwarded to ``cv2.resize``.

	    Returns:
	        The processed array with one extra leading axis.
	    """
	    mirrored = cv2.flip(img, 1)
	    rgb = cv2.cvtColor(mirrored, cv2.COLOR_BGR2RGB)
	    resized = cv2.resize(rgb, target_size)
	    batch = np.expand_dims(resized, axis=0)
	    return batch / 255.0


	def predict_asl_letter(image, model):
	    """Classify ``image`` and return the matching capital letter.

	    The image goes through ``preprocess_image`` and then ``model.predict``;
	    the index of the highest score is mapped onto the alphabet with
	    index 0 giving 'A', index 1 giving 'B', and so on.

	    Args:
	        image: Frame to classify.
	        model: Object exposing ``predict`` (the caller passes the loaded
	            classifier).

	    Returns:
	        str: A one-character string.
	    """
	    scores = model.predict(preprocess_image(image))
	    class_index = np.argmax(scores)
	    return chr(ord('A') + class_index)


	def asl_video():
	    """Capture webcam frames, classify hand signs and return the letters seen.

	    Opens capture device 0 and shows an annotated preview window until the
	    'q' key is pressed or the capture is no longer open. For every hand
	    MediaPipe reports in a frame, its landmarks are drawn, the frame is
	    classified with the module-level ``model`` and the predicted letter is
	    both overlaid on the preview and appended to the result.

	    Returns:
	        str: All predicted letters concatenated in order (one per detected
	        hand per frame); the empty string if nothing was ever predicted.
	    """
	    mp_hands = mp.solutions.hands
	    mp_drawing = mp.solutions.drawing_utils
	    letters = []

	    cap = cv2.VideoCapture(0)
	    try:
	        # The context manager closes the MediaPipe graph even on errors.
	        with mp_hands.Hands(min_detection_confidence=0.5,
	                            min_tracking_confidence=0.5) as hands:
	            while cap.isOpened():
	                ret, frame = cap.read()
	                if not ret:
	                    print("Ignoring empty camera frame.")
	                    continue

	                # OpenCV delivers BGR frames while MediaPipe expects RGB, so
	                # detection runs on a converted copy. The BGR original is kept
	                # for drawing, classification and display, which means
	                # preprocess_image's BGR2RGB step now sees genuine BGR input.
	                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	                # Read-only flag lets MediaPipe take the array by reference.
	                rgb_frame.flags.writeable = False
	                results = hands.process(rgb_frame)

	                if results.multi_hand_landmarks:
	                    for hand_landmarks in results.multi_hand_landmarks:
	                        mp_drawing.draw_landmarks(
	                            frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

	                        asl_letter = predict_asl_letter(frame, model)
	                        cv2.putText(frame, "Predicted Letter: " + asl_letter,
	                                    (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
	                                    (0, 255, 0), 2)
	                        letters.append(asl_letter)

	                cv2.imshow('MediaPipe Hands', frame)
	                if cv2.waitKey(5) & 0xFF == ord('q'):
	                    break
	    finally:
	        # Always free the camera and close the preview window.
	        cap.release()
	        cv2.destroyAllWindows()

	    return "".join(letters)


	def greet():
	    """Record a signing attempt and return Gemini's feedback as HTML.

	    Runs ``asl_video()`` to collect the signed letters, sends a feedback
	    prompt through the module-level chat session ``convo`` and converts the
	    latest reply from Markdown to HTML.

	    Returns:
	        str: HTML produced by ``markdown.markdown`` from the chat reply.
	    """
	    # Quiz state is hard-coded here; it only feeds the prompt text below.
	    expected = "Hello"
	    questionsRight = 4
	    numberOfQuestions = 10
	    questionNumber = 5

	    # Blocks until the capture loop in asl_video() ends.
	    sentence = asl_video()
	    prompt = f"Analyze the user's sign for \"{expected}\" and provide feedback. Did they sign it correctly? If not, explain what went wrong and how to improve. If they got it right, offer encouragement. The user's sign was \"{sentence}\". This is question number {questionNumber} out of {numberOfQuestions}, and the user has gotten {questionsRight} questions right so far."
	    convo.send_message(prompt)

	    return markdown.markdown(convo.last.text)


	if __name__ == "__main__":
	    # Expose greet() as an input-less Gradio app that renders HTML output.
	    demo = gr.Interface(fn=greet, inputs=None, outputs="html")
	    demo.launch()