Spaces:

yutohub
/

japanese-chatbot-arena-leaderboard

Running

App Files Files Community

japanese-chatbot-arena-leaderboard / app.py

yutohub

Add data source

efece16 verified over 2 years ago

Raw

History Blame Contribute Delete

13.2 kB

	import json
	import os
	import random
	import time

	import pandas as pd
	import requests
	import streamlit as st


	# Module-level constants loaded from files shipped next to this script
	# (these are not environment variables).
	# Both files are opened as UTF-8 explicitly so that reading non-ASCII text
	# does not depend on the platform's default locale encoding.
	with open("models_info.json", "r", encoding="utf-8") as json_file:
	    # Mapping keyed by model identifier; the keys become the arena's model list.
	    MODELS_INFO = json.load(json_file)
	with open("test.csv", "r", encoding="utf-8") as file:
	    # Question table; its "input" column holds the question text.
	    QUESTION_DF = pd.read_csv(file)
	# Identifiers of every model taking part in the arena.
	MODELS = list(MODELS_INFO.keys())
	# Question ids are drawn from 1..NUM_QUESTION.
	# NOTE(review): presumably equal to the number of rows in test.csv - confirm.
	NUM_QUESTION = 100


	# Fetch the ranking
	def get_leaderboard():
	    """Fetch the current ranking from the database endpoint.

	    Sends a GET request to the URL stored in the ``DARABASE_URL``
	    environment variable and decodes the JSON body.

	    Returns:
	        The decoded JSON payload on success, or the string ``"Error"`` when
	        anything goes wrong (missing environment variable, network failure,
	        timeout, non-2xx status, or a body that is not valid JSON).
	    """
	    try:
	        # NOTE(review): "DARABASE_URL" looks like a misspelling of
	        # DATABASE_URL, but it is the name this code reads at runtime, so it
	        # is kept as-is - confirm against the deployment settings.
	        # A timeout keeps a stalled server from hanging the page forever.
	        response = requests.get(os.environ['DARABASE_URL'], timeout=10)
	        # Treat HTTP errors as failures instead of parsing an error body as
	        # if it were ranking data.
	        response.raise_for_status()
	        return response.json()
	    except Exception as e:
	        # Boundary handler: callers only understand the "Error" sentinel.
	        print(f"An unexpected error occurred: {e}")
	        return "Error"

	# Build the leaderboard
	def create_leaderboard_df():
	    """Return the leaderboard as a DataFrame, one row per ranked model.

	    The ranking comes from ``get_leaderboard()``. Each entry is read through
	    its ``"model"`` and ``"rating"`` keys, and the model key is looked up in
	    ``MODELS_INFO``, whose positions 0, 1 and 2 fill the model-name, license
	    and organization columns respectively. Ranks are 1-based and follow the
	    order in which the entries were received.

	    Returns:
	        A DataFrame with rank, model, Elo rating, organization and license
	        columns, or an empty DataFrame when the ranking could not be fetched
	        (an error is then shown on the page and printed to the log).
	    """
	    ranking = get_leaderboard()

	    # Fetch failed: report it and hand back an empty frame.
	    if ranking == "Error":
	        st.error("リーダーボードを取得できませんでした。")
	        print("リーダーボードを取得できませんでした。")  # log output
	        return pd.DataFrame()

	    # Metadata record of each ranked model, in ranking order.
	    infos = [MODELS_INFO[entry["model"]] for entry in ranking]
	    return pd.DataFrame({
	        "ランク": list(range(1, len(ranking) + 1)),
	        "🤖 モデル": [info[0] for info in infos],
	        "⭐️ Eloレーティング": [entry["rating"] for entry in ranking],
	        "🏢 組織": [info[2] for info in infos],
	        "📃 ライセンス": [info[1] for info in infos],
	    })

	# Fetch an answer from the server
	@st.cache_data
	def _fetch_answer(model_name, question_id):
	    """Request one model's answer to one question; raise on any failure.

	    Only successful results are returned, so only successful results end up
	    in the ``st.cache_data`` cache.
	    """
	    params = {'modelName': model_name, 'questionId': question_id}
	    # A timeout keeps a stalled server from hanging the page forever.
	    response = requests.get(os.environ['ANSWER_URL'], params=params, timeout=10)
	    response.raise_for_status()
	    return response.json()["answer"]


	def get_answer(model_name, question_id):
	    """Return the stored answer of ``model_name`` for ``question_id``.

	    The request goes to the URL in the ``ANSWER_URL`` environment variable
	    with ``modelName`` and ``questionId`` query parameters, and the
	    ``"answer"`` field of the JSON response is returned. Successful answers
	    are cached per (model, question) pair.

	    The error handling deliberately lives outside the cached helper: if the
	    ``"Error"`` sentinel were produced inside the cached function it would be
	    cached too, and one transient failure would be replayed for that pair.

	    Returns:
	        The answer, or the string ``"Error"`` on any failure.
	    """
	    try:
	        return _fetch_answer(model_name, question_id)
	    except Exception as e:
	        # Boundary handler: callers only understand the "Error" sentinel.
	        print(f"An unexpected error occurred: {e}")
	        return "Error"

	# Send the user's vote to the server
	def send_choice(question_id, model_a, model_b, winner, language):
	    """POST one vote to the database endpoint.

	    Args:
	        question_id: Id of the question that was shown.
	        model_a: Identifier of the model shown as "A".
	        model_b: Identifier of the model shown as "B".
	        winner: The user's verdict.
	        language: Language tag stored with the vote.

	    Returns:
	        The response body text on success, or the string ``"Error"`` when an
	        argument is missing/falsy or when the request fails (missing
	        environment variable, network failure, timeout, non-2xx status).
	    """
	    # Refuse to send an incomplete record.
	    if not question_id or not model_a or not model_b or not winner or not language:
	        st.error("データが入力されていないため、回答を送信できませんでした。")
	        print("質問と回答を取得してください。")  # log output
	        return "Error"
	    try:
	        data = {
	            "question_id": question_id,
	            "model_a": model_a,
	            "model_b": model_b,
	            "winner": winner,
	            "language": language,
	            "tstamp": time.time(),
	        }
	        headers = {
	            'Content-Type': 'application/json'
	        }
	        # NOTE(review): "DARABASE_URL" looks like a misspelling of
	        # DATABASE_URL, but it is the name this code reads at runtime, so it
	        # is kept as-is - confirm against the deployment settings.
	        response = requests.post(
	            os.environ['DARABASE_URL'],
	            headers=headers,
	            data=json.dumps(data),
	            timeout=10,  # do not hang the page on a stalled server
	        )
	        # A rejected vote must not be reported to the user as a success.
	        response.raise_for_status()
	        return response.text
	    except Exception as e:
	        # Boundary handler: callers only understand the "Error" sentinel.
	        print(f"An unexpected error occurred: {e}")
	        return "Error"


	### Callback Functions ###
	# Initialise the session state
	def handle_init_state():
	    """Create every session-state key this app uses, if it is not set yet.

	    Keys that already exist are left untouched.
	    """
	    # Built on every call, so the two history lists are always fresh objects.
	    initial_values = {
	        "chat_history_a": [],
	        "chat_history_b": [],
	        "question_id": None,
	        "model_a": None,
	        "model_b": None,
	        "question": None,
	        # Whether a question has been loaded
	        "question_loaded": False,
	        # Whether a vote has been sent
	        "answer_sent": False,
	    }
	    for key, value in initial_values.items():
	        if key not in st.session_state:
	            st.session_state[key] = value

	# Load a question and the two answers
	def handle_init_question():
	    """Pick a random question and two distinct random models, then fetch answers.

	    On success the question and both answers are stored in the two chat
	    histories and ``question_loaded`` is set. If a question is already
	    loaded, or if either answer cannot be fetched, the chat histories are
	    cleared, ``question_loaded`` is reset and an error is shown.
	    """
	    state = st.session_state

	    # A question is already loaded: reset and complain instead of reloading.
	    if state.question_loaded:
	        state.question_loaded = False
	        state.chat_history_a = []
	        state.chat_history_b = []
	        st.error("ボタンを連打しないでください。")
	        print("既に質問と回答を取得しています。")  # log output
	        return

	    # Mark the question as loaded
	    state.question_loaded = True
	    st.success("質問と回答を取得しています。しばらくお待ちください。")

	    # Pick the question (ids are 1-based, rows are 0-based)
	    state.question_id = random.randint(1, NUM_QUESTION)
	    state.question = QUESTION_DF["input"][state.question_id - 1]

	    # Draw two different models without reordering the shared MODELS list.
	    state.model_a, state.model_b = random.sample(MODELS, 2)
	    answer_a = get_answer(state.model_a, state.question_id)
	    answer_b = get_answer(state.model_b, state.question_id)

	    # get_answer() reports failure with the "Error" sentinel. Showing that
	    # text as a model answer would let users vote on a fetch failure, so
	    # reset the round and let them try again.
	    if answer_a == "Error" or answer_b == "Error":
	        state.question_loaded = False
	        state.chat_history_a = []
	        state.chat_history_b = []
	        st.error("回答を取得できませんでした。もう一度お試しください。")
	        print("回答を取得できませんでした。")  # log output
	        return

	    # Update both chat histories: the question first, then each answer
	    state.chat_history_a.append({"role": "user", "content": state.question})
	    state.chat_history_b.append({"role": "user", "content": state.question})
	    state.chat_history_a.append({"role": "assistant", "content": answer_a})
	    state.chat_history_b.append({"role": "assistant", "content": answer_b})
	    st.success("質問と回答を取得しました。回答を選択してください。")
	    print("質問と回答を取得しました。")  # log output

	# Submit the user's vote
	def handle_send_choice(winner):
	    """Send the user's verdict for the current question and update the state.

	    Args:
	        winner: The verdict passed by the clicked button.

	    ``answer_sent`` is set only after the vote was actually delivered. When
	    sending fails, the state is left as it was so the user can vote again,
	    and nothing that is gated on ``answer_sent`` is shown for a vote that
	    was never recorded.
	    """
	    state = st.session_state

	    # A vote was already recorded in this session.
	    if state.answer_sent:
	        st.error("既に回答を送信しています。")
	        print("既に回答を送信しています。")  # log output
	        return

	    # Send the vote
	    response = send_choice(
	        question_id=state.question_id,
	        model_a=state.model_a,
	        model_b=state.model_b,
	        winner=winner,
	        language="Japanese",
	    )

	    # Sending failed: keep the round open for a retry.
	    if response == "Error":
	        st.error("予期せぬエラーが発生しました。")
	        return

	    st.success("選択肢は正常に送信されました。")
	    # The vote is recorded: mark it as sent and close the round.
	    state.answer_sent = True
	    state.question_loaded = False


	# Page rendering
	def main():
	    """Render the whole page: description, arena, leaderboard and citation.

	    The arena shows the two chat histories side by side, a button that loads
	    a question, and four vote buttons. The names of the two models and the
	    leaderboard are shown only once ``answer_sent`` is set.
	    """
	    # page config
	    st.set_page_config(
	        page_title="日本語チャットボットアリーナ",
	        page_icon="🏆",
	        layout="wide",
	    )

	    # Initialise the session state
	    handle_init_state()
	    state = st.session_state

	    # Description
	    st.markdown("# 🏆 日本語チャットボットアリーナ")
	    st.markdown("## 📖 説明")
	    # Raw string: same runtime text as before, without relying on "\|" being
	    # an unrecognised (and warned-about) escape sequence.
	    st.markdown(r"\| [Twitter](https://twitter.com/yutohub) \| [GitHub](https://github.com/yutohub) \| [ブログ](https://zenn.dev/yutohub) \|")
	    st.markdown("日本語チャットボットアリーナは、日本語に対応しているLLMの評価のためのクラウドソーシングプラットフォームです。[LMSYS Chatbot Arena](https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard) を参考に、日本語に対応しているLLMのリーダーボードを作成することを目的としています。また、一部の質問と回答は、 [ELYZA-tasks-100](https://huggingface.co/datasets/elyza/ELYZA-tasks-100) や [Northern-System-Service/gpt4-autoeval](https://github.com/Northern-System-Service/gpt4-autoeval) を利用しています。")
	    st.markdown(""" > 注意事項:
	>
	> 日本語チャットボットアリーナが提供する情報によって生じたいかなる損害についても、サービス提供者は一切の責任を負いません。
	> 日本語チャットボットアリーナは開発中であり、予告なく停止または終了する可能性があります。
	> また、ユーザーの回答を収集し、Creative Commons Attribution (CC-BY) または同様のライセンスの下で配布する権利を留保しています。
	""")

	    # Chat histories
	    st.markdown("## ⚔️ チャットボットアリーナ ⚔️")
	    st.markdown(" 2つの匿名モデル (ChatGPT、Llama など) の回答を見て、より良いモデルに投票してください。")
	    with st.expander(f"🔍 展開するとアリーナに参加している {len(MODELS)} 個のモデルの一覧が表示されます。"):
	        st.write(MODELS)

	    # One panel per model: (heading, chat history, model identifier)
	    panels = (
	        ("### モデル A", state.chat_history_a, state.model_a),
	        ("### モデル B", state.chat_history_b, state.model_b),
	    )
	    for column, (heading, history, model_name) in zip(st.columns([1, 1]), panels):
	        with column:
	            st.markdown(heading)
	            if not history:
	                st.markdown("質問を取得してください。")
	                continue
	            for message in history:
	                with st.chat_message(message["role"]):
	                    st.write(message["content"])
	            # Reveal which model answered once the vote has been sent
	            if state.answer_sent:
	                with st.chat_message("assistant"):
	                    st.markdown(f"`{model_name}` が回答しました。")

	    # Load a question
	    st.button(
	        label="質問を取得",
	        on_click=handle_init_question,
	        # Disabled once a vote has been sent or a question is already loaded
	        disabled=state.answer_sent or state.question_loaded,
	        type="primary",
	        use_container_width=True,
	    )

	    # Vote buttons: (label, verdict passed to handle_send_choice)
	    choices = (
	        ("👈 Aの方が良い", "model_a"),
	        ("👉 Bの方が良い", "model_b"),
	        ("🤝 どちらも良い", "tie"),
	        ("👎 どちらも悪い", "tie (bothbad)"),
	    )
	    for column, (label, verdict) in zip(st.columns([1, 1, 1, 1]), choices):
	        with column:
	            st.button(
	                label=label,
	                on_click=handle_send_choice,
	                args=(verdict,),
	                # Voting is only possible while a question is loaded
	                disabled=not state.question_loaded,
	                use_container_width=True,
	            )

	    # Leaderboard
	    st.markdown("## 🏆 リーダーボード")
	    st.markdown(f"合計で {len(MODELS)} 個のモデルがアリーナに参加しています。30 分毎にリーダーボードが更新されます。")
	    # Shown only after a vote has been sent
	    if state.answer_sent:
	        leaderboard = create_leaderboard_df()
	        st.dataframe(
	            data=leaderboard,
	            # Height is derived from the model count (one extra row on top)
	            height=(len(MODELS) + 1) * 35 + 3,
	            use_container_width=True,
	            hide_index=True,
	        )
	    else:
	        st.markdown("""
	> まずは、「⚔️ チャットボットアリーナ ⚔️」に回答を送信してください。
	> 回答を送信すると、リーダーボードが表示されます。
	""")

	    # Citation
	    st.markdown("## 📚 引用")
	    st.markdown("""
	```
	@misc{elyzatasks100,
	title={ELYZA-tasks-100: 日本語instructionモデル評価データセット},
	url={https://huggingface.co/elyza/ELYZA-tasks-100},
	author={Akira Sasaki and Masato Hirakawa and Shintaro Horie and Tomoaki Nakamura},
	year={2023},
	}
	```

	[(c) 2023 Northern System Service Co., Ltd.](https://github.com/Northern-System-Service/gpt4-autoeval/blob/main/LICENSE)
	""")


	# Render the page when this file is executed as a script.
	if __name__ == "__main__":
	main()