"""Build a BM25-backed LangChain tool for looking up best-selling PC games.

Importing this module downloads the dataset, indexes one document per game,
and exposes ``game_info_tool`` for use by an agent.
"""

import datasets
from langchain.docstore.document import Document
from langchain.tools import Tool
from langchain_community.retrievers import BM25Retriever

# Upper bound on how many retrieved documents are returned to the caller.
MAX_RESULTS = 3


def _game_to_document(game: dict) -> Document:
    """Render one dataset row as a ``Document`` with one labelled line per field."""
    page_content = "\n".join([
        f"Game: {game['Game']}",
        f"Total copies sold: {game['Total copies sold']}",
        # Falsy series values (None or empty string) are shown as 'N/A'.
        f"Series: {game['Series'] or 'N/A'}",
        f"Release date: {game['Release date']}",
        f"Genre(s): {game['Genre(s)']}",
        f"Developer(s): {game['Developer(s)']}",
        f"Publisher(s): {game['Publisher(s)']}",
    ])
    return Document(page_content=page_content, metadata={"game": game["Game"]})


# Load the dataset
games_dataset = datasets.load_dataset(
    "nateraw/list-of-bestselling-pc-games", split="train"
)

# Convert dataset entries into Document objects
docs = [_game_to_document(game) for game in games_dataset]

bm25_retriever = BM25Retriever.from_documents(docs)


def extract_text(query: str) -> str:
    """Return the text of the best BM25 matches for *query*.

    Args:
        query: Free-text search string matched against the indexed game
            descriptions (name, sales, series, release date, genre,
            developer and publisher lines).

    Returns:
        The page content of up to ``MAX_RESULTS`` retrieved documents,
        separated by blank lines, or a fixed "no match" message when the
        query is blank or the retriever returns nothing.
    """
    # A blank query has no terms to rank on, so skip the retriever entirely.
    if not query or not query.strip():
        return "No matching game information found."

    results = bm25_retriever.invoke(query)
    if not results:
        return "No matching game information found."
    return "\n\n".join(doc.page_content for doc in results[:MAX_RESULTS])


# NOTE(review): 'relation' in the description is vague; the indexed text covers
# name, series, genre, developer and publisher -- confirm the intended wording.
game_info_tool = Tool(
    name="game_info_retriever",
    func=extract_text,
    description="Retrieves detailed information about games based on their name or relation.",
)