| from smolagents import Tool |
| from typing import Any, Optional |
|
|
class SimpleTool(Tool):
    """Tool that scrapes article titles and links from a news website.

    The tool downloads the homepage, reads a slice of the entries in its
    ``<nav class="main-nav">`` menu to discover section pages, and then
    collects the first articles listed on each of those section pages.
    """

    # NOTE: "lastest" is a typo for "latest". It is left untouched because the
    # name is the tool's public identifier and callers may already rely on it.
    name = "fetch_lastest_news_titles_and_urls"
    description = "This tool extracts the titles and URLs of the latest news articles from a news website's homepage."
    inputs = {"url":{"type":"string","description":"The URL of the news website's homepage."}}
    output_type = "array"

    def forward(self, url: str) -> list[tuple[str, str]]:
        """
        This tool extracts the titles and URLs of the latest news articles from a news website's homepage.

        Args:
            url: The URL of the news website's homepage.

        Returns:
            list[tuple[str, str]]: ``(title, url)`` pairs in page order, one per
            article found. Article links are resolved against the section page
            they were found on, so relative links come back as absolute URLs.
            The list is empty when the homepage has no ``nav.main-nav`` menu.

        Raises:
            requests.HTTPError: If the homepage answers with a 4xx/5xx status.
            requests.RequestException: On connection errors or timeouts for
                any of the requested pages.
        """
        # Imports are kept at function scope, as in the original implementation.
        import requests
        from bs4 import BeautifulSoup
        from urllib.parse import urljoin

        # Without a timeout, requests waits indefinitely on a stalled server.
        request_timeout = 10  # seconds
        # Menu entries 2..6 are treated as section links; the first two entries
        # are skipped (presumably non-section links such as "home" -- confirm
        # against the target site).
        first_section_index = 2
        last_section_index = 7
        max_articles_per_section = 10

        response = requests.get(url, timeout=request_timeout)
        # Fail loudly instead of parsing an error page into an empty result.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        navigation_bar = soup.find('nav', class_='main-nav')
        menu = navigation_bar.find('ul') if navigation_bar is not None else None
        if menu is None:
            return []

        section_urls = []
        menu_items = menu.find_all('li')[first_section_index:last_section_index]
        for item in menu_items:
            link = item.find('a')
            href = link.get('href') if link is not None else None
            if href:
                # urljoin copes with absolute hrefs, trailing slashes and
                # homepage URLs that carry a path; plain "+" does not.
                section_urls.append(urljoin(url, href))

        articles = []
        for section_url in section_urls:
            section_response = requests.get(section_url, timeout=request_timeout)
            if not section_response.ok:
                # A single broken section should not discard the other sections.
                continue
            section_soup = BeautifulSoup(section_response.text, 'html.parser')

            for article in section_soup.find_all('article')[:max_articles_per_section]:
                title_tag = article.find('h3', class_='title-news')
                link = article.find('a')
                href = link.get('href') if link is not None else None
                # Skip entries that lack either a title or a usable link.
                if title_tag is None or not href:
                    continue
                articles.append((title_tag.text.strip(), urljoin(section_url, href)))

        return articles