BirdScopeAI / bonus_ebird_tools.py
facemelter's picture
Initial commit to hf space for hackathon
ff0e97f verified
Raw
History Blame Contribute Delete
27.9 kB
"""
eBird MCP Server
Wraps eBird API v2 as reusable MCP tools
Runs locally with FastMCP and supports both stdio and streamable-http transport
Features:
- 7 core tools for bird data discovery
- Configurable tool enabling/disabling
- Support for both user input AND classifier output
- Rate limiting and error handling
- JSON responses for easy integration
- Dual transport: stdio for CLI, streamable-http for web clients (via FastAPI)
"""
import os
import sys
import requests
import json
import time
from typing import Optional, Dict, List, Any
from fastmcp import FastMCP
from dotenv import load_dotenv
# ============================================================================
# CONFIGURATION & SETUP
# ============================================================================
load_dotenv()

EBIRD_API_KEY = os.getenv("EBIRD_API_KEY")
# Endpoints are appended as f"{BASE_URL}{endpoint}" and always start with "/",
# so drop any trailing slash from an overridden EBIRD_BASE_URL to avoid "//".
BASE_URL = os.getenv("EBIRD_BASE_URL", "https://api.ebird.org/v2").rstrip("/")
DEFAULT_TIMEOUT = 15  # seconds; passed as the timeout of every HTTP request
RATE_LIMIT_DELAY = 0.1  # 100ms between requests

if not EBIRD_API_KEY:
    # Print to stderr to avoid corrupting STDIO MCP protocol (stdout must be JSON-RPC only)
    print("⚠️ [WARNING]: EBIRD_API_KEY not found in .env", file=sys.stderr)
    print(" Get one from: https://ebird.org/api/keygen", file=sys.stderr)

# Authentication configuration (production only)
IS_PRODUCTION = os.getenv("ENVIRONMENT") == "production"
MCP_API_KEY = os.getenv("MCP_API_KEY")
# Tool configuration: one boolean switch per tool, keyed by the tool's
# function name. Flip a value to False to mark that tool as disabled.
ENABLED_TOOLS = dict(
    search_species=True,
    get_recent_sightings_nearby=True,
    find_hotspots_nearby=True,
    get_location_birds=True,
    get_species_info=True,
    get_notable_sightings=True,
    analyze_location=True,
)
# Initialize FastMCP server with optional auth
if IS_PRODUCTION and MCP_API_KEY:
    # Production: Enable API key authentication
    import hmac

    from fastmcp.server.auth.providers.debug import DebugTokenVerifier

    def _is_valid_token(token: str) -> bool:
        """Return True when *token* equals MCP_API_KEY."""
        # compare_digest takes time independent of where the first mismatch
        # occurs, so response timing does not reveal a partially-correct key.
        # Comparing bytes keeps non-ASCII keys working.
        return isinstance(token, str) and hmac.compare_digest(
            token.encode("utf-8"), MCP_API_KEY.encode("utf-8")
        )

    auth = DebugTokenVerifier(
        validate=_is_valid_token,
        client_id="ebird-mcp-client",
    )
    mcp = FastMCP("eBird Data Explorer", auth=auth)
else:
    if IS_PRODUCTION:
        # Production was requested but no key is configured: the server would
        # otherwise come up unauthenticated without any indication. stderr is
        # used because stdout is reserved for JSON-RPC in stdio mode.
        print(
            "⚠️ [WARNING]: ENVIRONMENT=production but MCP_API_KEY is not set - "
            "authentication is DISABLED",
            file=sys.stderr,
        )
    # Development: No authentication
    mcp = FastMCP("eBird Data Explorer")
# Rate limiting tracker: time.monotonic() reading taken when the previous
# request was released (0.0 until the first request)
_last_request_time = 0.0


def _rate_limit():
    """
    Enforce rate limiting to avoid exceeding eBird's API limits.

    Blocks until at least RATE_LIMIT_DELAY seconds have passed since the
    previous call, then records the current time in _last_request_time.
    """
    global _last_request_time
    # A monotonic clock is used on purpose: with wall-clock time a backwards
    # clock adjustment would make `elapsed` negative and the sleep below
    # correspondingly long.
    elapsed = time.monotonic() - _last_request_time
    if elapsed < RATE_LIMIT_DELAY:
        time.sleep(RATE_LIMIT_DELAY - elapsed)
    _last_request_time = time.monotonic()
def _make_request(endpoint: str, params: Optional[Dict] = None) -> Optional[Any]:
    """
    Centralized request handler with error handling and rate limiting.

    Every diagnostic is written to stderr: in stdio transport mode stdout
    carries the JSON-RPC stream and must not receive log output.

    Args:
        endpoint: API endpoint path (e.g., "/data/obs/geo/recent")
        params: Query parameters dictionary

    Returns:
        Parsed JSON body (a list or dict, depending on the endpoint) when the
        server answers HTTP 200; None on any other status or on any error
    """
    _rate_limit()
    url = f"{BASE_URL}{endpoint}"
    try:
        response = requests.get(
            url,
            headers={"X-eBirdApiToken": EBIRD_API_KEY},
            params=params or {},
            timeout=DEFAULT_TIMEOUT,
        )
        status = response.status_code
        if status == 200:
            return response.json()

        # Only the first 400 characters of the body are logged
        body = response.text[:400]
        if status == 400:
            message = f"❌ Bad Request ({url}): {body}"
        elif status == 401:
            message = f"❌ Unauthorized ({url}): Check your EBIRD_API_KEY - body={body}"
        elif status == 404:
            message = f"❌ Not found ({url}): Invalid endpoint or resource - body={body}"
        else:
            message = f"❌ HTTP {status} for {url} params={params or {}} body={body}"
        print(message, file=sys.stderr, flush=True)
        return None
    except requests.Timeout:
        print(
            f"❌ Request timeout after {DEFAULT_TIMEOUT}s for {endpoint}",
            file=sys.stderr,
            flush=True,
        )
        return None
    except requests.ConnectionError:
        print(
            f"❌ Connection error calling {endpoint} - check network",
            file=sys.stderr,
            flush=True,
        )
        return None
    except Exception as e:
        # Also reached when a 200 response does not contain valid JSON
        print(
            f"❌ Unexpected error calling {endpoint}: {str(e)}",
            file=sys.stderr,
            flush=True,
        )
        return None
def _format_success_response(data: Any, **kwargs) -> str:
"""Format a successful response as JSON"""
response = {"status": "success", "data": data}
response.update(kwargs)
return json.dumps(response)
def _format_error_response(error: str) -> str:
"""Format an error response as JSON"""
return json.dumps({"status": "error", "error": error})
# ============================================================================
# TOOL 1: search_species
# ============================================================================
# Use case: User types "cardinal" or classifier returns "Northern Cardinal"
# This tool finds the species code needed for other tools
def search_species(search_term: str, max_results: int = 10) -> str:
    """
    Search for bird species by common or scientific name.

    This tool finds species codes needed for other lookups. Accepts:
    - Common names: "cardinal", "blue jay", "bald eagle"
    - Partial matches: "car" -> "Northern Cardinal", "Carolina Parakeet", etc.
    - Scientific names: "Cardinalis cardinalis"

    Can accept:
    - User input: Direct species search
    - Classifier output: e.g., "Northern Cardinal" from image classification

    Matching is a case-insensitive substring test against the common and
    scientific names; leading/trailing whitespace in the search term is
    ignored. Only taxonomy entries whose category is 'species' are returned.

    Args:
        search_term: Bird name (common or scientific), at least 2 characters
            after trimming
        max_results: Maximum matches to return (default: 10); negative
            values are treated as 0

    Returns:
        JSON with matched species and their codes for other tools, or a JSON
        error when the term is too short, the taxonomy cannot be fetched, or
        nothing matches

    Example:
        search_species("cardinal")
        -> Returns all cardinals with species codes (norcar, carcar, etc.)
    """
    # Normalise once so that the text being validated is exactly the text
    # being matched; surrounding whitespace must not take part in the
    # substring comparison.
    needle = (search_term or "").strip().lower()
    if len(needle) < 2:
        return _format_error_response("Search term must be at least 2 characters")
    try:
        data = _make_request("/ref/taxonomy/ebird", {"fmt": "json"})
        if not data:
            return _format_error_response("Failed to fetch species database")

        # Filter: match in common name OR scientific name, main species only
        matches = [
            {
                "common_name": s['comName'],
                "scientific_name": s['sciName'],
                "species_code": s['speciesCode'],
                "family": s.get('familyComName', 'Unknown'),
                "order": s.get('order', 'Unknown'),
                "category": s.get('category', 'Unknown'),
            }
            for s in data
            if s.get('category') == 'species'
            and (needle in s['comName'].lower() or needle in s['sciName'].lower())
        ]
        if not matches:
            return _format_error_response(f"No species found matching '{search_term}'")

        # Clamp at 0: a negative limit would otherwise slice from the end of
        # the list and return "all but the last N" matches.
        selected = matches[:max(max_results, 0)]
        return _format_success_response(
            selected,
            count=len(selected),
            search_term=search_term,
        )
    except Exception as e:
        return _format_error_response(f"Search failed: {str(e)}")
# Register as MCP tool unless it has been switched off in ENABLED_TOOLS
# (a tool missing from the mapping is treated as enabled)
if ENABLED_TOOLS.get("search_species", True):
    mcp.tool()(search_species)
# ============================================================================
# TOOL 2: get_recent_sightings_nearby
# ============================================================================
# Use case: After identifying a bird, find recent sightings near user
def get_recent_sightings_nearby(
    species_code: str,
    latitude: float,
    longitude: float,
    radius_km: int = 50,
    max_results: int = 10
) -> str:
    """
    Get recent sightings of a specific bird near a location.

    Returns observations from other birdwatchers in the eBird network.

    Can accept:
    - User input: Coordinates from address lookup, species code from search
    - Classifier output: Species code (after search_species lookup)

    Args:
        species_code: eBird species code (e.g., "norcar" for Northern Cardinal);
            surrounding whitespace is ignored
        latitude: Location latitude
        longitude: Location longitude
        radius_km: Search radius in kilometers (max 50). Larger values are
            capped at 50 in the request; the response echoes the value given
        max_results: Maximum observations to return

    Returns:
        JSON with recent observations near location (an empty list when
        there are none), always including the species code that was queried

    Example:
        get_recent_sightings_nearby("norcar", 40.7829, -73.9654, 25, 10)
        -> Recent cardinal sightings in Central Park area
    """
    from urllib.parse import quote

    species_code = (species_code or "").strip()
    if not species_code:
        return _format_error_response("Species code required")
    if not -90 <= latitude <= 90:
        return _format_error_response("Latitude must be between -90 and 90")
    if not -180 <= longitude <= 180:
        return _format_error_response("Longitude must be between -180 and 180")
    try:
        # The code becomes a URL path segment; percent-encode it so characters
        # such as "/" or "?" cannot redirect the request to another endpoint.
        endpoint = f"/data/obs/geo/recent/{quote(species_code, safe='')}"
        params = {
            "lat": latitude,
            "lng": longitude,
            "dist": min(radius_km, 50),
            "maxResults": max_results,
        }
        data = _make_request(endpoint, params)
        if data is None:
            return _format_error_response("Failed to fetch sightings")

        # An empty payload simply produces an empty list here, so empty and
        # non-empty results share one response shape.
        sightings = [
            {
                "common_name": obs['comName'],
                "scientific_name": obs['sciName'],
                "location": obs['locName'],
                "location_id": obs['locId'],
                "date": obs['obsDt'],
                "count": obs.get('howMany'),
                "latitude": obs.get('lat'),
                "longitude": obs.get('lng'),
            }
            for obs in data
        ]
        return _format_success_response(
            sightings,
            count=len(sightings),
            location={"lat": latitude, "lng": longitude},
            radius_km=radius_km,
            species_code=species_code,
        )
    except Exception as e:
        return _format_error_response(f"Lookup failed: {str(e)}")
# Register as MCP tool unless it has been switched off in ENABLED_TOOLS
# (a tool missing from the mapping is treated as enabled)
if ENABLED_TOOLS.get("get_recent_sightings_nearby", True):
    mcp.tool()(get_recent_sightings_nearby)
# ============================================================================
# TOOL 3: find_hotspots_nearby
# ============================================================================
# Use case: Find popular birding locations near user
def find_hotspots_nearby(
    latitude: float,
    longitude: float,
    radius_km: int = 50,
    max_results: int = 15
) -> str:
    """
    List birding hotspots around a coordinate.

    Queries the "/ref/hotspot/geo" endpoint and reports, for each of the
    first max_results hotspots returned, its name, location id, coordinates,
    all-time species count and latest observation date.

    Can accept:
    - User input: Coordinates from address lookup
    - Classifier output: Not directly, but used after location analysis

    Args:
        latitude: Location latitude (-90 to 90)
        longitude: Location longitude (-180 to 180)
        radius_km: Search radius in kilometers
        max_results: Maximum hotspots to return

    Returns:
        JSON with nearby hotspots and their details; a JSON error when the
        coordinates are out of range or the lookup fails

    Example:
        find_hotspots_nearby(40.7829, -73.9654, 25, 10)
        -> Popular birding locations near Central Park
    """
    if not (-90 <= latitude <= 90):
        return _format_error_response("Latitude must be between -90 and 90")
    if not (-180 <= longitude <= 180):
        return _format_error_response("Longitude must be between -180 and 180")

    # Echoed back in every success response
    origin = {"lat": latitude, "lng": longitude}
    try:
        query = {"lat": latitude, "lng": longitude, "dist": radius_km, "fmt": "json"}
        raw = _make_request("/ref/hotspot/geo", query)

        # None means the request itself failed; an empty payload means the
        # request worked but nothing was found.
        if raw is None:
            return _format_error_response("Failed to fetch hotspots")
        if not raw:
            return _format_success_response(
                [],
                count=0,
                location=origin,
                radius_km=radius_km,
                message="No hotspots found nearby",
            )

        hotspots = []
        for entry in raw[:max_results]:
            hotspots.append({
                "name": entry['locName'],
                "location_id": entry['locId'],
                "latitude": entry['lat'],
                "longitude": entry['lng'],
                "species_recorded": entry.get('numSpeciesAllTime', 0),
                "latest_obs_date": entry.get('latestObsDt', 'Unknown'),
            })

        return _format_success_response(
            hotspots,
            count=len(hotspots),
            location=origin,
            radius_km=radius_km,
        )
    except Exception as exc:
        return _format_error_response("Lookup failed: " + str(exc))
# Register as MCP tool unless it has been switched off in ENABLED_TOOLS
# (a tool missing from the mapping is treated as enabled)
if ENABLED_TOOLS.get("find_hotspots_nearby", True):
    mcp.tool()(find_hotspots_nearby)
# ============================================================================
# TOOL 4: get_location_birds
# ============================================================================
# Use case: See ALL birds being seen at a location right now
def get_location_birds(
    latitude: float,
    longitude: float,
    radius_km: int = 50,
    max_results: int = 50
) -> str:
    """
    Get ALL recent bird sightings at a location (no species filter).

    Returns comprehensive view of bird activity - what's being seen right now

    Can accept:
    - User input: Coordinates from address lookup
    - Classifier output: Not directly, but provides context for found species

    Args:
        latitude: Location latitude
        longitude: Location longitude
        radius_km: Search radius in kilometers. Values above 50 are capped at
            50 in the request (same cap as get_recent_sightings_nearby); the
            response echoes the value given
        max_results: Maximum sightings to return

    Returns:
        JSON with all recent sightings and summary statistics: the number of
        distinct common names and the five most frequently listed birds

    Example:
        get_location_birds(40.7829, -73.9654, 25, 60)
        -> All birds being seen in Central Park area right now
    """
    from collections import Counter

    if not -90 <= latitude <= 90:
        return _format_error_response("Latitude must be between -90 and 90")
    if not -180 <= longitude <= 180:
        return _format_error_response("Longitude must be between -180 and 180")
    try:
        endpoint = "/data/obs/geo/recent"
        params = {
            "lat": latitude,
            "lng": longitude,
            "dist": min(radius_km, 50),
            "maxResults": max_results,
        }
        data = _make_request(endpoint, params)
        if data is None:
            return _format_error_response("Failed to fetch sightings")
        if not data:
            return _format_success_response(
                [],
                count=0,
                unique_species=0,
                location={"lat": latitude, "lng": longitude},
                radius_km=radius_km,
                message="No sightings found at this location",
            )

        sightings = [
            {
                "common_name": obs['comName'],
                "scientific_name": obs['sciName'],
                "species_code": obs['speciesCode'],
                "location": obs['locName'],
                "date": obs['obsDt'],
                "count": obs.get('howMany'),
                "latitude": obs.get('lat'),
                "longitude": obs.get('lng'),
            }
            for obs in sightings_source(data)
        ] if False else [
            {
                "common_name": obs['comName'],
                "scientific_name": obs['sciName'],
                "species_code": obs['speciesCode'],
                "location": obs['locName'],
                "date": obs['obsDt'],
                "count": obs.get('howMany'),
                "latitude": obs.get('lat'),
                "longitude": obs.get('lng'),
            }
            for obs in data
        ]

        # Number of observation records per common name; the distinct-name
        # count and the "top birds" ranking both come from this tally.
        bird_counts = Counter(s['common_name'] for s in sightings)
        # most_common keeps first-seen order among equal counts
        top_birds = bird_counts.most_common(5)

        return _format_success_response(
            sightings,
            count=len(sightings),
            unique_species=len(bird_counts),
            location={"lat": latitude, "lng": longitude},
            radius_km=radius_km,
            top_birds=[{"species": name, "observations": count} for name, count in top_birds],
        )
    except Exception as e:
        return _format_error_response(f"Lookup failed: {str(e)}")
# Register as MCP tool unless it has been switched off in ENABLED_TOOLS
# (a tool missing from the mapping is treated as enabled)
if ENABLED_TOOLS.get("get_location_birds", True):
    mcp.tool()(get_location_birds)
# ============================================================================
# TOOL 5: get_species_info
# ============================================================================
# Use case: Get taxonomy and detailed info about a species
def get_species_info(species_code: str) -> str:
    """
    Get detailed taxonomy and metadata for a bird species.

    Returns scientific classification, family, order, and other details.

    Can accept:
    - User input: Species code from search_species tool
    - Classifier output: Species code (after search_species lookup)

    Args:
        species_code: eBird species code (e.g., "norcar"); surrounding
            whitespace is ignored, at least 2 characters are required

    Returns:
        JSON with complete species information, or a JSON error when the
        code is missing, the taxonomy cannot be fetched, or no entry with
        category 'species' carries that code

    Example:
        get_species_info("norcar")
        -> Northern Cardinal taxonomy, family, order, etc.
    """
    # Trim once and use the trimmed value everywhere below, so the code that
    # passes validation is the same code that is queried and compared.
    species_code = (species_code or "").strip()
    if len(species_code) < 2:
        return _format_error_response("Species code required")
    try:
        endpoint = "/ref/taxonomy/ebird"
        params = {
            "fmt": "json",
            "species": species_code,
        }
        data = _make_request(endpoint, params)
        if data is None:
            return _format_error_response("Failed to fetch taxonomy")

        # Find main species (not subspecies)
        species = next(
            (
                s for s in data
                if s.get('speciesCode') == species_code and s.get('category') == 'species'
            ),
            None,
        )
        if not species:
            return _format_error_response(f"Species code '{species_code}' not found")

        info = {
            "common_name": species['comName'],
            "scientific_name": species['sciName'],
            "species_code": species['speciesCode'],
            "family": species.get('familyComName', 'Unknown'),
            "family_sci_name": species.get('familySciName', 'Unknown'),
            "order": species.get('order', 'Unknown'),
            "category": species.get('category', 'Unknown'),
        }
        return _format_success_response(info, species_code=species_code)
    except Exception as e:
        return _format_error_response(f"Lookup failed: {str(e)}")
# Register as MCP tool unless it has been switched off in ENABLED_TOOLS
# (a tool missing from the mapping is treated as enabled)
if ENABLED_TOOLS.get("get_species_info", True):
    mcp.tool()(get_species_info)
# ============================================================================
# TOOL 6: get_notable_sightings
# ============================================================================
# Use case: Find rare/unusual birds in a region
def get_notable_sightings(
    region_code: str = "US",
    max_results: int = 10
) -> str:
    """
    Get rare or notable bird sightings in a region.

    Notable sightings are birds that are unusual/rare for the region.
    Great for discovering unexpected species.

    Can accept:
    - User input: Region code (e.g., "US", "US-NY", "CA-ON")
    - Classifier output: Not directly, but region can be derived from location

    Args:
        region_code: Region code (country, state, province); surrounding
            whitespace is ignored
        max_results: Maximum notable sightings to return

    Returns:
        JSON with recent notable/rare sightings, or a JSON error when the
        region code is empty or the lookup fails

    Example:
        get_notable_sightings("US-NY", 10)
        -> Rare/unusual birds spotted in New York recently
    """
    from urllib.parse import quote

    # A whitespace-only value is as unusable as an empty one
    region_code = (region_code or "").strip()
    if not region_code:
        return _format_error_response("Region code required")
    try:
        # The region becomes a URL path segment; percent-encode it so
        # characters such as "/" or "?" cannot redirect the request.
        endpoint = f"/data/obs/{quote(region_code, safe='')}/recent/notable"
        params = {"maxResults": max_results}
        data = _make_request(endpoint, params)
        if data is None:
            return _format_error_response("Failed to fetch notable sightings")
        if not data:
            return _format_success_response(
                [],
                count=0,
                region_code=region_code,
                message="No notable sightings found",
            )

        notable = [
            {
                "common_name": obs['comName'],
                "scientific_name": obs['sciName'],
                "species_code": obs['speciesCode'],
                "location": obs['locName'],
                "location_id": obs['locId'],
                "date": obs['obsDt'],
                "count": obs.get('howMany'),
                "latitude": obs.get('lat'),
                "longitude": obs.get('lng'),
            }
            for obs in data
        ]
        return _format_success_response(
            notable,
            count=len(notable),
            region_code=region_code,
        )
    except Exception as e:
        return _format_error_response(f"Lookup failed: {str(e)}")
# Register as MCP tool unless it has been switched off in ENABLED_TOOLS
# (a tool missing from the mapping is treated as enabled)
if ENABLED_TOOLS.get("get_notable_sightings", True):
    mcp.tool()(get_notable_sightings)
# ============================================================================
# TOOL 7: analyze_location
# ============================================================================
# Use case: Comprehensive location analysis - all birds + hotspots + summary
def analyze_location(
    latitude: float,
    longitude: float,
    radius_km: int = 50
) -> str:
    """
    Comprehensive location analysis combining all bird data.

    This is a "power tool" that combines multiple API calls to give
    complete view of birding activity: recent sightings, hotspots, stats.

    Can accept:
    - User input: Coordinates from address lookup
    - Classifier output: Not directly, but provides full context

    Args:
        latitude: Location latitude
        longitude: Location longitude
        radius_km: Search radius in kilometers. For the sightings request,
            values above 50 are capped at 50 (same cap as
            get_recent_sightings_nearby); the hotspot request and the
            response use the value given

    Returns:
        JSON with sightings (up to 100 requested), hotspots (first 15) and
        summary statistics. If exactly one of the two lookups fails, the
        analysis is still returned with a "warnings" list naming the missing
        part; if both fail, a JSON error is returned instead.

    Example:
        analyze_location(40.7820, -73.9654, 25)
        -> Complete birding report for Central Park area
    """
    from collections import Counter

    if not -90 <= latitude <= 90:
        return _format_error_response("Latitude must be between -90 and 90")
    if not -180 <= longitude <= 180:
        return _format_error_response("Longitude must be between -180 and 180")
    try:
        # Get all recent observations
        obs_params = {
            "lat": latitude,
            "lng": longitude,
            "dist": min(radius_km, 50),
            "maxResults": 100,
        }
        sightings_data = _make_request("/data/obs/geo/recent", obs_params)

        # Get nearby hotspots (only the first 15 are reported below)
        hotspots_params = {
            "lat": latitude,
            "lng": longitude,
            "dist": radius_km,
            "fmt": "json",
        }
        hotspots_data = _make_request("/ref/hotspot/geo", hotspots_params)

        # _make_request returns None on failure. Keep failures apart from
        # genuinely empty results so an outage is not reported as "no birds".
        warnings = []
        if sightings_data is None:
            warnings.append("Recent sightings could not be fetched")
        if hotspots_data is None:
            warnings.append("Hotspots could not be fetched")
        if len(warnings) == 2:
            return _format_error_response("Failed to fetch sightings and hotspots")

        # Format sightings
        sightings = [
            {
                "common_name": obs['comName'],
                "scientific_name": obs['sciName'],
                "species_code": obs['speciesCode'],
                "location": obs['locName'],
                "date": obs['obsDt'],
                "count": obs.get('howMany'),
                "latitude": obs.get('lat'),
                "longitude": obs.get('lng'),
            }
            for obs in (sightings_data or [])
        ]

        # Format hotspots
        hotspots = [
            {
                "name": hotspot['locName'],
                "location_id": hotspot['locId'],
                "latitude": hotspot['lat'],
                "longitude": hotspot['lng'],
                "species_recorded": hotspot.get('numSpeciesAllTime', 0),
                "latest_obs_date": hotspot.get('latestObsDt', 'Unknown'),
            }
            for hotspot in (hotspots_data or [])[:15]
        ]

        # Observation records per common name; drives both the distinct-name
        # count and the top-species ranking (ties keep first-seen order).
        bird_counts = Counter(s['common_name'] for s in sightings)
        top_birds = bird_counts.most_common(10)

        analysis = {
            "location": {
                "latitude": latitude,
                "longitude": longitude,
                "radius_km": radius_km,
            },
            "sightings": sightings,
            "hotspots": hotspots,
            "summary": {
                "total_sightings": len(sightings),
                "unique_species": len(bird_counts),
                "total_hotspots": len(hotspots),
                "top_species": [{"name": name, "observations": count} for name, count in top_birds],
            },
        }
        if warnings:
            # Only present when one of the two lookups failed
            analysis["warnings"] = warnings
        return _format_success_response(analysis)
    except Exception as e:
        return _format_error_response(f"Analysis failed: {str(e)}")
# Register as MCP tool unless it has been switched off in ENABLED_TOOLS
# (a tool missing from the mapping is treated as enabled)
if ENABLED_TOOLS.get("analyze_location", True):
    mcp.tool()(analyze_location)
# ============================================================================
# SERVER STARTUP WITH DUAL TRANSPORT SUPPORT
# ============================================================================
def main():
    """
    Start the MCP server with dual transport support.

    Command line flags (read from sys.argv):
        --http / --streamable-http: serve over streamable-http instead of stdio
        --port N: port for HTTP mode (default 8000)
        --host H: bind address for HTTP mode (default 127.0.0.1)

    A startup banner is printed first: to stdout in HTTP mode, to stderr in
    stdio mode. The call then blocks inside mcp.run().
    """
    # Determine transport mode first
    is_http_mode = "--http" in sys.argv or "--streamable-http" in sys.argv
    # For STDIO mode, all informational output must go to stderr (stdout is for JSON-RPC only)
    output = sys.stdout if is_http_mode else sys.stderr

    print("\n" + "=" * 70, file=output)
    print("🦅 [eBird MCP SERVER] - Starting...", file=output)
    print("=" * 70, file=output)
    print(f"[API KEY]: {'✅ Configured' if EBIRD_API_KEY else '❌ Missing'}", file=output)
    print("\n[AVAILABLE TOOLS]:", file=output)
    tools_list = [
        "1. search_species - Find species by name",
        "2. get_recent_sightings_nearby - Recent sightings near location",
        "3. find_hotspots_nearby - Find popular birding locations",
        "4. get_location_birds - All birds at a location",
        "5. get_species_info - Taxonomy and species details",
        "6. get_notable_sightings - Rare/unusual birds in region",
        "7. analyze_location - Comprehensive location analysis",
    ]
    for tool in tools_list:
        print(f" ✓ {tool}", file=output)
    print("\n" + "=" * 70, file=output)

    if is_http_mode:
        # Extract port and host from command line args; each flag takes the
        # argument that follows it, and the last occurrence wins
        port = 8000
        host = "127.0.0.1"
        for i, arg in enumerate(sys.argv):
            if arg == "--port" and i + 1 < len(sys.argv):
                port = int(sys.argv[i + 1])
            elif arg == "--host" and i + 1 < len(sys.argv):
                host = sys.argv[i + 1]

        print("[TRANSPORT]: Starting streamable-http MCP server", file=output)
        print(f"[HOST]: {host}", file=output)
        print(f"[PORT]: {port}", file=output)
        print(f"[URL]: http://{host}:{port}", file=output)
        print(f"[AUTH]: {'🔒 Enabled (production)' if IS_PRODUCTION and MCP_API_KEY else '🔓 Disabled (development)'}", file=output)
        print("[NOTE]: This is proper MCP over HTTP", file=output)
        print("=" * 70 + "\n", file=output)
        # Run with streamable-http transport (built-in MCP support)
        mcp.run(transport="streamable-http", host=host, port=port)
    else:
        print("[TRANSPORT]: Running as stdio MCP server", file=output)
        print("[NOTE]: For HTTP transport, use: python ebird_tools.py --http", file=output)
        print("=" * 70 + "\n", file=output)
        # Run as stdio MCP server (default)
        mcp.run(transport="stdio")


if __name__ == "__main__":
    main()