# Video-Note-Taker / backend/services/action_item_extractor.py
# rajiv-ramteke · commit 28db209 · 7.52 kB
# Commit message: "Fix critical issues: API key race conditions, unused pipeline stages, port mapping"
"""
backend/services/action_item_extractor.py
==========================================
Extracts action items, decisions, and follow-up tasks from
transcript text using an LLM, with a regex-based fallback when no LLM is available.
"""
import json
import os
import re
from typing import Dict, List, Optional

from backend.utils.config import settings
from backend.utils.logger import get_logger
# Module-level logger, requested from the project's logging helper under this module's name.
logger = get_logger(__name__)
# ── Prompt ────────────────────────────────────────────────────────────────────
# Template for the LLM extraction request. It is filled in with ``str.format``
# and has two placeholders: ``{language}`` (output language) and ``{text}``
# (the transcript excerpt, wrapped in triple double-quotes).
_PROMPT_LINES = (
    "You are an expert meeting analyst. Analyse the following transcript and",
    "extract ALL action items, tasks, decisions, and follow-up points. "
    "Keep descriptions very simple and easy to understand.",
    "CRITICAL INSTRUCTION: You MUST generate the action items in the following language: {language}",
    "Format your output STRICTLY as a JSON array with objects having these keys:",
    '- "type": one of ["action", "decision", "follow_up", "reminder"]',
    '- "description": simple description of the item',
    '- "owner": person responsible (use "Unassigned" if unclear)',
    '- "priority": one of ["high", "medium", "low"]',
    "Transcript:",
    '"""',
    "{text}",
    '"""',
    "JSON Array (return ONLY valid JSON, no markdown):",
)
ACTION_ITEM_PROMPT = "\n".join(_PROMPT_LINES)
# ── Regex fallback patterns ───────────────────────────────────────────────────
# Each pattern is a trigger phrase followed by whitespace; capture group 1 is
# the text after the trigger, up to the next "." or the end of the line.
#
# * The leading ``\b`` keeps trigger words from matching inside longer words
#   (e.g. "will" in "goodwill", "agreed" in "disagreed", "resolved" in
#   "unresolved").
# * ``(?m)`` lets ``$`` match at a line break. Without it an unpunctuated line
#   followed by a newline could never match, because "." does not cross "\n"
#   and ``$`` only matched at the very end of the text.
ACTION_PATTERNS = [
    r"(?m)\b(?:we need to|we should|you should|please|make sure to|don't forget to|"
    r"action item[:\s]|todo[:\s]|follow[\s-]up[:\s]|next step[:\s]|"
    r"will|shall|must|have to|going to)\s+(.+?)(?:\.|$)",
]
DECISION_PATTERNS = [
    r"(?m)\b(?:we decided|decision[:\s]|agreed|we agreed|it was decided|"
    r"resolved|concluded|the conclusion is)\s+(.+?)(?:\.|$)",
]
class ActionItemExtractor:
    """
    Extracts structured action items from transcript text.

    Uses the OpenAI chat-completions API when ``settings.LLM_PROVIDER`` is
    ``"openai"`` and an API key can be resolved; otherwise, or when the LLM
    call fails, falls back to regex heuristics.

    Every returned item is a dict with the keys ``type``, ``description``,
    ``owner`` and ``priority``.
    """

    # Transcript character budget when all chunks are sent as one prompt.
    COMBINED_PROMPT_CHARS = 15000
    # Transcript character budget when a single chunk is sent on its own.
    CHUNK_PROMPT_CHARS = 3000
    # Maximum number of items returned by one regex-fallback pass.
    MAX_REGEX_ITEMS = 10
    # Regex captures must be longer than this many characters to be kept.
    MIN_DESCRIPTION_CHARS = 10
    # Number of leading characters compared when de-duplicating descriptions.
    DEDUP_PREFIX_CHARS = 60

    def __init__(self, api_key: Optional[str] = None):
        """
        Args:
            api_key: Explicit OpenAI API key. When omitted, the key is taken
                from the ``OPENAI_API_KEY`` environment variable, then from
                ``settings.OPENAI_API_KEY``.
        """
        self.api_key = api_key
        self._openai_client = None  # created lazily on the first LLM call

    # ── Public API ────────────────────────────────────────────
    def extract(self, chunks: List[Dict], language: str = "English") -> List[Dict]:
        """
        Extract action items from all transcript chunks.

        The chunk texts are joined into one string and processed in a single
        pass, rather than issuing one LLM request per chunk.

        Args:
            chunks: List of chunk dicts from TextChunker; only the ``"text"``
                value of each chunk is read.
            language: The language to generate action items in (LLM path only).

        Returns:
            De-duplicated list of action item dicts; empty when the chunks
            contain no text.
        """
        combined_text = " ".join((c.get("text") or "") for c in (chunks or []))
        if not combined_text.strip():
            # Nothing to analyse: avoid a pointless LLM request.
            logger.info("Extracted 0 action items total")
            return []

        if self._llm_available():
            # The full text is passed so that the regex fallback inside
            # _extract_with_llm sees the same input as the branch below; the
            # prompt itself is limited to COMBINED_PROMPT_CHARS.
            items = self._extract_with_llm(
                combined_text, language, max_chars=self.COMBINED_PROMPT_CHARS
            )
        else:
            logger.warning("LLM provider is not OpenAI or API key is missing. Using regex fallback.")
            items = self._extract_with_regex(combined_text)

        items = self._deduplicate(items)
        logger.info(f"Extracted {len(items)} action items total")
        return items

    def extract_from_full_text(self, text: str, language: str = "English") -> List[Dict]:
        """
        Extract from a single text block (no chunk metadata).

        Args:
            text: Transcript text to analyse.
            language: The language to generate action items in (LLM path only).

        Returns:
            List of action item dicts, each tagged with timestamp
            ``"00:00:00"`` and chunk id ``0``.
        """
        dummy_chunk = {"text": text, "start_ts": "00:00:00", "chunk_id": 0}
        return self._extract_from_chunk(dummy_chunk, language)

    # ── Private ───────────────────────────────────────────────
    def _resolve_api_key(self) -> Optional[str]:
        """Return the API key to use: constructor argument, then env var, then settings."""
        return self.api_key or os.environ.get("OPENAI_API_KEY") or settings.OPENAI_API_KEY

    def _llm_available(self) -> bool:
        """Return True when the OpenAI provider is selected and a key is available."""
        return settings.LLM_PROVIDER == "openai" and bool(self._resolve_api_key())

    def _extract_from_chunk(self, chunk: Dict, language: str = "English") -> List[Dict]:
        """
        Try LLM extraction for one chunk; fall back to regex.

        Each returned item gets ``timestamp`` and ``chunk_id`` copied from the
        chunk's ``start_ts`` and ``chunk_id`` (defaulting to ``"00:00:00"``
        and ``0``).
        """
        text = chunk.get("text") or ""
        if not text.strip():
            return []

        if self._llm_available():
            items = self._extract_with_llm(text, language)
        else:
            items = self._extract_with_regex(text)

        # Attach the chunk's position to each item.
        timestamp = chunk.get("start_ts", "00:00:00")
        chunk_id = chunk.get("chunk_id", 0)
        for item in items:
            item["timestamp"] = timestamp
            item["chunk_id"] = chunk_id
        return items

    def _extract_with_llm(
        self,
        text: str,
        language: str = "English",
        max_chars: int = CHUNK_PROMPT_CHARS,
    ) -> List[Dict]:
        """
        Use OpenAI to extract structured action items.

        Args:
            text: Transcript text.
            language: Language requested for the generated items.
            max_chars: Only the first ``max_chars`` characters of ``text``
                are placed in the prompt.

        Returns:
            Items parsed from the model's JSON reply. If anything goes wrong
            (missing package, API error, invalid JSON), the regex results for
            the full ``text`` are returned instead.
        """
        try:
            # Imported lazily so the regex fallback works without the package.
            from openai import OpenAI

            if self._openai_client is None:
                kwargs = {"api_key": self._resolve_api_key()}
                if settings.OPENAI_BASE_URL:
                    kwargs["base_url"] = settings.OPENAI_BASE_URL
                self._openai_client = OpenAI(**kwargs)

            prompt = ACTION_ITEM_PROMPT.format(text=text[:max_chars], language=language)
            response = self._openai_client.chat.completions.create(
                model=settings.OPENAI_MODEL,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.1,
                max_tokens=800,
            )
            # An empty/None reply fails JSON parsing below and so takes the
            # same fallback path as any other failure.
            raw = response.choices[0].message.content or ""
            return self._parse_llm_items(raw)
        except Exception as e:
            # Deliberately broad: extraction is best-effort and must not
            # abort the caller; any failure degrades to the regex heuristics.
            logger.warning(f"LLM action item extraction failed: {e}. Using regex.")
            return self._extract_with_regex(text)

    @staticmethod
    def _parse_llm_items(raw: str) -> List[Dict]:
        """
        Parse the model reply into a list of well-formed item dicts.

        Markdown code fences are stripped before JSON decoding. Entries that
        are not dicts or lack a non-empty string ``description`` are dropped;
        missing ``type`` / ``owner`` / ``priority`` keys are filled with the
        same defaults the regex fallback uses for action items.

        Raises:
            json.JSONDecodeError: If the cleaned reply is not valid JSON.
        """
        # Strip any accidental markdown fences.
        cleaned = re.sub(r"```(?:json)?", "", raw).strip().rstrip("`").strip()
        parsed = json.loads(cleaned)
        if not isinstance(parsed, list):
            return []

        items = []
        for entry in parsed:
            if not isinstance(entry, dict):
                continue
            description = entry.get("description")
            if not isinstance(description, str) or not description.strip():
                continue
            entry.setdefault("type", "action")
            entry.setdefault("owner", "Unassigned")
            entry.setdefault("priority", "medium")
            items.append(entry)
        return items

    @staticmethod
    def _sentence_case(text: str) -> str:
        """Upper-case the first character, leaving the rest (names, acronyms) untouched."""
        return text[:1].upper() + text[1:]

    def _extract_with_regex(self, text: str) -> List[Dict]:
        """
        Regex-based heuristic extraction as fallback.

        Matching is case-insensitive and runs on the original text so that
        the casing of names and acronyms survives in the descriptions.
        Action matches come first, then decision matches; the result is
        capped at ``MAX_REGEX_ITEMS``.
        """
        rule_sets = (
            (ACTION_PATTERNS, "action", "medium"),
            (DECISION_PATTERNS, "decision", "high"),
        )
        items: List[Dict] = []
        for patterns, item_type, priority in rule_sets:
            for pattern in patterns:
                for match in re.finditer(pattern, text, re.IGNORECASE):
                    desc = match.group(1).strip()
                    # Very short captures are almost always noise.
                    if len(desc) > self.MIN_DESCRIPTION_CHARS:
                        items.append({
                            "type": item_type,
                            "description": self._sentence_case(desc),
                            "owner": "Unassigned",
                            "priority": priority,
                        })
        return items[: self.MAX_REGEX_ITEMS]  # Cap regex results per call

    @staticmethod
    def _deduplicate(items: List[Dict]) -> List[Dict]:
        """
        Remove near-duplicate action items by description similarity.

        Two items are duplicates when the first 60 characters of their
        stripped, lower-cased descriptions are equal; the first occurrence is
        kept and order is preserved. Items without a string ``description``
        cannot be compared and are passed through unchanged.
        """
        seen = set()
        unique = []
        for item in items:
            description = item.get("description") if isinstance(item, dict) else None
            if not isinstance(description, str):
                unique.append(item)
                continue
            key = description.strip().lower()[: ActionItemExtractor.DEDUP_PREFIX_CHARS]
            if key not in seen:
                seen.add(key)
                unique.append(item)
        return unique