Spaces:

minhvtt
/

EBD_Fest

Sleeping

App Files Files Community

minhvtt committed on Apr 7

Commit

ce85c02

verified ·

1 Parent(s): 44f3755

Upload 8 files

Browse files

Files changed (1) hide show

routes_team_chat.py +66 -1

routes_team_chat.py CHANGED Viewed

@@ -225,6 +225,71 @@ def _has_requirement_node_payload(payload: Dict[str, Any]) -> bool:
     )
 def _assert_team_project_access(user: Dict[str, Any], team_id: str, project_id: Optional[str]) -> Optional[Dict[str, Any]]:
     team = teams_collection.find_one({"id": team_id}, {"_id": 0})
     if not team or user["id"] not in unique_ids([team.get("owner_id", "")], team.get("member_ids", [])):
@@ -609,7 +674,7 @@ OUTPUT SHAPE:
     raw_text = run_team_agent_with_nvidia(
         system_prompt=TEAM_AGENT_SYSTEM_PROMPT + "\n" + tools_description,
-        payload=prompt_context,
     ).strip()
     parsed = _extract_json_payload(raw_text) or {}

     )
+def _truncate_prompt_context(
+    ctx: Dict[str, Any],
+    *,
+    max_section_content: int = 1500,
+    max_sections: int = 10,
+    max_messages: int = 15,
+    max_msg_content: int = 500,
+    max_grounded_answer: int = 2000,
+    max_qa_memory: int = 1500,
+    max_index_nodes: int = 30,
+) -> Dict[str, Any]:
+    """Return a size-bounded copy of prompt_context before sending to NVIDIA."""
+    ctx = dict(ctx)
+    # Truncate document sections (most expensive — can be huge)
+    sections = list(ctx.get("documents_sections") or [])
+    if len(sections) > max_sections:
+        sections = sections[:max_sections]
+    safe_sections = []
+    for sec in sections:
+        sec = dict(sec)
+        content = str(sec.get("content") or "")
+        if len(content) > max_section_content:
+            sec["content"] = content[:max_section_content] + "…[truncated]"
+        safe_sections.append(sec)
+    ctx["documents_sections"] = safe_sections
+    # Limit + truncate chat messages
+    for key in ("selected_messages", "fallback_messages"):
+        msgs = list(ctx.get(key) or [])
+        if len(msgs) > max_messages:
+            msgs = msgs[-max_messages:]
+        trimmed = []
+        for m in msgs:
+            m = dict(m)
+            content = str(m.get("content") or "")
+            if len(content) > max_msg_content:
+                m["content"] = content[:max_msg_content] + "…"
+            trimmed.append(m)
+        ctx[key] = trimmed
+    # Truncate LLM-generated answer that may be large
+    answer = str(ctx.get("document_grounded_answer") or "")
+    if len(answer) > max_grounded_answer:
+        ctx["document_grounded_answer"] = answer[:max_grounded_answer] + "…"
+    # Truncate accumulated QA memory
+    memory = str(ctx.get("doc_qa_memory") or "")
+    if len(memory) > max_qa_memory:
+        ctx["doc_qa_memory"] = memory[:max_qa_memory] + "…"
+    # Limit index nodes per document (already minimal fields, just guard count)
+    doc_indexes = list(ctx.get("documents_index") or [])
+    for di in doc_indexes:
+        nodes = di.get("nodes") or []
+        if len(nodes) > max_index_nodes:
+            di["nodes"] = nodes[:max_index_nodes]
+    ctx["documents_index"] = doc_indexes
+    # Drop large debug metadata that the agent does not use
+    ctx.pop("documents_retrieval_meta", None)
+    return ctx
 def _assert_team_project_access(user: Dict[str, Any], team_id: str, project_id: Optional[str]) -> Optional[Dict[str, Any]]:
     team = teams_collection.find_one({"id": team_id}, {"_id": 0})
     if not team or user["id"] not in unique_ids([team.get("owner_id", "")], team.get("member_ids", [])):
     raw_text = run_team_agent_with_nvidia(
         system_prompt=TEAM_AGENT_SYSTEM_PROMPT + "\n" + tools_description,
+        payload=_truncate_prompt_context(prompt_context),
     ).strip()
     parsed = _extract_json_payload(raw_text) or {}