Spaces:

minhvtt
/

EBD_Fest

Sleeping

App Files Community

minhvtt committed on Apr 7

Commit

c6c0abc

verified ·

1 Parent(s): ce85c02

Upload routes_team_chat.py

Browse files

Files changed (1) hide show

routes_team_chat.py +59 -23

routes_team_chat.py CHANGED Viewed

@@ -225,34 +225,44 @@ def _has_requirement_node_payload(payload: Dict[str, Any]) -> bool:
     )
 def _truncate_prompt_context(
     ctx: Dict[str, Any],
     *,
     max_section_content: int = 1500,
-    max_sections: int = 10,
-    max_messages: int = 15,
-    max_msg_content: int = 500,
     max_grounded_answer: int = 2000,
     max_qa_memory: int = 1500,
-    max_index_nodes: int = 30,
 ) -> Dict[str, Any]:
     """Return a size-bounded copy of prompt_context before sending to NVIDIA."""
     ctx = dict(ctx)
-    # Truncate document sections (most expensive — can be huge)
-    sections = list(ctx.get("documents_sections") or [])
-    if len(sections) > max_sections:
-        sections = sections[:max_sections]
     safe_sections = []
     for sec in sections:
         sec = dict(sec)
-        content = str(sec.get("content") or "")
-        if len(content) > max_section_content:
-            sec["content"] = content[:max_section_content] + "…[truncated]"
         safe_sections.append(sec)
     ctx["documents_sections"] = safe_sections
-    # Limit + truncate chat messages
     for key in ("selected_messages", "fallback_messages"):
         msgs = list(ctx.get(key) or [])
         if len(msgs) > max_messages:
@@ -262,30 +272,50 @@ def _truncate_prompt_context(
             m = dict(m)
             content = str(m.get("content") or "")
             if len(content) > max_msg_content:
-                m["content"] = content[:max_msg_content] + "…"
             trimmed.append(m)
         ctx[key] = trimmed
-    # Truncate LLM-generated answer that may be large
     answer = str(ctx.get("document_grounded_answer") or "")
     if len(answer) > max_grounded_answer:
-        ctx["document_grounded_answer"] = answer[:max_grounded_answer] + "…"
-    # Truncate accumulated QA memory
     memory = str(ctx.get("doc_qa_memory") or "")
     if len(memory) > max_qa_memory:
-        ctx["doc_qa_memory"] = memory[:max_qa_memory] + "…"
-    # Limit index nodes per document (already minimal fields, just guard count)
     doc_indexes = list(ctx.get("documents_index") or [])
     for di in doc_indexes:
         nodes = di.get("nodes") or []
         if len(nodes) > max_index_nodes:
             di["nodes"] = nodes[:max_index_nodes]
     ctx["documents_index"] = doc_indexes
-    # Drop large debug metadata that the agent does not use
     ctx.pop("documents_retrieval_meta", None)
     return ctx
@@ -555,10 +585,16 @@ async def team_chat(req: TeamChatRequest, x_session_token: Optional[str] = Heade
     )
     qa_memory = get_team_doc_qa_memory(req.team_id, req.project_id)
     doc_indexes = []
     for doc in team_docs:
         tree = doc.get("tree") or {}
         nodes = tree.get("nodes") or []
         doc_indexes.append(
             {
                 "document_id": doc.get("id"),
@@ -568,12 +604,12 @@ async def team_chat(req: TeamChatRequest, x_session_token: Optional[str] = Heade
                     {
                         "id": node.get("id"),
                         "parent_id": node.get("parent_id"),
-                        "title": node.get("title"),
-                        "summary": node.get("summary"),
-                        "scope": node.get("scope"),
                         "level": node.get("level"),
                     }
-                    for node in nodes
                 ],
             }
         )

     )
+def _clip(text: str, limit: int) -> str:
+    return text if len(text) <= limit else text[:limit] + "…[truncated]"
 def _truncate_prompt_context(
     ctx: Dict[str, Any],
     *,
     max_section_content: int = 1500,
+    max_section_summary: int = 300,
+    max_sections: int = 8,
+    max_messages: int = 12,
+    max_msg_content: int = 400,
     max_grounded_answer: int = 2000,
     max_qa_memory: int = 1500,
+    max_index_nodes: int = 25,
+    max_index_summary: int = 200,
 ) -> Dict[str, Any]:
     """Return a size-bounded copy of prompt_context before sending to NVIDIA."""
     ctx = dict(ctx)
+    # ── Truncate document sections (biggest offender) ──
+    sections = list(ctx.get("documents_sections") or [])[:max_sections]
     safe_sections = []
     for sec in sections:
         sec = dict(sec)
+        # Field names from retrieve_document_context_with_tree are section_content / section_summary / section_context
+        for field in ("section_content", "content"):
+            val = sec.get(field)
+            if val and len(str(val)) > max_section_content:
+                sec[field] = _clip(str(val), max_section_content)
+        for field in ("section_summary", "summary", "section_context"):
+            val = sec.get(field)
+            if val and len(str(val)) > max_section_summary:
+                sec[field] = _clip(str(val), max_section_summary)
         safe_sections.append(sec)
     ctx["documents_sections"] = safe_sections
+    # ── Limit + truncate chat messages ──
     for key in ("selected_messages", "fallback_messages"):
         msgs = list(ctx.get(key) or [])
         if len(msgs) > max_messages:
             m = dict(m)
             content = str(m.get("content") or "")
             if len(content) > max_msg_content:
+                m["content"] = _clip(content, max_msg_content)
             trimmed.append(m)
         ctx[key] = trimmed
+    # ── Truncate LLM-generated answer ──
     answer = str(ctx.get("document_grounded_answer") or "")
     if len(answer) > max_grounded_answer:
+        ctx["document_grounded_answer"] = _clip(answer, max_grounded_answer)
+    # ── Truncate accumulated QA memory ──
     memory = str(ctx.get("doc_qa_memory") or "")
     if len(memory) > max_qa_memory:
+        ctx["doc_qa_memory"] = _clip(memory, max_qa_memory)
+    # ── Limit document index nodes (82K nodes × 2 docs = main overflow source) ──
     doc_indexes = list(ctx.get("documents_index") or [])
     for di in doc_indexes:
         nodes = di.get("nodes") or []
         if len(nodes) > max_index_nodes:
             di["nodes"] = nodes[:max_index_nodes]
+        for node in di.get("nodes", []):
+            for field in ("summary", "scope"):
+                val = node.get(field)
+                if val and len(str(val)) > max_index_summary:
+                    node[field] = _clip(str(val), max_index_summary)
     ctx["documents_index"] = doc_indexes
+    # ── Drop large debug metadata the agent does not use ──
     ctx.pop("documents_retrieval_meta", None)
+    # Also strip citations detail (agent doesn't act on them)
+    ctx.pop("documents_citations", None)
+    ctx.pop("document_grounded_citations", None)
+    # ── Hard safety cap: if serialized prompt still too large, aggressively trim ──
+    _MAX_PROMPT_CHARS = 180_000  # ~60K tokens, well under 262K token limit
+    serialized = json.dumps(ctx, ensure_ascii=False, default=str)
+    if len(serialized) > _MAX_PROMPT_CHARS:
+        # Emergency: drop the heaviest fields until under budget
+        for drop_key in ("documents_sections", "documents_index", "agent_context", "doc_qa_memory"):
+            if len(serialized) <= _MAX_PROMPT_CHARS:
+                break
+            if drop_key in ctx:
+                ctx[drop_key] = [] if isinstance(ctx.get(drop_key), list) else ""
+                serialized = json.dumps(ctx, ensure_ascii=False, default=str)
     return ctx
     )
     qa_memory = get_team_doc_qa_memory(req.team_id, req.project_id)
+    # Build lightweight index — only top-level nodes (level <= 2) to avoid
+    # sending 82K+ nodes per document to the NVIDIA agent.
+    _MAX_INDEX_NODES_PER_DOC = 25
     doc_indexes = []
     for doc in team_docs:
         tree = doc.get("tree") or {}
         nodes = tree.get("nodes") or []
+        top_nodes = [n for n in nodes if (n.get("level") or 0) <= 2][:_MAX_INDEX_NODES_PER_DOC]
+        if not top_nodes:
+            top_nodes = nodes[:_MAX_INDEX_NODES_PER_DOC]
         doc_indexes.append(
             {
                 "document_id": doc.get("id"),
                     {
                         "id": node.get("id"),
                         "parent_id": node.get("parent_id"),
+                        "title": _clip(str(node.get("title") or ""), 120),
+                        "summary": _clip(str(node.get("summary") or ""), 200),
+                        "scope": _clip(str(node.get("scope") or ""), 100),
                         "level": node.get("level"),
                     }
+                    for node in top_nodes
                 ],
             }
         )