minhvtt committed on
Commit
ce85c02
·
verified ·
1 Parent(s): 44f3755

Upload 8 files

Browse files
Files changed (1) hide show
  1. routes_team_chat.py +66 -1
routes_team_chat.py CHANGED
@@ -225,6 +225,71 @@ def _has_requirement_node_payload(payload: Dict[str, Any]) -> bool:
225
  )
226
 
227
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  def _assert_team_project_access(user: Dict[str, Any], team_id: str, project_id: Optional[str]) -> Optional[Dict[str, Any]]:
229
  team = teams_collection.find_one({"id": team_id}, {"_id": 0})
230
  if not team or user["id"] not in unique_ids([team.get("owner_id", "")], team.get("member_ids", [])):
@@ -609,7 +674,7 @@ OUTPUT SHAPE:
609
 
610
  raw_text = run_team_agent_with_nvidia(
611
  system_prompt=TEAM_AGENT_SYSTEM_PROMPT + "\n" + tools_description,
612
- payload=prompt_context,
613
  ).strip()
614
 
615
  parsed = _extract_json_payload(raw_text) or {}
 
225
  )
226
 
227
 
228
+ def _truncate_prompt_context(
229
+ ctx: Dict[str, Any],
230
+ *,
231
+ max_section_content: int = 1500,
232
+ max_sections: int = 10,
233
+ max_messages: int = 15,
234
+ max_msg_content: int = 500,
235
+ max_grounded_answer: int = 2000,
236
+ max_qa_memory: int = 1500,
237
+ max_index_nodes: int = 30,
238
+ ) -> Dict[str, Any]:
239
+ """Return a size-bounded copy of prompt_context before sending to NVIDIA."""
240
+ ctx = dict(ctx)
241
+
242
+ # Truncate document sections (most expensive — can be huge)
243
+ sections = list(ctx.get("documents_sections") or [])
244
+ if len(sections) > max_sections:
245
+ sections = sections[:max_sections]
246
+ safe_sections = []
247
+ for sec in sections:
248
+ sec = dict(sec)
249
+ content = str(sec.get("content") or "")
250
+ if len(content) > max_section_content:
251
+ sec["content"] = content[:max_section_content] + "…[truncated]"
252
+ safe_sections.append(sec)
253
+ ctx["documents_sections"] = safe_sections
254
+
255
+ # Limit + truncate chat messages
256
+ for key in ("selected_messages", "fallback_messages"):
257
+ msgs = list(ctx.get(key) or [])
258
+ if len(msgs) > max_messages:
259
+ msgs = msgs[-max_messages:]
260
+ trimmed = []
261
+ for m in msgs:
262
+ m = dict(m)
263
+ content = str(m.get("content") or "")
264
+ if len(content) > max_msg_content:
265
+ m["content"] = content[:max_msg_content] + "…"
266
+ trimmed.append(m)
267
+ ctx[key] = trimmed
268
+
269
+ # Truncate LLM-generated answer that may be large
270
+ answer = str(ctx.get("document_grounded_answer") or "")
271
+ if len(answer) > max_grounded_answer:
272
+ ctx["document_grounded_answer"] = answer[:max_grounded_answer] + "…"
273
+
274
+ # Truncate accumulated QA memory
275
+ memory = str(ctx.get("doc_qa_memory") or "")
276
+ if len(memory) > max_qa_memory:
277
+ ctx["doc_qa_memory"] = memory[:max_qa_memory] + "…"
278
+
279
+ # Limit index nodes per document (already minimal fields, just guard count)
280
+ doc_indexes = list(ctx.get("documents_index") or [])
281
+ for di in doc_indexes:
282
+ nodes = di.get("nodes") or []
283
+ if len(nodes) > max_index_nodes:
284
+ di["nodes"] = nodes[:max_index_nodes]
285
+ ctx["documents_index"] = doc_indexes
286
+
287
+ # Drop large debug metadata that the agent does not use
288
+ ctx.pop("documents_retrieval_meta", None)
289
+
290
+ return ctx
291
+
292
+
293
  def _assert_team_project_access(user: Dict[str, Any], team_id: str, project_id: Optional[str]) -> Optional[Dict[str, Any]]:
294
  team = teams_collection.find_one({"id": team_id}, {"_id": 0})
295
  if not team or user["id"] not in unique_ids([team.get("owner_id", "")], team.get("member_ids", [])):
 
674
 
675
  raw_text = run_team_agent_with_nvidia(
676
  system_prompt=TEAM_AGENT_SYSTEM_PROMPT + "\n" + tools_description,
677
+ payload=_truncate_prompt_context(prompt_context),
678
  ).strip()
679
 
680
  parsed = _extract_json_payload(raw_text) or {}