sachinchandrankallar committed on
Commit
0bd8e71
·
1 Parent(s): a28ed90

slim api ehr response

Browse files
.vscode/settings.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "python.analysis.extraPaths": [
3
+ "./ai_med_extract/utils"
4
+ ]
5
+ }
ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc CHANGED
Binary files a/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc and b/ai_med_extract/agents/__pycache__/patient_summary_agent.cpython-311.pyc differ
 
ai_med_extract/api/__pycache__/routes.cpython-311.pyc CHANGED
Binary files a/ai_med_extract/api/__pycache__/routes.cpython-311.pyc and b/ai_med_extract/api/__pycache__/routes.cpython-311.pyc differ
 
ai_med_extract/api/routes.py CHANGED
@@ -404,7 +404,8 @@ def register_routes(app, agents):
404
  if not chartsummarydtl:
405
  return jsonify({"error": "Missing chartsummarydtl in input"}), 400
406
  # Normalize visits
407
- visits = parse_ehr_chartsummarydtl(chartsummarydtl)
 
408
  # Extract patient demographics if available
409
  patient_info = ""
410
  if isinstance(ehr_result, dict):
@@ -1046,6 +1047,7 @@ def register_routes(app, agents):
1046
  from ai_med_extract.utils.openvino_summarizer_utils import (
1047
  parse_ehr_chartsummarydtl, visits_sorted, compute_deltas, build_compact_baseline, delta_to_text, build_main_prompt
1048
  )
 
1049
  try:
1050
  start_total = time.time()
1051
  data = request.get_json()
@@ -1082,6 +1084,25 @@ def register_routes(app, agents):
1082
  t_api_start = time.time()
1083
  try:
1084
  response = requests.post(api_url, json={"patientid": patientid}, headers=headers, timeout=EHR_TIMEOUT)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1085
  except requests.exceptions.Timeout:
1086
  logger.warning(f"EHR API timeout ({EHR_TIMEOUT}s) — returning structured fallback.")
1087
  minimal_fallback = f"""
@@ -1177,9 +1198,10 @@ def register_routes(app, agents):
1177
  "timing": {"total": round(time.time() - start_total, 1)},
1178
  "timeout_mode_used": timeout_mode
1179
  }), 500
1180
-
1181
  # Parse and compute deltas — YOUR LOGIC PRESERVED
1182
- visits = parse_ehr_chartsummarydtl(chartsummarydtl)
 
1183
  delta = compute_deltas([], visits)
1184
  all_visits = visits_sorted(visits)
1185
  baseline = build_compact_baseline(all_visits)
@@ -1240,7 +1262,7 @@ Generate the full 4-section summary based on the data.</s>
1240
  full_prompt,
1241
  max_tokens=1500, # Increased for fuller section generation
1242
  temperature=0.0, # Set to 0.0 for maximum determinism and often faster generation.
1243
- top_p=1.0, # Set to 1.0 to consider all tokens, often faster than sampling with top_p < 1.0.
1244
  # top_k=50, # Adding top_k can sometimes speed up sampling. 50 is a good balance.
1245
  # repeat_penalty=1.1 # A small penalty can prevent loops and potentially speed up convergence.
1246
  )
 
404
  if not chartsummarydtl:
405
  return jsonify({"error": "Missing chartsummarydtl in input"}), 400
406
  # Normalize visits
407
+ # visits = parse_ehr_chartsummarydtl(chartsummarydtl)
408
+ visits = chartsummarydtl
409
  # Extract patient demographics if available
410
  patient_info = ""
411
  if isinstance(ehr_result, dict):
 
1047
  from ai_med_extract.utils.openvino_summarizer_utils import (
1048
  parse_ehr_chartsummarydtl, visits_sorted, compute_deltas, build_compact_baseline, delta_to_text, build_main_prompt
1049
  )
1050
+ from ai_med_extract.utils.json_slimmer import slim_api_json, PruneOptions
1051
  try:
1052
  start_total = time.time()
1053
  data = request.get_json()
 
1084
  t_api_start = time.time()
1085
  try:
1086
  response = requests.post(api_url, json={"patientid": patientid}, headers=headers, timeout=EHR_TIMEOUT)
1087
+ # ✅ Slim incoming EHR API response
1088
+ opts = PruneOptions(
1089
+ remove_nulls=True,
1090
+ remove_empty_strings=True,
1091
+ remove_empty_collections=True,
1092
+ trim_strings=False, # preserve clinical text
1093
+ compact_lists=True,
1094
+ preserve_paths=set(), # add dot paths here if some fields must always stay
1095
+ output_minified_string=False, # dict instead of string
1096
+ )
1097
+ # Log token count before slim_api_json
1098
+ tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", trust_remote_code=True, cache_dir=os.environ.get('HF_HOME', '/tmp/huggingface'))
1099
+ before_tokens = len(tokenizer.encode(response.text))
1100
+ logger.info(f"Token count before slim_api_json: {before_tokens}")
1101
+ api_data = slim_api_json(response.text, options=opts)
1102
+ # Log token count after slim_api_json
1103
+ after_tokens = len(tokenizer.encode(json.dumps(api_data)))
1104
+ logger.info(f"Token count after slim_api_json: {after_tokens}")
1105
+
1106
  except requests.exceptions.Timeout:
1107
  logger.warning(f"EHR API timeout ({EHR_TIMEOUT}s) — returning structured fallback.")
1108
  minimal_fallback = f"""
 
1198
  "timing": {"total": round(time.time() - start_total, 1)},
1199
  "timeout_mode_used": timeout_mode
1200
  }), 500
1201
+
1202
  # Parse and compute deltas — YOUR LOGIC PRESERVED
1203
+ # visits = parse_ehr_chartsummarydtl(chartsummarydtl)
1204
+ visits = chartsummarydtl
1205
  delta = compute_deltas([], visits)
1206
  all_visits = visits_sorted(visits)
1207
  baseline = build_compact_baseline(all_visits)
 
1262
  full_prompt,
1263
  max_tokens=1500, # Increased for fuller section generation
1264
  temperature=0.0, # Set to 0.0 for maximum determinism and often faster generation.
1265
+ top_p=0.3, # Set to 1.0 to consider all tokens, often faster than sampling with top_p < 1.0.
1266
  # top_k=50, # Adding top_k can sometimes speed up sampling. 50 is a good balance.
1267
  # repeat_penalty=1.1 # A small penalty can prevent loops and potentially speed up convergence.
1268
  )
ai_med_extract/utils/__pycache__/json_slimmer.cpython-311.pyc ADDED
Binary file (5.04 kB). View file
 
ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc CHANGED
Binary files a/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/model_loader_gguf.cpython-311.pyc differ
 
ai_med_extract/utils/__pycache__/openvino_summarizer_utils.cpython-311.pyc CHANGED
Binary files a/ai_med_extract/utils/__pycache__/openvino_summarizer_utils.cpython-311.pyc and b/ai_med_extract/utils/__pycache__/openvino_summarizer_utils.cpython-311.pyc differ
 
ai_med_extract/utils/json_slimmer.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from typing import Any, Dict, List, Set, Union
4
+
5
+
6
class PruneOptions:
    """Switches that control how ``slim_api_json`` slims a JSON payload.

    Args:
        remove_nulls: Drop ``None`` (JSON ``null``) values.
        remove_empty_strings: Drop strings equal to ``""`` (checked after
            trimming when ``trim_strings`` is on).
        remove_empty_collections: Drop lists and dicts that end up empty.
        trim_strings: Strip leading/trailing whitespace from string values.
        compact_lists: Remove dropped entries from lists instead of leaving
            a ``None`` placeholder at their position.
        preserve_paths: Paths whose values the pruner returns unmodified.
            A path joins dict keys with ``.`` and appends ``[i]`` for list
            indices, e.g. ``"visits[0].notes"``.
        output_minified_string: Return a compact JSON string when true,
            otherwise the decoded Python object.
    """

    def __init__(
        self,
        remove_nulls: bool = True,
        remove_empty_strings: bool = True,
        remove_empty_collections: bool = True,
        trim_strings: bool = False,  # ⚠️ Keep False for clinical text
        compact_lists: bool = True,
        preserve_paths: Union[Set[str], None] = None,
        output_minified_string: bool = True,
    ):
        self.remove_nulls = remove_nulls
        self.remove_empty_strings = remove_empty_strings
        self.remove_empty_collections = remove_empty_collections
        self.trim_strings = trim_strings
        self.compact_lists = compact_lists
        # Copy so later mutation of the caller's collection cannot silently
        # change which paths this options object protects.
        self.preserve_paths = set(preserve_paths) if preserve_paths else set()
        self.output_minified_string = output_minified_string

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}("
            f"remove_nulls={self.remove_nulls!r}, "
            f"remove_empty_strings={self.remove_empty_strings!r}, "
            f"remove_empty_collections={self.remove_empty_collections!r}, "
            f"trim_strings={self.trim_strings!r}, "
            f"compact_lists={self.compact_lists!r}, "
            f"preserve_paths={self.preserve_paths!r}, "
            f"output_minified_string={self.output_minified_string!r})"
        )
24
+
25
+
26
def _minify_lossless(text: str) -> str:
    """Strip JSON whitespace that sits outside string literals.

    Characters inside double-quoted strings (including escaped quotes) are
    copied through untouched; spaces, tabs, newlines and carriage returns
    anywhere else are discarded.
    """
    kept = []
    inside_literal = False
    after_backslash = False
    for char in text:
        if not inside_literal:
            if char == '"':
                inside_literal = True
            elif char in " \t\n\r":
                continue  # insignificant whitespace between tokens
            kept.append(char)
            continue

        # Inside a string literal: always keep the character, then update
        # the escape / terminator state.
        kept.append(char)
        if after_backslash:
            after_backslash = False
        elif char == "\\":
            after_backslash = True
        elif char == '"':
            inside_literal = False
    return "".join(kept)
49
+
50
+
51
def _preserve(path: str, options: "PruneOptions") -> bool:
    """Return True when ``path`` is non-empty and listed in ``options.preserve_paths``.

    The root value has an empty path and is never treated as preserved.
    """
    # bool() so an empty path yields False rather than leaking "" to callers.
    return bool(path) and path in options.preserve_paths
53
+
54
+
55
# Marker meaning "drop this value from its parent". It is distinct from None
# so that a genuine JSON null can survive when remove_nulls is disabled and so
# that preserved null/empty values are not mistaken for removed ones.
_PRUNED = object()


def _prune_value(value: Any, options: "PruneOptions", path: str) -> Any:
    """Return the pruned form of ``value``, or ``_PRUNED`` if it must be dropped."""
    # Values on a preserved path are returned untouched and not descended into.
    if path and path in options.preserve_paths:
        return value

    if value is None:
        return _PRUNED if options.remove_nulls else None

    if isinstance(value, str):
        text = value.strip() if options.trim_strings else value
        if options.remove_empty_strings and text == "":
            return _PRUNED
        return text

    if isinstance(value, list):
        items = []
        for index, item in enumerate(value):
            item_path = f"{path}[{index}]"
            child = _prune_value(item, options, item_path)
            # compact_lists also strips plain nulls from lists (even when
            # remove_nulls is off), unless that element is explicitly preserved.
            droppable = child is _PRUNED or (
                child is None and item_path not in options.preserve_paths
            )
            if droppable and options.compact_lists:
                continue
            # Without compaction a removed element leaves a None placeholder
            # so the remaining elements keep their positions.
            items.append(None if child is _PRUNED else child)
        if options.remove_empty_collections and not items:
            return _PRUNED
        return items

    if isinstance(value, dict):
        kept = {}
        for key, item in value.items():
            child_path = f"{path}.{key}" if path else key
            child = _prune_value(item, options, child_path)
            if child is not _PRUNED:
                kept[key] = child
        if options.remove_empty_collections and not kept:
            return _PRUNED
        return kept

    # bool / int / float and any other type pass through unchanged.
    return value


def _prune(value: Any, options: "PruneOptions", path: str) -> Any:
    """Recursively remove nulls, empty strings and empty collections from ``value``.

    Args:
        value: A decoded JSON value (dict, list, str, number, bool or None).
        options: Flags selecting what is removed; see ``PruneOptions``.
        path: Path of ``value`` inside the document ("" for the root). Dict
            keys are joined with ``.`` and list indices appended as ``[i]``.

    Returns:
        The pruned value. ``None`` is returned when the value itself was
        removed (for example the whole document pruned down to nothing).
    """
    result = _prune_value(value, options, path)
    return None if result is _PRUNED else result
107
+
108
+
109
def slim_api_json(
    response_body: str,
    options: Union["PruneOptions", None] = None,
) -> Union[str, dict]:
    """Shrink a JSON document by minifying it and optionally pruning empty values.

    Args:
        response_body: JSON text to slim.
        options: Pruning switches. When omitted, a fresh ``PruneOptions()``
            with default settings is used for this call.

    Returns:
        A compact JSON string when ``options.output_minified_string`` is true,
        otherwise the decoded (and pruned) Python object. The result can be
        ``None`` if pruning removes the entire document.

    Raises:
        json.JSONDecodeError: If ``response_body`` has to be parsed and is not
            valid JSON.
    """
    # Build the default per call instead of sharing one instance created at
    # function-definition time across every caller.
    if options is None:
        options = PruneOptions()

    pruning_enabled = any((
        options.remove_nulls,
        options.remove_empty_strings,
        options.remove_empty_collections,
        options.compact_lists,
        options.trim_strings,
    ))

    # Pure minify if pruning disabled: no parse needed for the string output.
    if not pruning_enabled:
        minified = _minify_lossless(response_body)
        return minified if options.output_minified_string else json.loads(minified)

    pruned = _prune(json.loads(response_body), options, path="")
    if options.output_minified_string:
        return json.dumps(pruned, separators=(",", ":"))
    return pruned