pranavkarthik10 committed on
Commit
1dff814
·
verified ·
1 Parent(s): add4c86

Remove development status from README

Browse files
Files changed (3) hide show
  1. README.md +0 -32
  2. app.py +52 -15
  3. modal_app.py +5 -0
README.md CHANGED
@@ -119,35 +119,3 @@ or granting the app write access.
119
  Do not enable remote writes for arbitrary uploads to a public dataset. Processed
120
  slides can contain the original lecture material. Use a private dataset for personal
121
  testing or only prewarm material you have permission to publish.
122
-
123
- ## Status
124
- **Professor agent loop working:** upload a lecture PDF → render and index the complete
125
- deck → Nemotron plans short teaching beats → the orchestrator executes validated slide
126
- navigation and whiteboard tools → VoxCPM speaks the explanation. Student questions
127
- cancel active narration, can send the professor to a more relevant slide, and can add
128
- supporting notes or equations to the whiteboard.
129
-
130
- Targeted vision via `look_closer` and richer Excalidraw diagrams remain follow-up work.
131
-
132
- Layout: [`app.py`](app.py) (Gradio UI) · [`ai_prof/pdf_utils.py`](ai_prof/pdf_utils.py) (PDF→slides) ·
133
- [`ai_prof/vision.py`](ai_prof/vision.py) (eyes) · [`ai_prof/brain.py`](ai_prof/brain.py) (brain) ·
134
- [`ai_prof/config.py`](ai_prof/config.py) (endpoints / mock fallback).
135
-
136
- The inference services run on Modal: MiniCPM-V through llama.cpp, Nemotron
137
- through vLLM, VoxCPM2 through vLLM-Omni, and distil-Whisper through a small
138
- OpenAI-compatible FastAPI server.
139
- The Gradio frontend remains deployable as a Hugging Face Space.
140
-
141
- Run the focused test suite with:
142
-
143
- ```bash
144
- .venv/bin/python -m unittest discover -s tests -v
145
- ```
146
-
147
- Next up (see [IDEATION.md](IDEATION.md)): prepared-deck storage on Hugging Face,
148
- animated structured diagrams, stronger cancellation during browser audio playback,
149
- and targeted `look_closer` vision.
150
-
151
- The current target design is documented in [ARCHITECTURE.md](ARCHITECTURE.md), including complete-deck
152
- indexing, the professor tool loop, a preprocessed demo lecture, synchronized speech and whiteboard actions,
153
- and interruption/resume behavior.
 
119
  Do not enable remote writes for arbitrary uploads to a public dataset. Processed
120
  slides can contain the original lecture material. Use a private dataset for personal
121
  testing or only prewarm material you have permission to publish.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -588,6 +588,30 @@ _BANNER = (
588
  )
589
 
590
  _CSS = """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
  .gradio-container {
592
  max-width: 1320px !important;
593
  margin: 0 auto !important;
@@ -600,14 +624,14 @@ _CSS = """
600
  }
601
  .app-title h1 {
602
  margin: 0 0 4px !important;
603
- color: #24283b;
604
  font-size: 2.15rem !important;
605
  font-weight: 800 !important;
606
  letter-spacing: -.035em;
607
  }
608
  .app-title p {
609
  margin: 0 !important;
610
- color: #697386;
611
  font-size: 1rem;
612
  }
613
  .workspace-row {
@@ -620,9 +644,9 @@ _CSS = """
620
  min-width: 0 !important;
621
  overflow: hidden;
622
  gap: 0 !important;
623
- border: 1px solid #e1e5eb;
624
  border-radius: 14px;
625
- background: #fff;
626
  }
627
  .panel-title {
628
  flex: 0 0 46px !important;
@@ -632,9 +656,9 @@ _CSS = """
632
  align-items: center !important;
633
  margin: 0 !important;
634
  padding: 0 14px !important;
635
- background: #ecebff;
636
- border-bottom: 1px solid #dedcff;
637
- color: #5b55c7;
638
  }
639
  .panel-title p {
640
  margin: 0 !important;
@@ -656,7 +680,7 @@ _CSS = """
656
  .slide-frame img {
657
  height: 550px !important;
658
  object-fit: contain !important;
659
- background: #f8f9fb;
660
  }
661
  .slide-footer {
662
  padding: 0 12px 12px !important;
@@ -664,7 +688,7 @@ _CSS = """
664
  .slide-caption {
665
  min-height: 24px;
666
  margin: 0 !important;
667
- color: #667085;
668
  }
669
  .slide-index {
670
  margin: 2px 0 8px !important;
@@ -678,9 +702,13 @@ _CSS = """
678
  font-weight: 700 !important;
679
  }
680
  .nav-button button {
681
- color: #5b55c7 !important;
682
- border: 1px solid #d8d6ff !important;
683
- background: #f7f6ff !important;
 
 
 
 
684
  }
685
  .explain-button button {
686
  color: #fff !important;
@@ -795,7 +823,7 @@ _CSS = """
795
  }
796
  .mic-label {
797
  margin: 2px 0 -4px !important;
798
- color: #697386;
799
  font-size: .82rem;
800
  font-weight: 650;
801
  }
@@ -828,9 +856,18 @@ _CSS = """
828
  }
829
  .upload-copy {
830
  margin: 0 !important;
831
- color: #667085;
832
  font-size: .88rem;
833
  }
 
 
 
 
 
 
 
 
 
834
  @media (max-width: 900px) {
835
  .gradio-container { padding-inline: 12px !important; }
836
  .teaching-panel, .bottom-panel { min-width: 100% !important; }
@@ -883,7 +920,7 @@ with gr.Blocks(title="AI Prof", theme=gr.themes.Soft(), css=_CSS) as demo:
883
  with gr.Row(equal_height=True, elem_classes=["workspace-row"]):
884
  with gr.Column(scale=5, elem_classes=["panel-card", "bottom-panel"]):
885
  gr.Markdown("Lecture transcript", elem_classes=["panel-title"])
886
- status_strip = gr.HTML(value=_STATUS_IDLE)
887
  prof_audio = gr.Audio(
888
  autoplay=True,
889
  show_label=False,
 
588
  )
589
 
590
  _CSS = """
591
+ .gradio-container {
592
+ --app-text: #24283b;
593
+ --app-muted: #667085;
594
+ --app-card: #ffffff;
595
+ --app-border: #e1e5eb;
596
+ --app-panel-title: #ecebff;
597
+ --app-panel-title-border: #dedcff;
598
+ --app-accent-text: #5b55c7;
599
+ --app-slide-bg: #f8f9fb;
600
+ --app-nav-bg: #f7f6ff;
601
+ --app-nav-border: #d8d6ff;
602
+ }
603
+ .dark .gradio-container {
604
+ --app-text: #f2f4f7;
605
+ --app-muted: #a7b0c0;
606
+ --app-card: #171923;
607
+ --app-border: #303442;
608
+ --app-panel-title: #24213d;
609
+ --app-panel-title-border: #3a3560;
610
+ --app-accent-text: #c6c2ff;
611
+ --app-slide-bg: #10121a;
612
+ --app-nav-bg: #27243f;
613
+ --app-nav-border: #4b4678;
614
+ }
615
  .gradio-container {
616
  max-width: 1320px !important;
617
  margin: 0 auto !important;
 
624
  }
625
  .app-title h1 {
626
  margin: 0 0 4px !important;
627
+ color: var(--app-text);
628
  font-size: 2.15rem !important;
629
  font-weight: 800 !important;
630
  letter-spacing: -.035em;
631
  }
632
  .app-title p {
633
  margin: 0 !important;
634
+ color: var(--app-muted);
635
  font-size: 1rem;
636
  }
637
  .workspace-row {
 
644
  min-width: 0 !important;
645
  overflow: hidden;
646
  gap: 0 !important;
647
+ border: 1px solid var(--app-border);
648
  border-radius: 14px;
649
+ background: var(--app-card);
650
  }
651
  .panel-title {
652
  flex: 0 0 46px !important;
 
656
  align-items: center !important;
657
  margin: 0 !important;
658
  padding: 0 14px !important;
659
+ background: var(--app-panel-title);
660
+ border-bottom: 1px solid var(--app-panel-title-border);
661
+ color: var(--app-accent-text);
662
  }
663
  .panel-title p {
664
  margin: 0 !important;
 
680
  .slide-frame img {
681
  height: 550px !important;
682
  object-fit: contain !important;
683
+ background: var(--app-slide-bg);
684
  }
685
  .slide-footer {
686
  padding: 0 12px 12px !important;
 
688
  .slide-caption {
689
  min-height: 24px;
690
  margin: 0 !important;
691
+ color: var(--app-muted);
692
  }
693
  .slide-index {
694
  margin: 2px 0 8px !important;
 
702
  font-weight: 700 !important;
703
  }
704
  .nav-button button {
705
+ color: var(--app-accent-text) !important;
706
+ border: 1px solid var(--app-nav-border) !important;
707
+ background: var(--app-nav-bg) !important;
708
+ }
709
+ .nav-button button:hover {
710
+ border-color: #7770ef !important;
711
+ background: color-mix(in srgb, var(--app-nav-bg) 82%, #625ce7) !important;
712
  }
713
  .explain-button button {
714
  color: #fff !important;
 
823
  }
824
  .mic-label {
825
  margin: 2px 0 -4px !important;
826
+ color: var(--app-muted);
827
  font-size: .82rem;
828
  font-weight: 650;
829
  }
 
856
  }
857
  .upload-copy {
858
  margin: 0 !important;
859
+ color: var(--app-muted);
860
  font-size: .88rem;
861
  }
862
+ .dark .panel-card input,
863
+ .dark .panel-card textarea,
864
+ .dark .panel-card select {
865
+ color-scheme: dark;
866
+ }
867
+ .dark .status-strip > div[style] {
868
+ filter: brightness(.72) saturate(.9);
869
+ color: #f3f4f6 !important;
870
+ }
871
  @media (max-width: 900px) {
872
  .gradio-container { padding-inline: 12px !important; }
873
  .teaching-panel, .bottom-panel { min-width: 100% !important; }
 
920
  with gr.Row(equal_height=True, elem_classes=["workspace-row"]):
921
  with gr.Column(scale=5, elem_classes=["panel-card", "bottom-panel"]):
922
  gr.Markdown("Lecture transcript", elem_classes=["panel-title"])
923
+ status_strip = gr.HTML(value=_STATUS_IDLE, elem_classes=["status-strip"])
924
  prof_audio = gr.Audio(
925
  autoplay=True,
926
  show_label=False,
modal_app.py CHANGED
@@ -72,6 +72,10 @@ def _vllm_cmd() -> list[str]:
72
  "--max-model-len", str(MAX_MODEL_LEN),
73
  "--max-num-seqs", "8",
74
  "--tensor-parallel-size", "1",
 
 
 
 
75
  "--trust-remote-code",
76
  "--reasoning-parser", "nemotron_v3",
77
  ]
@@ -132,6 +136,7 @@ def warm() -> None:
132
  proc.kill()
133
  vllm_cache.commit()
134
  flashinfer_cache.commit()
 
135
  print("Warm complete — compile caches committed. Cold starts will now be fast.")
136
 
137
 
 
72
  "--max-model-len", str(MAX_MODEL_LEN),
73
  "--max-num-seqs", "8",
74
  "--tensor-parallel-size", "1",
75
+ # Modal Volumes mount as 9P, which vLLM does not recognize as a network
76
+ # filesystem. Force parallel prefetch instead of reading 13 shards
77
+ # serially; the serial path takes roughly ten minutes for this checkpoint.
78
+ "--safetensors-load-strategy", "prefetch",
79
  "--trust-remote-code",
80
  "--reasoning-parser", "nemotron_v3",
81
  ]
 
136
  proc.kill()
137
  vllm_cache.commit()
138
  flashinfer_cache.commit()
139
+ triton_cache.commit()
140
  print("Warm complete — compile caches committed. Cold starts will now be fast.")
141
 
142