Spaces:
Running
Running
Remove development status from README
Browse files
- README.md +0 -32
- app.py +52 -15
- modal_app.py +5 -0
README.md
CHANGED
|
@@ -119,35 +119,3 @@ or granting the app write access.
|
|
| 119 |
Do not enable remote writes for arbitrary uploads to a public dataset. Processed
|
| 120 |
slides can contain the original lecture material. Use a private dataset for personal
|
| 121 |
testing or only prewarm material you have permission to publish.
|
| 122 |
-
|
| 123 |
-
## Status
|
| 124 |
-
**Professor agent loop working:** upload a lecture PDF → render and index the complete
|
| 125 |
-
deck → Nemotron plans short teaching beats → the orchestrator executes validated slide
|
| 126 |
-
navigation and whiteboard tools → VoxCPM speaks the explanation. Student questions
|
| 127 |
-
cancel active narration, can send the professor to a more relevant slide, and can add
|
| 128 |
-
supporting notes or equations to the whiteboard.
|
| 129 |
-
|
| 130 |
-
Targeted vision via `look_closer` and richer Excalidraw diagrams remain follow-up work.
|
| 131 |
-
|
| 132 |
-
Layout: [`app.py`](app.py) (Gradio UI) · [`ai_prof/pdf_utils.py`](ai_prof/pdf_utils.py) (PDF→slides) ·
|
| 133 |
-
[`ai_prof/vision.py`](ai_prof/vision.py) (eyes) · [`ai_prof/brain.py`](ai_prof/brain.py) (brain) ·
|
| 134 |
-
[`ai_prof/config.py`](ai_prof/config.py) (endpoints / mock fallback).
|
| 135 |
-
|
| 136 |
-
The inference services run on Modal: MiniCPM-V through llama.cpp, Nemotron
|
| 137 |
-
through vLLM, VoxCPM2 through vLLM-Omni, and distil-Whisper through a small
|
| 138 |
-
OpenAI-compatible FastAPI server.
|
| 139 |
-
The Gradio frontend remains deployable as a Hugging Face Space.
|
| 140 |
-
|
| 141 |
-
Run the focused test suite with:
|
| 142 |
-
|
| 143 |
-
```bash
|
| 144 |
-
.venv/bin/python -m unittest discover -s tests -v
|
| 145 |
-
```
|
| 146 |
-
|
| 147 |
-
Next up (see [IDEATION.md](IDEATION.md)): prepared-deck storage on Hugging Face,
|
| 148 |
-
animated structured diagrams, stronger cancellation during browser audio playback,
|
| 149 |
-
and targeted `look_closer` vision.
|
| 150 |
-
|
| 151 |
-
The current target design is documented in [ARCHITECTURE.md](ARCHITECTURE.md), including complete-deck
|
| 152 |
-
indexing, the professor tool loop, a preprocessed demo lecture, synchronized speech and whiteboard actions,
|
| 153 |
-
and interruption/resume behavior.
|
|
|
|
| 119 |
Do not enable remote writes for arbitrary uploads to a public dataset. Processed
|
| 120 |
slides can contain the original lecture material. Use a private dataset for personal
|
| 121 |
testing or only prewarm material you have permission to publish.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
|
@@ -588,6 +588,30 @@ _BANNER = (
|
|
| 588 |
)
|
| 589 |
|
| 590 |
_CSS = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 591 |
.gradio-container {
|
| 592 |
max-width: 1320px !important;
|
| 593 |
margin: 0 auto !important;
|
|
@@ -600,14 +624,14 @@ _CSS = """
|
|
| 600 |
}
|
| 601 |
.app-title h1 {
|
| 602 |
margin: 0 0 4px !important;
|
| 603 |
-
color:
|
| 604 |
font-size: 2.15rem !important;
|
| 605 |
font-weight: 800 !important;
|
| 606 |
letter-spacing: -.035em;
|
| 607 |
}
|
| 608 |
.app-title p {
|
| 609 |
margin: 0 !important;
|
| 610 |
-
color:
|
| 611 |
font-size: 1rem;
|
| 612 |
}
|
| 613 |
.workspace-row {
|
|
@@ -620,9 +644,9 @@ _CSS = """
|
|
| 620 |
min-width: 0 !important;
|
| 621 |
overflow: hidden;
|
| 622 |
gap: 0 !important;
|
| 623 |
-
border: 1px solid
|
| 624 |
border-radius: 14px;
|
| 625 |
-
background:
|
| 626 |
}
|
| 627 |
.panel-title {
|
| 628 |
flex: 0 0 46px !important;
|
|
@@ -632,9 +656,9 @@ _CSS = """
|
|
| 632 |
align-items: center !important;
|
| 633 |
margin: 0 !important;
|
| 634 |
padding: 0 14px !important;
|
| 635 |
-
background:
|
| 636 |
-
border-bottom: 1px solid
|
| 637 |
-
color:
|
| 638 |
}
|
| 639 |
.panel-title p {
|
| 640 |
margin: 0 !important;
|
|
@@ -656,7 +680,7 @@ _CSS = """
|
|
| 656 |
.slide-frame img {
|
| 657 |
height: 550px !important;
|
| 658 |
object-fit: contain !important;
|
| 659 |
-
background:
|
| 660 |
}
|
| 661 |
.slide-footer {
|
| 662 |
padding: 0 12px 12px !important;
|
|
@@ -664,7 +688,7 @@ _CSS = """
|
|
| 664 |
.slide-caption {
|
| 665 |
min-height: 24px;
|
| 666 |
margin: 0 !important;
|
| 667 |
-
color:
|
| 668 |
}
|
| 669 |
.slide-index {
|
| 670 |
margin: 2px 0 8px !important;
|
|
@@ -678,9 +702,13 @@ _CSS = """
|
|
| 678 |
font-weight: 700 !important;
|
| 679 |
}
|
| 680 |
.nav-button button {
|
| 681 |
-
color:
|
| 682 |
-
border: 1px solid
|
| 683 |
-
background:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 684 |
}
|
| 685 |
.explain-button button {
|
| 686 |
color: #fff !important;
|
|
@@ -795,7 +823,7 @@ _CSS = """
|
|
| 795 |
}
|
| 796 |
.mic-label {
|
| 797 |
margin: 2px 0 -4px !important;
|
| 798 |
-
color:
|
| 799 |
font-size: .82rem;
|
| 800 |
font-weight: 650;
|
| 801 |
}
|
|
@@ -828,9 +856,18 @@ _CSS = """
|
|
| 828 |
}
|
| 829 |
.upload-copy {
|
| 830 |
margin: 0 !important;
|
| 831 |
-
color:
|
| 832 |
font-size: .88rem;
|
| 833 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 834 |
@media (max-width: 900px) {
|
| 835 |
.gradio-container { padding-inline: 12px !important; }
|
| 836 |
.teaching-panel, .bottom-panel { min-width: 100% !important; }
|
|
@@ -883,7 +920,7 @@ with gr.Blocks(title="AI Prof", theme=gr.themes.Soft(), css=_CSS) as demo:
|
|
| 883 |
with gr.Row(equal_height=True, elem_classes=["workspace-row"]):
|
| 884 |
with gr.Column(scale=5, elem_classes=["panel-card", "bottom-panel"]):
|
| 885 |
gr.Markdown("Lecture transcript", elem_classes=["panel-title"])
|
| 886 |
-
status_strip = gr.HTML(value=_STATUS_IDLE)
|
| 887 |
prof_audio = gr.Audio(
|
| 888 |
autoplay=True,
|
| 889 |
show_label=False,
|
|
|
|
| 588 |
)
|
| 589 |
|
| 590 |
_CSS = """
|
| 591 |
+
.gradio-container {
|
| 592 |
+
--app-text: #24283b;
|
| 593 |
+
--app-muted: #667085;
|
| 594 |
+
--app-card: #ffffff;
|
| 595 |
+
--app-border: #e1e5eb;
|
| 596 |
+
--app-panel-title: #ecebff;
|
| 597 |
+
--app-panel-title-border: #dedcff;
|
| 598 |
+
--app-accent-text: #5b55c7;
|
| 599 |
+
--app-slide-bg: #f8f9fb;
|
| 600 |
+
--app-nav-bg: #f7f6ff;
|
| 601 |
+
--app-nav-border: #d8d6ff;
|
| 602 |
+
}
|
| 603 |
+
.dark .gradio-container {
|
| 604 |
+
--app-text: #f2f4f7;
|
| 605 |
+
--app-muted: #a7b0c0;
|
| 606 |
+
--app-card: #171923;
|
| 607 |
+
--app-border: #303442;
|
| 608 |
+
--app-panel-title: #24213d;
|
| 609 |
+
--app-panel-title-border: #3a3560;
|
| 610 |
+
--app-accent-text: #c6c2ff;
|
| 611 |
+
--app-slide-bg: #10121a;
|
| 612 |
+
--app-nav-bg: #27243f;
|
| 613 |
+
--app-nav-border: #4b4678;
|
| 614 |
+
}
|
| 615 |
.gradio-container {
|
| 616 |
max-width: 1320px !important;
|
| 617 |
margin: 0 auto !important;
|
|
|
|
| 624 |
}
|
| 625 |
.app-title h1 {
|
| 626 |
margin: 0 0 4px !important;
|
| 627 |
+
color: var(--app-text);
|
| 628 |
font-size: 2.15rem !important;
|
| 629 |
font-weight: 800 !important;
|
| 630 |
letter-spacing: -.035em;
|
| 631 |
}
|
| 632 |
.app-title p {
|
| 633 |
margin: 0 !important;
|
| 634 |
+
color: var(--app-muted);
|
| 635 |
font-size: 1rem;
|
| 636 |
}
|
| 637 |
.workspace-row {
|
|
|
|
| 644 |
min-width: 0 !important;
|
| 645 |
overflow: hidden;
|
| 646 |
gap: 0 !important;
|
| 647 |
+
border: 1px solid var(--app-border);
|
| 648 |
border-radius: 14px;
|
| 649 |
+
background: var(--app-card);
|
| 650 |
}
|
| 651 |
.panel-title {
|
| 652 |
flex: 0 0 46px !important;
|
|
|
|
| 656 |
align-items: center !important;
|
| 657 |
margin: 0 !important;
|
| 658 |
padding: 0 14px !important;
|
| 659 |
+
background: var(--app-panel-title);
|
| 660 |
+
border-bottom: 1px solid var(--app-panel-title-border);
|
| 661 |
+
color: var(--app-accent-text);
|
| 662 |
}
|
| 663 |
.panel-title p {
|
| 664 |
margin: 0 !important;
|
|
|
|
| 680 |
.slide-frame img {
|
| 681 |
height: 550px !important;
|
| 682 |
object-fit: contain !important;
|
| 683 |
+
background: var(--app-slide-bg);
|
| 684 |
}
|
| 685 |
.slide-footer {
|
| 686 |
padding: 0 12px 12px !important;
|
|
|
|
| 688 |
.slide-caption {
|
| 689 |
min-height: 24px;
|
| 690 |
margin: 0 !important;
|
| 691 |
+
color: var(--app-muted);
|
| 692 |
}
|
| 693 |
.slide-index {
|
| 694 |
margin: 2px 0 8px !important;
|
|
|
|
| 702 |
font-weight: 700 !important;
|
| 703 |
}
|
| 704 |
.nav-button button {
|
| 705 |
+
color: var(--app-accent-text) !important;
|
| 706 |
+
border: 1px solid var(--app-nav-border) !important;
|
| 707 |
+
background: var(--app-nav-bg) !important;
|
| 708 |
+
}
|
| 709 |
+
.nav-button button:hover {
|
| 710 |
+
border-color: #7770ef !important;
|
| 711 |
+
background: color-mix(in srgb, var(--app-nav-bg) 82%, #625ce7) !important;
|
| 712 |
}
|
| 713 |
.explain-button button {
|
| 714 |
color: #fff !important;
|
|
|
|
| 823 |
}
|
| 824 |
.mic-label {
|
| 825 |
margin: 2px 0 -4px !important;
|
| 826 |
+
color: var(--app-muted);
|
| 827 |
font-size: .82rem;
|
| 828 |
font-weight: 650;
|
| 829 |
}
|
|
|
|
| 856 |
}
|
| 857 |
.upload-copy {
|
| 858 |
margin: 0 !important;
|
| 859 |
+
color: var(--app-muted);
|
| 860 |
font-size: .88rem;
|
| 861 |
}
|
| 862 |
+
.dark .panel-card input,
|
| 863 |
+
.dark .panel-card textarea,
|
| 864 |
+
.dark .panel-card select {
|
| 865 |
+
color-scheme: dark;
|
| 866 |
+
}
|
| 867 |
+
.dark .status-strip > div[style] {
|
| 868 |
+
filter: brightness(.72) saturate(.9);
|
| 869 |
+
color: #f3f4f6 !important;
|
| 870 |
+
}
|
| 871 |
@media (max-width: 900px) {
|
| 872 |
.gradio-container { padding-inline: 12px !important; }
|
| 873 |
.teaching-panel, .bottom-panel { min-width: 100% !important; }
|
|
|
|
| 920 |
with gr.Row(equal_height=True, elem_classes=["workspace-row"]):
|
| 921 |
with gr.Column(scale=5, elem_classes=["panel-card", "bottom-panel"]):
|
| 922 |
gr.Markdown("Lecture transcript", elem_classes=["panel-title"])
|
| 923 |
+
status_strip = gr.HTML(value=_STATUS_IDLE, elem_classes=["status-strip"])
|
| 924 |
prof_audio = gr.Audio(
|
| 925 |
autoplay=True,
|
| 926 |
show_label=False,
|
modal_app.py
CHANGED
|
@@ -72,6 +72,10 @@ def _vllm_cmd() -> list[str]:
|
|
| 72 |
"--max-model-len", str(MAX_MODEL_LEN),
|
| 73 |
"--max-num-seqs", "8",
|
| 74 |
"--tensor-parallel-size", "1",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
"--trust-remote-code",
|
| 76 |
"--reasoning-parser", "nemotron_v3",
|
| 77 |
]
|
|
@@ -132,6 +136,7 @@ def warm() -> None:
|
|
| 132 |
proc.kill()
|
| 133 |
vllm_cache.commit()
|
| 134 |
flashinfer_cache.commit()
|
|
|
|
| 135 |
print("Warm complete — compile caches committed. Cold starts will now be fast.")
|
| 136 |
|
| 137 |
|
|
|
|
| 72 |
"--max-model-len", str(MAX_MODEL_LEN),
|
| 73 |
"--max-num-seqs", "8",
|
| 74 |
"--tensor-parallel-size", "1",
|
| 75 |
+
# Modal Volumes mount as 9P, which vLLM does not recognize as a network
|
| 76 |
+
# filesystem. Force parallel prefetch instead of reading 13 shards
|
| 77 |
+
# serially; the serial path takes roughly ten minutes for this checkpoint.
|
| 78 |
+
"--safetensors-load-strategy", "prefetch",
|
| 79 |
"--trust-remote-code",
|
| 80 |
"--reasoning-parser", "nemotron_v3",
|
| 81 |
]
|
|
|
|
| 136 |
proc.kill()
|
| 137 |
vllm_cache.commit()
|
| 138 |
flashinfer_cache.commit()
|
| 139 |
+
triton_cache.commit()
|
| 140 |
print("Warm complete — compile caches committed. Cold starts will now be fast.")
|
| 141 |
|
| 142 |
|