Spaces:

awacke1
/

RescuerOfStolenBikes

Sleeping

App Files Files Community

awacke1 committed on Dec 17, 2024

Commit

bb0696e

verified ·

1 Parent(s): 5a4281c

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -46

app.py CHANGED Viewed

@@ -33,8 +33,8 @@ st.set_page_config(
     }
 )
 load_dotenv()
-openai.api_key = os.getenv('OPENAI_API_KEY') or st.secrets['OPENAI_API_KEY']
-anthropic_key = os.getenv("ANTHROPIC_API_KEY_3") or st.secrets["ANTHROPIC_API_KEY"]
 claude_client = anthropic.Anthropic(api_key=anthropic_key)
 openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_ORG_ID'))
 HF_KEY = os.getenv('HF_KEY')
@@ -80,21 +80,36 @@ FILE_EMOJIS = {
     "mp3": "🎵",
 }
 def generate_filename(prompt, file_type="md"):
     ctz = pytz.timezone('US/Central')
-    date_str = datetime.now(ctz).strftime("%m%d_%H%M")
     safe = re.sub(r'[<>:"/\\\\|?*\n]', ' ', prompt)
     safe = re.sub(r'\s+', ' ', safe).strip()[:90]
     return f"{date_str}_{safe}.{file_type}"
 def create_file(filename, prompt, response):
-    # Creating file does not trigger immediate rerun
     with open(filename, 'w', encoding='utf-8') as f:
         f.write(prompt + "\n\n" + response)
 def get_download_link(file):
     with open(file, "rb") as f:
         b64 = base64.b64encode(f.read()).decode()
     return f'<a href="data:file/zip;base64,{b64}" download="{os.path.basename(file)}">📂 Download {os.path.basename(file)}</a>'
 @st.cache_resource
@@ -110,7 +125,7 @@ def speech_synthesis_html(result):
     components.html(html_code, height=0)
 async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0):
-    # Just create mp3 file, no immediate rerun
     if not text.strip():
         return None
     rate_str = f"{rate:+d}%"
@@ -150,7 +165,7 @@ def process_audio(audio_path):
     with open(audio_path, "rb") as f:
         transcription = openai_client.audio.transcriptions.create(model="whisper-1", file=f)
     st.session_state.messages.append({"role": "user", "content": transcription.text})
-    # No immediate rerun
     return transcription.text
 def process_video(video_path, seconds_per_frame=1):
@@ -204,13 +219,16 @@ def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary
     st.markdown(result)
     if vocal_summary:
-        audio_file_main = speak_with_edge_tts(r2, voice="en-US-AriaNeural", rate=0, pitch=0)
         st.write("### 🎙️ Vocal Summary (Short Answer)")
         play_and_download_audio(audio_file_main)
     if extended_refs:
         summaries_text = "Here are the summaries from the references: " + refs.replace('"','')
-        audio_file_refs = speak_with_edge_tts(summaries_text, voice="en-US-AriaNeural", rate=0, pitch=0)
         st.write("### 📜 Extended References & Summaries")
         play_and_download_audio(audio_file_refs)
@@ -222,7 +240,8 @@ def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary
                 titles.append(m.group(1))
         if titles:
             titles_text = "Here are the titles of the papers: " + ", ".join(titles)
-            audio_file_titles = speak_with_edge_tts(titles_text, voice="en-US-AriaNeural", rate=0, pitch=0)
             st.write("### 🔖 Paper Titles")
             play_and_download_audio(audio_file_titles)
@@ -274,9 +293,7 @@ def create_zip_of_files(md_files, mp3_files):
         return None
     # Build a descriptive name from file stems
     stems = [os.path.splitext(os.path.basename(f))[0] for f in all_files]
-    # Join them
     joined = "_".join(stems)
-    # Truncate if too long
     if len(joined) > 50:
         joined = joined[:50] + "_etc"
     zip_name = f"{joined}.zip"
@@ -300,59 +317,74 @@ def load_files_for_sidebar():
     # Exclude README.md from listings
     md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
-    files_by_ext = defaultdict(list)
-    if md_files: files_by_ext['md'].extend(md_files)
-    if mp3_files: files_by_ext['mp3'].extend(mp3_files)
-    # Sort each extension group by modification time descending
-    for ext in files_by_ext:
-        files_by_ext[ext].sort(key=lambda x: os.path.getmtime(x), reverse=True)
-    return files_by_ext
-def display_file_manager_sidebar(files_by_ext):
-    st.sidebar.title("🎵 Audio & Document Manager")
-    md_files = files_by_ext.get('md', [])
-    mp3_files = files_by_ext.get('mp3', [])
-    # Buttons to delete all except README.md (already excluded)
-    col_del = st.sidebar.columns(3)
-    with col_del[0]:
         if st.button("🗑 Del All MD"):
             for f in md_files:
                 os.remove(f)
             st.session_state.should_rerun = True
-    with col_del[1]:
         if st.button("🗑 Del All MP3"):
             for f in mp3_files:
                 os.remove(f)
             st.session_state.should_rerun = True
-    with col_del[2]:
         if st.button("⬇️ Zip All"):
-            # create a zip of all md and mp3 except README.md
             z = create_zip_of_files(md_files, mp3_files)
             if z:
                 st.sidebar.markdown(get_download_link(z),unsafe_allow_html=True)
-    ext_counts = {ext: len(files) for ext, files in files_by_ext.items()}
-    sorted_ext = sorted(files_by_ext.keys(), key=lambda x: ext_counts[x], reverse=True)
-    # Display files with actions
-    for ext in sorted_ext:
-        emoji = FILE_EMOJIS.get(ext, "📦")
-        count = len(files_by_ext[ext])
-        with st.sidebar.expander(f"{emoji} {ext.upper()} Files ({count})", expanded=True):
-            for f in files_by_ext[ext]:
                 fname = os.path.basename(f)
                 ctime = datetime.fromtimestamp(os.path.getmtime(f)).strftime("%Y-%m-%d %H:%M:%S")
-                # Show filename and buttons in a row
                 st.write(f"**{fname}** - {ctime}")
                 file_buttons_col = st.columns([1,1,1])
                 with file_buttons_col[0]:
                     if st.button("👀View", key="view_"+f):
                         st.session_state.viewing_file = f
                         st.session_state.viewing_file_type = ext
-                        # No rerun needed, just set state
                 with file_buttons_col[1]:
                     if ext == "md":
                         if st.button("✏️Edit", key="edit_"+f):
@@ -409,12 +441,16 @@ def main():
                 col1,col2,col3=st.columns(3)
                 with col1:
                     st.subheader("GPT-4o Omni:")
-                    try: process_with_gpt(user_input)
-                    except: st.write('GPT 4o error')
                 with col2:
                     st.subheader("Claude-3 Sonnet:")
-                    try: process_with_claude(user_input)
-                    except: st.write('Claude error')
                 with col3:
                     st.subheader("Arxiv + Mistral:")
                     try:
@@ -514,10 +550,10 @@ def main():
             st.write("Select a file from the sidebar to edit.")
     # After main content, load files and display in sidebar
-    files_by_ext = load_files_for_sidebar()
-    display_file_manager_sidebar(files_by_ext)
-    # If viewing a file, show its content below (in the main area)
     if st.session_state.viewing_file and os.path.exists(st.session_state.viewing_file):
         st.write("---")
         st.write(f"**Viewing File:** {os.path.basename(st.session_state.viewing_file)}")

     }
 )
 load_dotenv()
+openai.api_key = os.getenv('OPENAI_API_KEY') or st.secrets.get('OPENAI_API_KEY', "")
+anthropic_key = os.getenv("ANTHROPIC_API_KEY_3") or st.secrets.get("ANTHROPIC_API_KEY", "")
 claude_client = anthropic.Anthropic(api_key=anthropic_key)
 openai_client = OpenAI(api_key=openai.api_key, organization=os.getenv('OPENAI_ORG_ID'))
 HF_KEY = os.getenv('HF_KEY')
     "mp3": "🎵",
 }
+def clean_for_speech(text: str) -> str:
+    # Remove \n
+    text = text.replace("\n", " ")
+    # Remove </s>
+    text = text.replace("</s>", " ")
+    # Remove markdown headings (#)
+    text = text.replace("#", "")
+    # Remove links of the form (https://...)
+    text = re.sub(r"\(https?:\/\/[^\)]+\)", "", text)
+    # Collapse multiple spaces
+    text = re.sub(r"\s+", " ", text).strip()
+    return text
 def generate_filename(prompt, file_type="md"):
+    # Include seconds to get a 10-character prefix: %m%d%H%M%S results in exactly 10 chars
     ctz = pytz.timezone('US/Central')
+    date_str = datetime.now(ctz).strftime("%m%d%H%M%S")
     safe = re.sub(r'[<>:"/\\\\|?*\n]', ' ', prompt)
     safe = re.sub(r'\s+', ' ', safe).strip()[:90]
     return f"{date_str}_{safe}.{file_type}"
 def create_file(filename, prompt, response):
     with open(filename, 'w', encoding='utf-8') as f:
         f.write(prompt + "\n\n" + response)
+    # No immediate rerun. Changes will appear next load.
 def get_download_link(file):
     with open(file, "rb") as f:
         b64 = base64.b64encode(f.read()).decode()
+    # It's a zip file download
     return f'<a href="data:file/zip;base64,{b64}" download="{os.path.basename(file)}">📂 Download {os.path.basename(file)}</a>'
 @st.cache_resource
     components.html(html_code, height=0)
 async def edge_tts_generate_audio(text, voice="en-US-AriaNeural", rate=0, pitch=0):
+    text = clean_for_speech(text)
     if not text.strip():
         return None
     rate_str = f"{rate:+d}%"
     with open(audio_path, "rb") as f:
         transcription = openai_client.audio.transcriptions.create(model="whisper-1", file=f)
     st.session_state.messages.append({"role": "user", "content": transcription.text})
+    # No immediate rerun.
     return transcription.text
 def process_video(video_path, seconds_per_frame=1):
     st.markdown(result)
     if vocal_summary:
+        # Clean before speech
+        main_text = clean_for_speech(r2)
+        audio_file_main = speak_with_edge_tts(main_text)
         st.write("### 🎙️ Vocal Summary (Short Answer)")
         play_and_download_audio(audio_file_main)
     if extended_refs:
         summaries_text = "Here are the summaries from the references: " + refs.replace('"','')
+        summaries_text = clean_for_speech(summaries_text)
+        audio_file_refs = speak_with_edge_tts(summaries_text)
         st.write("### 📜 Extended References & Summaries")
         play_and_download_audio(audio_file_refs)
                 titles.append(m.group(1))
         if titles:
             titles_text = "Here are the titles of the papers: " + ", ".join(titles)
+            titles_text = clean_for_speech(titles_text)
+            audio_file_titles = speak_with_edge_tts(titles_text)
             st.write("### 🔖 Paper Titles")
             play_and_download_audio(audio_file_titles)
         return None
     # Build a descriptive name from file stems
     stems = [os.path.splitext(os.path.basename(f))[0] for f in all_files]
     joined = "_".join(stems)
     if len(joined) > 50:
         joined = joined[:50] + "_etc"
     zip_name = f"{joined}.zip"
     # Exclude README.md from listings
     md_files = [f for f in md_files if os.path.basename(f).lower() != 'readme.md']
+    all_files = md_files + mp3_files
+    # Group by first 10 chars of filename
+    # Note: We assume all files have at least 10 chars before underscore from generate_filename
+    groups = defaultdict(list)
+    for f in all_files:
+        fname = os.path.basename(f)
+        prefix = fname[:10]  # first 10 chars
+        groups[prefix].append(f)
+    # Sort groups by mod time of the newest file in that group
+    # Also sort files within each group by mod time descending
+    for prefix in groups:
+        groups[prefix].sort(key=lambda x: os.path.getmtime(x), reverse=True)
+    # Sort prefix keys by latest mod time of their newest file
+    sorted_prefixes = sorted(groups.keys(), key=lambda pre: max(os.path.getmtime(x) for x in groups[pre]), reverse=True)
+    return groups, sorted_prefixes
+def display_file_manager_sidebar(groups, sorted_prefixes):
+    st.sidebar.title("🎵 Audio & Document Manager")
+    # Collect all files except README.md
+    md_files = []
+    mp3_files = []
+    for prefix in groups:
+        for f in groups[prefix]:
+            if f.endswith(".md"):
+                md_files.append(f)
+            elif f.endswith(".mp3"):
+                mp3_files.append(f)
+    top_bar = st.sidebar.columns(3)
+    with top_bar[0]:
         if st.button("🗑 Del All MD"):
             for f in md_files:
                 os.remove(f)
             st.session_state.should_rerun = True
+    with top_bar[1]:
         if st.button("🗑 Del All MP3"):
             for f in mp3_files:
                 os.remove(f)
             st.session_state.should_rerun = True
+    with top_bar[2]:
         if st.button("⬇️ Zip All"):
             z = create_zip_of_files(md_files, mp3_files)
             if z:
                 st.sidebar.markdown(get_download_link(z),unsafe_allow_html=True)
+    # Display groups in expanders
+    for prefix in sorted_prefixes:
+        files = groups[prefix]
+        # Determine file types inside the group
+        # Just show the prefix + number of files
+        exp = st.sidebar.expander(f"{prefix} Files ({len(files)})", expanded=True)
+        with exp:
+            # Files sorted by mod time descending
+            for f in files:
                 fname = os.path.basename(f)
                 ctime = datetime.fromtimestamp(os.path.getmtime(f)).strftime("%Y-%m-%d %H:%M:%S")
+                ext = os.path.splitext(fname)[1].lower().strip('.')
                 st.write(f"**{fname}** - {ctime}")
                 file_buttons_col = st.columns([1,1,1])
                 with file_buttons_col[0]:
                     if st.button("👀View", key="view_"+f):
                         st.session_state.viewing_file = f
                         st.session_state.viewing_file_type = ext
+                        # Just update state, no rerun
                 with file_buttons_col[1]:
                     if ext == "md":
                         if st.button("✏️Edit", key="edit_"+f):
                 col1,col2,col3=st.columns(3)
                 with col1:
                     st.subheader("GPT-4o Omni:")
+                    try:
+                        process_with_gpt(user_input)
+                    except:
+                        st.write('GPT 4o error')
                 with col2:
                     st.subheader("Claude-3 Sonnet:")
+                    try:
+                        process_with_claude(user_input)
+                    except:
+                        st.write('Claude error')
                 with col3:
                     st.subheader("Arxiv + Mistral:")
                     try:
             st.write("Select a file from the sidebar to edit.")
     # After main content, load files and display in sidebar
+    groups, sorted_prefixes = load_files_for_sidebar()
+    display_file_manager_sidebar(groups, sorted_prefixes)
+    # If viewing a file, show its content or audio below
     if st.session_state.viewing_file and os.path.exists(st.session_state.viewing_file):
         st.write("---")
         st.write(f"**Viewing File:** {os.path.basename(st.session_state.viewing_file)}")