fix: Transition to cumulative yielding in Python backend and replacement rendering in JS frontend to prevent token duplication
Browse files
- app.py +3 -1
- index.html +1 -1
app.py
CHANGED
|
@@ -49,9 +49,11 @@ async def chat(messages_json: str, temperature: float = 0.7, max_tokens: int = 1
|
|
| 49 |
stream=True,
|
| 50 |
)
|
| 51 |
|
|
|
|
| 52 |
async for chunk in stream:
|
| 53 |
if chunk.choices and chunk.choices[0].delta.content is not None:
|
| 54 |
-
yield chunk.choices[0].delta.content
|
|
|
|
| 55 |
|
| 56 |
except Exception as e:
|
| 57 |
yield f"Error calling Hugging Face Router: {str(e)}"
|
|
|
|
| 49 |
stream=True,
|
| 50 |
)
|
| 51 |
|
| 52 |
+
bot_response = ""
|
| 53 |
async for chunk in stream:
|
| 54 |
if chunk.choices and chunk.choices[0].delta.content is not None:
|
| 55 |
+
bot_response += chunk.choices[0].delta.content
|
| 56 |
+
yield bot_response
|
| 57 |
|
| 58 |
except Exception as e:
|
| 59 |
yield f"Error calling Hugging Face Router: {str(e)}"
|
index.html
CHANGED
|
@@ -1668,7 +1668,7 @@
|
|
| 1668 |
}
|
| 1669 |
|
| 1670 |
const chunk = message.data[0];
|
| 1671 |
-
botResponse += chunk;
|
| 1672 |
|
| 1673 |
if (botBubble) {
|
| 1674 |
botBubble.innerHTML = parseMarkdown(botResponse);
|
|
|
|
| 1668 |
}
|
| 1669 |
|
| 1670 |
const chunk = message.data[0];
|
| 1671 |
+
botResponse = chunk;
|
| 1672 |
|
| 1673 |
if (botBubble) {
|
| 1674 |
botBubble.innerHTML = parseMarkdown(botResponse);
|