Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -28,26 +28,54 @@ st.set_page_config(
|
|
| 28 |
initial_sidebar_state="expanded"
|
| 29 |
)
|
| 30 |
|
| 31 |
-
# CSS mejorado
|
| 32 |
st.markdown("""
|
| 33 |
<style>
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
</style>
|
| 45 |
""", unsafe_allow_html=True)
|
| 46 |
|
| 47 |
# ==================== FUNCIONES AUXILIARES ====================
|
| 48 |
|
| 49 |
def parse_ingredient_string(ing_str):
|
| 50 |
-
"""Parsear cadena de ingredientes
|
| 51 |
try:
|
| 52 |
if isinstance(ing_str, str):
|
| 53 |
items = [item.strip() for item in ing_str.split(',') if item.strip()]
|
|
@@ -57,10 +85,9 @@ def parse_ingredient_string(ing_str):
|
|
| 57 |
return []
|
| 58 |
|
| 59 |
def parse_instruction_string(instr_str):
|
| 60 |
-
"""Parsear instrucciones
|
| 61 |
try:
|
| 62 |
if isinstance(instr_str, str):
|
| 63 |
-
# Dividir por puntos o números
|
| 64 |
steps = re.split(r'\.\s*|\n\s*', instr_str)
|
| 65 |
steps = [step.strip() for step in steps if step.strip()]
|
| 66 |
return steps
|
|
@@ -68,13 +95,23 @@ def parse_instruction_string(instr_str):
|
|
| 68 |
except Exception:
|
| 69 |
return [str(instr_str)]
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
@st.cache_resource(show_spinner="Cargando modelo de embeddings...")
|
| 72 |
def load_embedding_model():
|
| 73 |
-
return SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2') # Multilingual
|
| 74 |
|
| 75 |
@st.cache_resource(show_spinner="Cargando modelo para chatbot...")
|
| 76 |
def load_chat_model():
|
| 77 |
-
return pipeline("text-generation", model="flax-community/gpt-2-spanish") #
|
| 78 |
|
| 79 |
@lru_cache(maxsize=1000)
|
| 80 |
def get_chat_response(query, context=""):
|
|
@@ -84,43 +121,33 @@ def get_chat_response(query, context=""):
|
|
| 84 |
response = model(prompt, max_length=150, num_return_sequences=1)[0]['generated_text']
|
| 85 |
return response.split("Asistente: ")[-1].strip()
|
| 86 |
|
| 87 |
-
def parse_duration_to_minutes(dur_str):
|
| 88 |
-
"""Convertir HH:MM a minutos"""
|
| 89 |
-
try:
|
| 90 |
-
if isinstance(dur_str, str) and ':' in dur_str:
|
| 91 |
-
h, m = map(int, dur_str.split(':'))
|
| 92 |
-
return h * 60 + m
|
| 93 |
-
return 0
|
| 94 |
-
except:
|
| 95 |
-
return 0
|
| 96 |
-
|
| 97 |
# ==================== CARGA DE DATOS ====================
|
| 98 |
|
| 99 |
@st.cache_data(show_spinner="Cargando y procesando datos de recetas...")
|
| 100 |
def load_and_preprocess_data():
|
| 101 |
-
"""Carga y preprocesa
|
| 102 |
try:
|
| 103 |
st.info("Descargando dataset de recetas españolas... Esto puede tomar unos segundos.")
|
| 104 |
ds = load_dataset("somosnlp/RecetasDeLaAbuela")
|
| 105 |
df = ds['train'].to_pandas()
|
| 106 |
|
| 107 |
-
# Limitar para rendimiento
|
| 108 |
df = df.head(8000).copy()
|
| 109 |
|
| 110 |
-
# Procesar ingredientes
|
| 111 |
df['ingredients_parsed'] = df['Ingredientes'].apply(parse_ingredient_string)
|
| 112 |
df['ingredients_str'] = df['ingredients_parsed'].apply(lambda x: ' '.join(x).lower())
|
| 113 |
|
| 114 |
-
# Procesar instrucciones
|
| 115 |
df['instructions_parsed'] = df['Pasos'].apply(parse_instruction_string)
|
| 116 |
|
| 117 |
-
# Procesar tiempo
|
| 118 |
df['total_minutes'] = df['Duracion'].apply(parse_duration_to_minutes)
|
| 119 |
|
| 120 |
-
#
|
| 121 |
df['is_healthy'] = df['Valor nutricional'].str.contains('Bajo en calorías|Bajo en grasas|vegetarianos|vegano', na=False, case=False)
|
| 122 |
|
| 123 |
-
# Pre-calcular embeddings
|
| 124 |
st.info("Calculando embeddings multilingües para búsqueda rápida...")
|
| 125 |
model = load_embedding_model()
|
| 126 |
ingredients_texts = df['ingredients_str'].tolist()
|
|
@@ -142,7 +169,7 @@ def load_and_preprocess_data():
|
|
| 142 |
# ==================== FUNCIONES DE RECOMENDACIÓN ====================
|
| 143 |
|
| 144 |
def recommend_recipes_optimized(user_ingredients, category="", top_k=5, is_healthy=True, is_vegan=False, max_time=60):
|
| 145 |
-
"""Recomendación
|
| 146 |
try:
|
| 147 |
if df.empty:
|
| 148 |
return pd.DataFrame()
|
|
@@ -186,7 +213,7 @@ with st.spinner("Cargando base de datos..."):
|
|
| 186 |
if df.empty:
|
| 187 |
st.stop()
|
| 188 |
|
| 189 |
-
# Barra lateral
|
| 190 |
with st.sidebar:
|
| 191 |
st.header("Filtros")
|
| 192 |
max_time = st.slider("Tiempo máximo (minutos)", 10, 120, 60)
|
|
@@ -194,7 +221,7 @@ with st.sidebar:
|
|
| 194 |
is_vegan = st.checkbox("Vegano", value=False)
|
| 195 |
category = st.text_input("Categoría (ej. postres)")
|
| 196 |
|
| 197 |
-
# Entrada usuario
|
| 198 |
user_input = st.text_input("Ingresa ingredientes (separados por comas, en español):", "tomate, cebolla, pollo")
|
| 199 |
user_ingredients = [i.strip().lower() for i in user_input.split(',') if i.strip()]
|
| 200 |
|
|
@@ -207,27 +234,27 @@ if st.button("Buscar Recetas"):
|
|
| 207 |
st.markdown(f"### {row['Nombre']}")
|
| 208 |
st.write(f"**Tiempo:** {row['Duracion']} | **Porciones:** {row.get('Comensales', 'N/A')} | **Nutrición:** {row['Valor nutricional']}")
|
| 209 |
|
| 210 |
-
# Tabla de ingredientes
|
| 211 |
ing_df = pd.DataFrame(row['ingredients_parsed'], columns=["Ingrediente"])
|
| 212 |
-
st.
|
| 213 |
|
| 214 |
-
# Instrucciones
|
| 215 |
for i, step in enumerate(row['instructions_parsed'], 1):
|
| 216 |
-
with st.expander(f"Paso {i}"):
|
| 217 |
st.write(step)
|
| 218 |
|
| 219 |
-
# Gráfico simple
|
| 220 |
fig = px.bar(x=['Tiempo Total'], y=[row['total_minutes']])
|
| 221 |
st.plotly_chart(fig, use_container_width=True)
|
| 222 |
|
| 223 |
-
#
|
| 224 |
-
st.header("Chatbot de Consejos")
|
| 225 |
-
chat_input = st.chat_input("Pregunta sobre una receta o
|
| 226 |
if chat_input:
|
| 227 |
with st.chat_message("user"):
|
| 228 |
st.markdown(chat_input)
|
| 229 |
-
#
|
| 230 |
-
similar_recs = recommend_recipes_optimized(
|
| 231 |
context = similar_recs['Pasos'].iloc[0] if not similar_recs.empty else ""
|
| 232 |
response = get_chat_response(chat_input, context)
|
| 233 |
with st.chat_message("assistant"):
|
|
|
|
| 28 |
initial_sidebar_state="expanded"
|
| 29 |
)
|
| 30 |
|
| 31 |
+
# CSS mejorado (mantengo tu estilo, agrego para nuevas presentaciones)
|
| 32 |
st.markdown("""
|
| 33 |
<style>
|
| 34 |
+
.main {background-color: #f8f9fa;}
|
| 35 |
+
.stButton>button {
|
| 36 |
+
background-color: #28a745;
|
| 37 |
+
color: white;
|
| 38 |
+
border-radius: 10px;
|
| 39 |
+
padding: 0.5rem 1rem;
|
| 40 |
+
font-weight: 600;
|
| 41 |
+
border: none;
|
| 42 |
+
transition: all 0.3s;
|
| 43 |
+
}
|
| 44 |
+
.stButton>button:hover {
|
| 45 |
+
background-color: #218838;
|
| 46 |
+
transform: translateY(-2px);
|
| 47 |
+
box-shadow: 0 4px 12px rgba(40, 167, 69, 0.2);
|
| 48 |
+
}
|
| 49 |
+
.recipe-card {
|
| 50 |
+
background: white;
|
| 51 |
+
border-radius: 10px;
|
| 52 |
+
padding: 1.5rem;
|
| 53 |
+
margin: 1rem 0;
|
| 54 |
+
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
| 55 |
+
border-left: 4px solid #28a745;
|
| 56 |
+
}
|
| 57 |
+
.highlight {
|
| 58 |
+
background-color: #e8f5e9;
|
| 59 |
+
padding: 0.5rem;
|
| 60 |
+
border-radius: 5px;
|
| 61 |
+
margin: 0.5rem 0;
|
| 62 |
+
}
|
| 63 |
+
.ingredient-item {
|
| 64 |
+
padding: 0.3rem 0;
|
| 65 |
+
border-bottom: 1px solid #eee;
|
| 66 |
+
}
|
| 67 |
+
.instruction-step {
|
| 68 |
+
margin: 0.5rem 0;
|
| 69 |
+
padding-left: 1rem;
|
| 70 |
+
border-left: 3px solid #28a745;
|
| 71 |
+
}
|
| 72 |
</style>
|
| 73 |
""", unsafe_allow_html=True)
|
| 74 |
|
| 75 |
# ==================== FUNCIONES AUXILIARES ====================
|
| 76 |
|
| 77 |
def parse_ingredient_string(ing_str):
|
| 78 |
+
"""Parsear cadena de ingredientes (adaptado para nuevo dataset: split por comas)"""
|
| 79 |
try:
|
| 80 |
if isinstance(ing_str, str):
|
| 81 |
items = [item.strip() for item in ing_str.split(',') if item.strip()]
|
|
|
|
| 85 |
return []
|
| 86 |
|
| 87 |
def parse_instruction_string(instr_str):
|
| 88 |
+
"""Parsear instrucciones (adaptado: split por puntos o líneas)"""
|
| 89 |
try:
|
| 90 |
if isinstance(instr_str, str):
|
|
|
|
| 91 |
steps = re.split(r'\.\s*|\n\s*', instr_str)
|
| 92 |
steps = [step.strip() for step in steps if step.strip()]
|
| 93 |
return steps
|
|
|
|
| 95 |
except Exception:
|
| 96 |
return [str(instr_str)]
|
| 97 |
|
| 98 |
+
def parse_duration_to_minutes(dur_str):
    """Convert an ``HH:MM`` duration string to a total number of minutes.

    Args:
        dur_str: Duration text such as ``"01:30"``. Anything that is not a
            string containing ``:`` is treated as "no duration".

    Returns:
        ``hours * 60 + minutes`` as an int, or ``0`` when the value is not a
        string, contains no colon, does not split into exactly two fields,
        or has a field that is not an integer.
    """
    if not isinstance(dur_str, str) or ':' not in dur_str:
        return 0
    try:
        hours, minutes = map(int, dur_str.split(':'))
    except ValueError:
        # ValueError covers both a non-numeric field and a wrong number of
        # fields (unpacking mismatch). Catching only this, instead of a bare
        # ``except:``, keeps KeyboardInterrupt/SystemExit from being swallowed.
        return 0
    return hours * 60 + minutes
|
| 107 |
+
|
| 108 |
@st.cache_resource(show_spinner="Cargando modelo de embeddings...")
def load_embedding_model():
    """Return the sentence-embedding model used for ingredient search.

    The function is wrapped in ``st.cache_resource`` and shows a spinner
    message while it runs.
    """
    # Multilingual checkpoint, chosen for Spanish text and synonyms.
    embedding_model_name = 'paraphrase-multilingual-MiniLM-L12-v2'
    return SentenceTransformer(embedding_model_name)
|
| 111 |
|
| 112 |
@st.cache_resource(show_spinner="Cargando modelo para chatbot...")
def load_chat_model():
    """Return the text-generation pipeline used by the recipe chatbot.

    The function is wrapped in ``st.cache_resource`` and shows a spinner
    message while it runs.
    """
    # Spanish-language model used to generate cooking advice.
    # NOTE(review): ``pipeline`` is presumably ``transformers.pipeline``;
    # the import is not visible in this view.
    chat_model_id = "flax-community/gpt-2-spanish"
    return pipeline("text-generation", model=chat_model_id)
|
| 115 |
|
| 116 |
@lru_cache(maxsize=1000)
|
| 117 |
def get_chat_response(query, context=""):
|
|
|
|
| 121 |
response = model(prompt, max_length=150, num_return_sequences=1)[0]['generated_text']
|
| 122 |
return response.split("Asistente: ")[-1].strip()
|
| 123 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
# ==================== CARGA DE DATOS ====================
|
| 125 |
|
| 126 |
@st.cache_data(show_spinner="Cargando y procesando datos de recetas...")
|
| 127 |
def load_and_preprocess_data():
|
| 128 |
+
"""Carga y preprocesa (adaptado a dataset español, mantengo optimizaciones originales)"""
|
| 129 |
try:
|
| 130 |
st.info("Descargando dataset de recetas españolas... Esto puede tomar unos segundos.")
|
| 131 |
ds = load_dataset("somosnlp/RecetasDeLaAbuela")
|
| 132 |
df = ds['train'].to_pandas()
|
| 133 |
|
| 134 |
+
# Limitar para rendimiento (como en original)
|
| 135 |
df = df.head(8000).copy()
|
| 136 |
|
| 137 |
+
# Procesar ingredientes (adaptado)
|
| 138 |
df['ingredients_parsed'] = df['Ingredientes'].apply(parse_ingredient_string)
|
| 139 |
df['ingredients_str'] = df['ingredients_parsed'].apply(lambda x: ' '.join(x).lower())
|
| 140 |
|
| 141 |
+
# Procesar instrucciones (adaptado)
|
| 142 |
df['instructions_parsed'] = df['Pasos'].apply(parse_instruction_string)
|
| 143 |
|
| 144 |
+
# Procesar tiempo (adaptado)
|
| 145 |
df['total_minutes'] = df['Duracion'].apply(parse_duration_to_minutes)
|
| 146 |
|
| 147 |
+
# Filtro saludable (adaptado a 'Valor nutricional')
|
| 148 |
df['is_healthy'] = df['Valor nutricional'].str.contains('Bajo en calorías|Bajo en grasas|vegetarianos|vegano', na=False, case=False)
|
| 149 |
|
| 150 |
+
# Pre-calcular embeddings (como en original, pero multilingual)
|
| 151 |
st.info("Calculando embeddings multilingües para búsqueda rápida...")
|
| 152 |
model = load_embedding_model()
|
| 153 |
ingredients_texts = df['ingredients_str'].tolist()
|
|
|
|
| 169 |
# ==================== FUNCIONES DE RECOMENDACIÓN ====================
|
| 170 |
|
| 171 |
def recommend_recipes_optimized(user_ingredients, category="", top_k=5, is_healthy=True, is_vegan=False, max_time=60):
|
| 172 |
+
"""Recomendación optimizada (mantengo lógica original, adapto filtros a nuevo dataset)"""
|
| 173 |
try:
|
| 174 |
if df.empty:
|
| 175 |
return pd.DataFrame()
|
|
|
|
| 213 |
if df.empty:
|
| 214 |
st.stop()
|
| 215 |
|
| 216 |
+
# Barra lateral (mantengo original)
|
| 217 |
with st.sidebar:
|
| 218 |
st.header("Filtros")
|
| 219 |
max_time = st.slider("Tiempo máximo (minutos)", 10, 120, 60)
|
|
|
|
| 221 |
is_vegan = st.checkbox("Vegano", value=False)
|
| 222 |
category = st.text_input("Categoría (ej. postres)")
|
| 223 |
|
| 224 |
+
# Entrada usuario (mantengo)
|
| 225 |
user_input = st.text_input("Ingresa ingredientes (separados por comas, en español):", "tomate, cebolla, pollo")
|
| 226 |
user_ingredients = [i.strip().lower() for i in user_input.split(',') if i.strip()]
|
| 227 |
|
|
|
|
| 234 |
st.markdown(f"### {row['Nombre']}")
|
| 235 |
st.write(f"**Tiempo:** {row['Duracion']} | **Porciones:** {row.get('Comensales', 'N/A')} | **Nutrición:** {row['Valor nutricional']}")
|
| 236 |
|
| 237 |
+
# Mejora: Tabla de ingredientes
|
| 238 |
ing_df = pd.DataFrame(row['ingredients_parsed'], columns=["Ingrediente"])
|
| 239 |
+
st.dataframe(ing_df, use_container_width=True)
|
| 240 |
|
| 241 |
+
# Mejora: Instrucciones en expanders
|
| 242 |
for i, step in enumerate(row['instructions_parsed'], 1):
|
| 243 |
+
with st.expander(f"Paso {i}:"):
|
| 244 |
st.write(step)
|
| 245 |
|
| 246 |
+
# Gráfico simple (mantengo original)
|
| 247 |
fig = px.bar(x=['Tiempo Total'], y=[row['total_minutes']])
|
| 248 |
st.plotly_chart(fig, use_container_width=True)
|
| 249 |
|
| 250 |
+
# Mejora: Sección de Chatbot
|
| 251 |
+
st.header("Chatbot de Consejos para Recetas")
|
| 252 |
+
chat_input = st.chat_input("Pregunta sobre una receta o procedimiento (ej: ¿cómo hacer esto vegano?):")
|
| 253 |
if chat_input:
|
| 254 |
with st.chat_message("user"):
|
| 255 |
st.markdown(chat_input)
|
| 256 |
+
# RAG-like: Contexto de receta similar
|
| 257 |
+
similar_recs = recommend_recipes_optimized(user_ingredients, top_k=1)
|
| 258 |
context = similar_recs['Pasos'].iloc[0] if not similar_recs.empty else ""
|
| 259 |
response = get_chat_response(chat_input, context)
|
| 260 |
with st.chat_message("assistant"):
|