Spaces:

yokoha
/

CSV

Running

App Files Files Community

yokoha committed on Apr 26, 2025

Commit

58e2c34

verified ·

1 Parent(s): c1815a9

Update app.py

Browse files

Files changed (1) hide show

app.py +150 -162

app.py CHANGED Viewed

@@ -1,168 +1,156 @@
-import pandas as pd
-import numpy as np
-import datetime as dt
-import warnings
-from statsmodels.tsa.holtwinters import ExponentialSmoothing
-import plotly.graph_objects as go
 import gradio as gr
-warnings.filterwarnings("ignore")
-# -----------------------------
-# CONFIG
-# -----------------------------
-DATA_FILE = "202503-domae.parquet"  # 같은 경로에 놓여 있어야 함
-FORECAST_END_YEAR = 2030            # 예측 종료 연도(12월까지)
-SEASONAL_PERIODS = 12               # 월별 seasonality
-# -----------------------------
-# 1. 데이터 적재 & 전처리
-# -----------------------------
-def load_data(path: str) -> pd.DataFrame:
-    """Parquet → 월별 피벗 테이블(DateIndex, 열: 품목, 값: 가격)."""
-    df = pd.read_parquet(path)
-    # 날짜 컬럼 생성/정규화 (두 가지 케이스 지원)
-    if "date" in df.columns:
-        df["date"] = pd.to_datetime(df["date"])
-    elif "PRCE_REG_MM" in df.columns:
-        df["date"] = pd.to_datetime(df["PRCE_REG_MM"].astype(str), format="%Y%m")
-    else:
-        raise ValueError("지원되지 않는 날짜 컬럼 형식입니다.")
-    # 기본 컬럼명 통일
-    item_col = "PDLT_NM" if "PDLT_NM" in df.columns else "item"
-    price_col = "AVRG_PRCE" if "AVRG_PRCE" in df.columns else "price"
-    monthly = (
-        df.groupby(["date", item_col])[price_col]
-        .mean()
-        .reset_index()
-    )
-    pivot = (
-        monthly
-        .pivot(index="date", columns=item_col, values=price_col)
-        .sort_index()
-    )
-    # 월 시작일 MS 빈도로 정렬
-    pivot.index = pd.to_datetime(pivot.index).to_period("M").to_timestamp()
-    return pivot
-pivot = load_data(DATA_FILE)
-products = pivot.columns.tolist()
-# -----------------------------
-# 2. 고유 모델 정의 (Holt‑Winters + fallback)
-# -----------------------------
-def _fit_forecast(series: pd.Series) -> pd.Series:
-    """월별 시계열 → 2025‑04 이후 FORECAST_END_YEAR‑12까지 예측."""
-    # Ensure Monthly Start frequency
-    series = series.asfreq("MS")
-    # 예측 기간 계산
-    last_date = series.index[-1]
-    end_date = dt.datetime(FORECAST_END_YEAR, 12, 1)
-    horizon = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)
-    if horizon <= 0:
-        return pd.Series(dtype=float)
     try:
-        model = ExponentialSmoothing(
-            series,
-            trend="add",
-            seasonal="mul",
-            seasonal_periods=SEASONAL_PERIODS,
-            initialization_method="estimated",
-        )
-        res = model.fit(optimized=True)
-        fc = res.forecast(horizon)
-    except Exception:
-        # 홀트윈터스 학습 실패 시 단순 CAGR 기반 예측
-        growth = series.pct_change().fillna(0).mean()
-        fc = pd.Series(
-            [series.iloc[-1] * (1 + growth) ** i for i in range(1, horizon + 1)],
-            index=pd.date_range(
-                series.index[-1] + pd.DateOffset(months=1),
-                periods=horizon,
-                freq="MS",
-            ),
-        )
-    return fc
-# 품목별 전체 시리즈(과거+예측) 사전 구축 → 앱 반응 속도 개선
-FULL_SERIES = {}
-FORECASTS = {}
-for item in products:
-    hist = pivot[item].dropna()
-    fc = _fit_forecast(hist)
-    FULL_SERIES[item] = pd.concat([hist, fc])
-    FORECASTS[item] = fc
-# -----------------------------
-# 3. 내일 가격 예측 함수
-# -----------------------------
-today = dt.date.today()
-tomorrow = today + dt.timedelta(days=1)
-def build_tomorrow_df() -> pd.DataFrame:
-    """내일(일 단위) 예상 가격 DataFrame 반환."""
-    preds = {}
-    for item, series in FULL_SERIES.items():
-        # 일 단위 선형 보간
-        daily = series.resample("D").interpolate("linear")
-        preds[item] = round(daily.loc[tomorrow], 2) if tomorrow in daily.index else np.nan
-    return (
-        pd.DataFrame.from_dict(preds, orient="index", columns=[f"내일({tomorrow}) 예상가(KRW)"])
-        .sort_index()
     )
-tomorrow_df = build_tomorrow_df()
-# -----------------------------
-# 4. 시각화 함수
-# -----------------------------
-def plot_item(item: str):
-    hist = pivot[item].dropna().asfreq("MS")
-    fc = FORECASTS[item]
-    fig = go.Figure()
-    fig.add_trace(go.Scatter(x=hist.index, y=hist.values, mode="lines", name="Historical"))
-    fig.add_trace(go.Scatter(x=fc.index, y=fc.values, mode="lines", name="Forecast"))
-    fig.update_layout(
-        title=f"{item} – Monthly Avg Price (1996‑2025) & Forecast(2025‑04→2030‑12)",
-        xaxis_title="Date",
-        yaxis_title="Price (KRW)",
-        legend=dict(orientation="h", y=1.02, x=0.01),
-        margin=dict(l=40, r=20, t=60, b=40),
     )
-    return fig
-# -----------------------------
-# 5. Gradio UI
-# -----------------------------
-with gr.Blocks(title="도매 가격 예측 App") as demo:
-    gr.Markdown("## 📈 도매 가격 예측 대시보드 (1996‑2030)")
-    # 품목 선택 → 그래프 업데이트
-    item_dd = gr.Dropdown(products, value=products[0], label="품목 선택")
-    chart_out = gr.Plot(label="가격 추세")
-    # 내일 가격 표 (초기 고정)
-    gr.Markdown(f"### 내일({tomorrow}) 각 품목 예상가 (KRW)")
-    tomorrow_table = gr.Dataframe(tomorrow_df, interactive=False, height=400)
-    def update_chart(product):
-        return plot_item(product)
-    item_dd.change(update_chart, inputs=item_dd, outputs=chart_out, queue=False)
-# -----------------------------
-# 6. 실행 스크립트 엔트리포인트
-# -----------------------------
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+import pandas as pd
+import os
+import tempfile
+import chardet
def detect_encoding(file_path):
    """
    Guess the text encoding of the file at ``file_path``.

    The file is read in full as raw bytes and handed to ``chardet.detect``;
    the ``'encoding'`` entry of the detection result is returned as-is.
    """
    with open(file_path, 'rb') as raw_file:
        raw_bytes = raw_file.read()
    detection = chardet.detect(raw_bytes)
    return detection['encoding']
def _uploaded_path(file):
    """Return the filesystem path for one uploaded-file entry."""
    # Plain paths are checked first: pathlib.Path also exposes ``.name``,
    # but there it is only the basename, not a usable path.
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    # Otherwise expect a file-like wrapper whose ``.name`` is the full path.
    return file.name


def _read_csv_with_fallback(path, detected_encoding):
    """
    Read ``path`` as CSV, trying the detected encoding and then common fallbacks.

    Returns the DataFrame, or None when none of the candidate encodings worked.
    """
    # 'latin1' maps every byte value, so it serves as the last-resort decoder.
    candidates = [detected_encoding, 'cp949', 'euc-kr', 'latin1']
    for encoding in candidates:
        try:
            return pd.read_csv(path, encoding=encoding)
        except (UnicodeDecodeError, LookupError):
            # UnicodeDecodeError: wrong guess for this file.
            # LookupError: the detector named a codec Python does not provide.
            continue
    return None


def merge_csv_files(files):
    """
    Merge multiple CSV files into one.

    Args:
        files: List of uploaded CSV files. Each entry may be a path
            (``str`` / ``os.PathLike``) or an object whose ``.name``
            attribute holds the path.
    Returns:
        A ``(path, message)`` tuple: the path to the merged CSV file (or
        ``None`` on failure) and a human-readable status message.
    """
    if not files:
        return None, "No files were uploaded. Please select CSV files to merge."
    if len(files) > 30:
        return None, "Maximum 30 files can be merged at once."
    try:
        dataframes = []
        for file in files:
            path = _uploaded_path(file)
            df = _read_csv_with_fallback(path, detect_encoding(path))
            if df is None:
                return None, f"Could not determine encoding for '{os.path.basename(path)}'."
            dataframes.append(df)
        # Rows are stacked in upload order; columns are unioned by name.
        merged_df = pd.concat(dataframes, ignore_index=True)
        # Only the unique name is needed here; the file must outlive this
        # call so that it can be offered for download.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as tmp:
            output_path = tmp.name
        try:
            # UTF-8 with BOM so that Excel recognises the encoding.
            merged_df.to_csv(output_path, index=False, encoding='utf-8-sig')
        except Exception:
            # Do not leave an orphaned temp file behind when writing fails.
            try:
                os.remove(output_path)
            except OSError:
                pass
            raise
        return output_path, f"Successfully merged {len(files)} files. Please open with UTF-8 encoding in Excel."
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return None, f"Error occurred: {str(e)}"
# ---------------------------------------------------------------------------
# Static text and markup shown by the interface
# ---------------------------------------------------------------------------
_HEADER_MD = """
        # 📊 CSVFusion: Intelligent File Merger
        *Seamlessly combine multiple CSV files into one unified dataset*
        ---
        """

_HOWTO_MD = """
            ### How to use CSVFusion:
            1. Upload up to 30 CSV files using the panel on the right
            2. Click the "Merge Files" button
            3. Download your consolidated CSV file
            ### Features:
            - Automatic encoding detection
            - Handles various CSV formats
            - Excel-compatible output (UTF-8)
            - Preserves all data columns
            """

# Styling for the page container, the upload drop zone and the footer.
_CUSTOM_CSS_HTML = """
    <style>
    .gradio-container {
        background: linear-gradient(to right, #f9f9f9, #ffffff);
        border-radius: 12px;
    }
    #file_upload {
        border: 2px dashed #3498db;
        border-radius: 8px;
        padding: 20px;
        transition: all 0.3s;
    }
    #file_upload:hover {
        border-color: #2980b9;
        box-shadow: 0 0 10px rgba(52, 152, 219, 0.3);
    }
    .footer {
        text-align: center;
        margin-top: 30px;
        color: #7f8c8d;
        font-size: 0.9em;
    }
    </style>
    """

_FOOTER_HTML = """
    <div class="footer">
        <p>CSVFusion © 2025 - A powerful tool for data professionals</p>
    </div>
    """

# ---------------------------------------------------------------------------
# Interface layout: header, instructions + upload, merge button, results
# ---------------------------------------------------------------------------
with gr.Blocks(title="CSVFusion") as app:
    gr.Markdown(_HEADER_MD)

    # Left column: usage notes. Right column: multi-file CSV upload.
    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown(_HOWTO_MD)
        with gr.Column(scale=3):
            input_files = gr.File(
                label="Upload CSV Files (Max 30)",
                file_count="multiple",
                file_types=[".csv"],
                elem_id="file_upload",
            )

    with gr.Row():
        merge_button = gr.Button("Merge Files", variant="primary", size="lg")

    # Status message next to the download slot for the merged file.
    with gr.Row():
        with gr.Column():
            status = gr.Textbox(label="Status", placeholder="Ready to merge your files...")
        with gr.Column():
            output_file = gr.File(label="Download Merged CSV")

    # Inline stylesheet, then the footer markup.
    gr.HTML(_CUSTOM_CSS_HTML)
    gr.HTML(_FOOTER_HTML)

    # merge_csv_files returns (path, message), matching the output order here.
    merge_button.click(
        fn=merge_csv_files,
        inputs=[input_files],
        outputs=[output_file, status],
    )

# Start the server only when executed as a script.
if __name__ == "__main__":
    app.launch()