yokoha committed on
Commit
58e2c34
·
verified ·
1 Parent(s): c1815a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +150 -162
app.py CHANGED
@@ -1,168 +1,156 @@
1
- import pandas as pd
2
- import numpy as np
3
- import datetime as dt
4
- import warnings
5
-
6
- from statsmodels.tsa.holtwinters import ExponentialSmoothing
7
- import plotly.graph_objects as go
8
  import gradio as gr
9
-
10
# Suppress every warning category for the whole process.
warnings.filterwarnings("ignore")

# -----------------------------
# CONFIG
# -----------------------------
DATA_FILE = "202503-domae.parquet"  # must be placed in the same path as this script
FORECAST_END_YEAR = 2030  # final forecast year (forecast runs through December)
SEASONAL_PERIODS = 12  # monthly seasonality
18
-
19
- # -----------------------------
20
- # 1. 데이터 적재 & 전처리
21
- # -----------------------------
22
-
23
def load_data(path: str) -> pd.DataFrame:
    """Read the Parquet file at *path* and return a date-by-item price table.

    The date is taken from a ``date`` column when present, otherwise parsed
    from a ``PRCE_REG_MM`` column in ``YYYYMM`` form. Prices are averaged per
    (date, item) pair, pivoted so each item becomes a column, and the index is
    snapped to month-start timestamps.

    Raises:
        ValueError: if neither supported date column exists.
    """
    frame = pd.read_parquet(path)
    columns = frame.columns

    # Normalise the date column; two input layouts are supported.
    if "date" in columns:
        frame["date"] = pd.to_datetime(frame["date"])
    elif "PRCE_REG_MM" in columns:
        year_month = frame["PRCE_REG_MM"].astype(str)
        frame["date"] = pd.to_datetime(year_month, format="%Y%m")
    else:
        raise ValueError("지원되지 않는 날짜 컬럼 형식입니다.")

    # Pick whichever naming scheme the file uses for item and price.
    item_col = "item"
    if "PDLT_NM" in frame.columns:
        item_col = "PDLT_NM"
    price_col = "price"
    if "AVRG_PRCE" in frame.columns:
        price_col = "AVRG_PRCE"

    averaged = frame.groupby(["date", item_col])[price_col].mean().reset_index()
    table = averaged.pivot(index="date", columns=item_col, values=price_col)
    table = table.sort_index()

    # Align the index to month-start timestamps.
    month_periods = pd.to_datetime(table.index).to_period("M")
    table.index = month_periods.to_timestamp()
    return table
52
-
53
# Load the price table once at import time; its columns are the selectable items.
pivot = load_data(DATA_FILE)
products = list(pivot.columns)
55
-
56
- # -----------------------------
57
- # 2. 고유 모델 정의 (Holt‑Winters + fallback)
58
- # -----------------------------
59
-
60
- def _fit_forecast(series: pd.Series) -> pd.Series:
61
- """월별 시계열 → 2025‑04 이후 FORECAST_END_YEAR‑12까지 예측."""
62
- # Ensure Monthly Start frequency
63
- series = series.asfreq("MS")
64
-
65
- # 예측 기간 계산
66
- last_date = series.index[-1]
67
- end_date = dt.datetime(FORECAST_END_YEAR, 12, 1)
68
- horizon = (end_date.year - last_date.year) * 12 + (end_date.month - last_date.month)
69
- if horizon <= 0:
70
- return pd.Series(dtype=float)
71
-
72
  try:
73
- model = ExponentialSmoothing(
74
- series,
75
- trend="add",
76
- seasonal="mul",
77
- seasonal_periods=SEASONAL_PERIODS,
78
- initialization_method="estimated",
79
- )
80
- res = model.fit(optimized=True)
81
- fc = res.forecast(horizon)
82
- except Exception:
83
- # 홀트윈터스 학습 실패 시 단순 CAGR 기반 예측
84
- growth = series.pct_change().fillna(0).mean()
85
- fc = pd.Series(
86
- [series.iloc[-1] * (1 + growth) ** i for i in range(1, horizon + 1)],
87
- index=pd.date_range(
88
- series.index[-1] + pd.DateOffset(months=1),
89
- periods=horizon,
90
- freq="MS",
91
- ),
92
- )
93
- return fc
94
-
95
# Precompute, once at import time, each item's forecast and its combined
# history + forecast series so the app stays responsive.
FORECASTS = {name: _fit_forecast(pivot[name].dropna()) for name in products}
FULL_SERIES = {
    name: pd.concat([pivot[name].dropna(), FORECASTS[name]]) for name in products
}
103
-
104
- # -----------------------------
105
- # 3. 내일 가격 예측 함수
106
- # -----------------------------
107
-
108
# Both dates are fixed once, when the module is imported.
today = dt.date.today()
tomorrow = today + dt.timedelta(days=1)


def build_tomorrow_df() -> pd.DataFrame:
    """Return a one-column table with each item's estimated price for tomorrow.

    Every series in FULL_SERIES is resampled to daily frequency with linear
    interpolation, and the value at tomorrow's date is rounded to two
    decimals. Items whose series does not cover tomorrow (or is empty) get NaN.

    Returns:
        DataFrame indexed by item name, sorted by index.
    """
    # Look the date up as a Timestamp: a plain datetime.date key is not
    # matched against a DatetimeIndex by recent pandas, which would silently
    # turn every entry into NaN.
    target = pd.Timestamp(tomorrow)

    preds = {}
    for item, series in FULL_SERIES.items():
        if series.empty:
            # Nothing to interpolate for this item.
            preds[item] = np.nan
            continue
        # Daily linear interpolation between the monthly points.
        daily = series.resample("D").interpolate("linear")
        preds[item] = round(daily.loc[target], 2) if target in daily.index else np.nan

    return (
        pd.DataFrame.from_dict(preds, orient="index", columns=[f"내일({tomorrow}) 예상가(KRW)"])
        .sort_index()
    )


tomorrow_df = build_tomorrow_df()
124
-
125
- # -----------------------------
126
- # 4. 시각화 함수
127
- # -----------------------------
128
-
129
def plot_item(item: str):
    """Build the Plotly figure with the history and forecast lines for *item*.

    The history comes from the ``pivot`` table (NaNs dropped, month-start
    frequency) and the forecast from the precomputed ``FORECASTS`` mapping.
    """
    history = pivot[item].dropna().asfreq("MS")
    forecast = FORECASTS[item]

    traces = [
        go.Scatter(x=history.index, y=history.values, mode="lines", name="Historical"),
        go.Scatter(x=forecast.index, y=forecast.values, mode="lines", name="Forecast"),
    ]
    figure = go.Figure(data=traces)

    legend_cfg = dict(orientation="h", y=1.02, x=0.01)
    margin_cfg = dict(l=40, r=20, t=60, b=40)
    figure.update_layout(
        title=f"{item} – Monthly Avg Price (1996‑2025) & Forecast(2025‑04→2030‑12)",
        xaxis_title="Date",
        yaxis_title="Price (KRW)",
        legend=legend_cfg,
        margin=margin_cfg,
    )
    return figure
144
-
145
- # -----------------------------
146
- # 5. Gradio UI
147
- # -----------------------------
148
with gr.Blocks(title="도매 가격 예측 App") as demo:
    gr.Markdown("## 📈 도매 가격 예측 대시보드 (1996‑2030)")

    # Guard against an empty item list so building the UI cannot raise IndexError.
    default_item = products[0] if products else None

    # Item selection -> chart update.
    item_dd = gr.Dropdown(products, value=default_item, label="품목 선택")
    # Draw the preselected item's chart up front; the change event below only
    # fires once the user picks a different item, which otherwise leaves the
    # plot empty on first load.
    chart_out = gr.Plot(
        value=plot_item(default_item) if default_item is not None else None,
        label="가격 추세",
    )

    # Tomorrow's price table (fixed at start-up).
    gr.Markdown(f"### 내일({tomorrow}) 각 품목 예상가 (KRW)")
    # NOTE(review): gr.Dataframe appears to render only the columns, not the
    # index, so the item names are moved from the index into a real column —
    # confirm against the installed Gradio version.
    tomorrow_table = gr.Dataframe(
        tomorrow_df.rename_axis("품목").reset_index(),
        interactive=False,
        height=400,
    )

    def update_chart(product):
        """Return the figure for the newly selected item."""
        return plot_item(product)

    item_dd.change(update_chart, inputs=item_dd, outputs=chart_out, queue=False)

# -----------------------------
# 6. Script entry point
# -----------------------------
if __name__ == "__main__":
    demo.launch()
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import pandas as pd
3
+ import os
4
+ import tempfile
5
+ import chardet
6
+
7
def detect_encoding(file_path, sample_size=None):
    """
    Detect the text encoding of a file with chardet.

    Args:
        file_path: Path of the file to inspect.
        sample_size: Optional maximum number of leading bytes to analyse.
            The default (None) analyses the whole file.

    Returns:
        The encoding name reported by chardet, or "utf-8" when chardet
        reports no encoding.
    """
    with open(file_path, 'rb') as f:
        raw = f.read() if sample_size is None else f.read(sample_size)

    result = chardet.detect(raw)
    # chardet reports None when it cannot decide; fall back to UTF-8 so
    # callers always receive a usable codec name.
    return result['encoding'] or 'utf-8'
14
+
15
def merge_csv_files(files):
    """
    Merge multiple CSV files into one.

    Each file is read with the detected encoding first and then with a list
    of fallback encodings. All rows are concatenated and written to a
    temporary CSV encoded as UTF-8 with BOM.

    Args:
        files: List of uploaded CSV files. Each entry may be a path
            (str / os.PathLike) or an object exposing the path as ``.name``.

    Returns:
        Tuple ``(output_path, status_message)``. ``output_path`` is None when
        nothing was merged.
    """
    if not files:
        return None, "No files were uploaded. Please select CSV files to merge."

    if len(files) > 30:
        return None, "Maximum 30 files can be merged at once."

    # Tried in order after the detected encoding. latin1 maps every byte to a
    # character, so it never fails to decode and must stay last.
    fallback_encodings = ['utf-8-sig', 'cp949', 'euc-kr', 'latin1']

    try:
        dataframes = []
        for file in files:
            path = file if isinstance(file, (str, os.PathLike)) else file.name

            try:
                detected = detect_encoding(path)
            except Exception:
                # Detection is only a hint; a genuine I/O problem resurfaces
                # in read_csv below and is reported by the outer handler.
                detected = None

            df = None
            for enc in [detected, *fallback_encodings]:
                try:
                    df = pd.read_csv(path, encoding=enc)
                    break
                except (UnicodeDecodeError, LookupError):
                    # Wrong guess, or a codec name Python does not know.
                    continue
            if df is None:
                return None, f"Could not determine encoding for '{os.path.basename(path)}'."

            dataframes.append(df)

        merged_df = pd.concat(dataframes, ignore_index=True)

        # Reserve a path that outlives this call so the result can be downloaded.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as tmp:
            output_path = tmp.name

        # Excel-compatible output (UTF-8 with BOM).
        merged_df.to_csv(output_path, index=False, encoding='utf-8-sig')

        return output_path, f"Successfully merged {len(files)} files. Please open with UTF-8 encoding in Excel."

    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return None, f"Error occurred: {str(e)}"
70
+
71
+ # Create a stylish Gradio interface
72
# Static page text and styling, kept apart from the layout code below.
_HEADER_MD = """
    # 📊 CSVFusion: Intelligent File Merger

    *Seamlessly combine multiple CSV files into one unified dataset*

    ---
    """

_USAGE_MD = """
            ### How to use CSVFusion:
            1. Upload up to 30 CSV files using the panel on the right
            2. Click the "Merge Files" button
            3. Download your consolidated CSV file

            ### Features:
            - Automatic encoding detection
            - Handles various CSV formats
            - Excel-compatible output (UTF-8)
            - Preserves all data columns
            """

_STYLE_HTML = """
    <style>
    .gradio-container {
    background: linear-gradient(to right, #f9f9f9, #ffffff);
    border-radius: 12px;
    }
    #file_upload {
    border: 2px dashed #3498db;
    border-radius: 8px;
    padding: 20px;
    transition: all 0.3s;
    }
    #file_upload:hover {
    border-color: #2980b9;
    box-shadow: 0 0 10px rgba(52, 152, 219, 0.3);
    }
    .footer {
    text-align: center;
    margin-top: 30px;
    color: #7f8c8d;
    font-size: 0.9em;
    }
    </style>
    """

_FOOTER_HTML = """
    <div class="footer">
    <p>CSVFusion © 2025 - A powerful tool for data professionals</p>
    </div>
    """

# Page layout: header, instructions beside the upload panel, merge button,
# then the status box beside the download slot.
with gr.Blocks(title="CSVFusion") as app:
    gr.Markdown(_HEADER_MD)

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown(_USAGE_MD)

        with gr.Column(scale=3):
            upload_options = dict(
                file_count="multiple",
                label="Upload CSV Files (Max 30)",
                file_types=[".csv"],
                elem_id="file_upload",
            )
            input_files = gr.File(**upload_options)

    with gr.Row():
        merge_button = gr.Button("Merge Files", variant="primary", size="lg")

    with gr.Row():
        with gr.Column():
            status = gr.Textbox(label="Status", placeholder="Ready to merge your files...")
        with gr.Column():
            output_file = gr.File(label="Download Merged CSV")

    # Custom CSS, then the footer.
    gr.HTML(_STYLE_HTML)
    gr.HTML(_FOOTER_HTML)

    # Clicking the button merges the uploads and fills the download slot and status box.
    merge_button.click(
        fn=merge_csv_files,
        inputs=[input_files],
        outputs=[output_file, status],
    )

# Run the app
if __name__ == "__main__":
    app.launch()