Spaces:

Mei-Ruan
/

rct_generator

Paused

App Files Files

admin committed on Jul 22, 2025

Commit

1bb1c3c

1 Parent(s): be296fa

replace again

Browse files

Files changed (4) hide show

.gitignore +1 -1
app.py +62 -142
requirements.txt +0 -1
utils.py +0 -33

.gitignore CHANGED Viewed

@@ -1,3 +1,3 @@
-rename.sh
 test.*
 *__pycache__*

+*.gif
 test.*
 *__pycache__*

app.py CHANGED Viewed

@@ -1,23 +1,15 @@
-import os
-import imghdr
-import hashlib
-import exifread
-import gradio as gr
 import pandas as pd
-from PIL import Image
-from utils import clean_dir, compress, mk_dir, unzip, TMP_DIR, EN_US
 ZH2EN = {
-    "单图片处理": "Process single picture",
-    "上传图片": "Upload picture",
-    "导出原格式": "Export original format",
-    "下载清理 EXIF 后的图片": "Download cleaned picture",
-    "批量处理": "Batch processor",
-    "上传包含多图片的 zip 压缩包 (确保上传进度至 100% 后再提交)": "Upload pictures zip (please ensure the zip is completely uploaded before clicking submit)",
-    "导出原格式": "Export original format",
-    "下载清理 EXIF 后的多图片压缩包": "Download cleaned pictures",
-    "EXIF 列表": "EXIF list",
     "状态栏": "Status",
 }
@@ -25,147 +17,75 @@ def _L(zh_txt: str):
     return ZH2EN[zh_txt] if EN_US else zh_txt
-def get_exif(origin_file_path):
-    with open(origin_file_path, "rb") as image_file:
-        tags = exifread.process_file(image_file)
-    output = ""
-    for key in tags.keys():
-        value = str(tags[key])
-        output += "{0}:{1}\n".format(key, value)
-    return output
-def clear_exif(img_path: str, cache: str, img_mode=None, outdir=""):
-    save_path = f"{cache}/{outdir}output." + img_path.split(".")[-1]
-    img = Image.open(img_path)
-    data = list(img.getdata())
-    if img_mode:
-        save_path = f"{cache}/{outdir}{hashlib.md5(img_path.encode()).hexdigest()}.jpg"
-    else:
-        img_mode = img.mode
-    img_without_exif = Image.new(img_mode, img.size)
-    img_without_exif.putdata(data)
-    img_without_exif.save(save_path)
-    return save_path
-def find_images(dir_path: str):
-    found_images = []
-    for root, _, files in os.walk(dir_path):
-        for file in files:
-            fpath = os.path.join(root, file).replace("\\", "/")
-            if imghdr.what(fpath) != None:
-                found_images.append(fpath)
-    return found_images
 # outer func
-def infer(img_path: str, keep_ext: bool, cache=f"{TMP_DIR}/exif"):
     status = "Success"
-    out_img = out_exif = None
     try:
-        if not img_path or imghdr.what(img_path) == None:
-            raise ValueError("请输入图片！")
         clean_dir(cache)
-        img_mode = "RGB" if not keep_ext else None
-        out_img = clear_exif(img_path, cache, img_mode)
-        out_exif = get_exif(img_path)
-    except Exception as e:
-        status = f"{e}"
-    return status, out_img, out_exif
-# outer func
-def batch_infer(imgs_zip: str, keep_ext: bool, cache=f"{TMP_DIR}/exif"):
-    status = "Success"
-    out_images = out_exifs = None
-    try:
-        if not imgs_zip:
-            raise ValueError("Please upload pictures zip!")
-        clean_dir(cache)
-        mk_dir(f"{cache}/outputs")
-        extract_to = f"{cache}/inputs"
-        unzip(imgs_zip, extract_to)
-        imgs = find_images(extract_to)
-        img_mode = "RGB" if not keep_ext else None
-        exifs = []
-        for img in imgs:
-            clear_exif(img, cache, img_mode, "outputs/")
-            exifs.append(
-                {"filename": os.path.basename(img), "exif": get_exif(img)})
-        if not exifs:
-            raise ValueError("No picture in the zip")
-        out_images = f"{cache}/outputs.zip"
-        compress(f"{cache}/outputs", out_images)
-        out_exifs = pd.DataFrame(exifs)
     except Exception as e:
         status = f"{e}"
-    return status, out_images, out_exifs
 if __name__ == "__main__":
-    with gr.Blocks() as iface:
-        with gr.Tab(_L("单图片处理")):
-            gr.Interface(
-                fn=infer,
-                inputs=[
-                    gr.File(
-                        label=_L("上传图片"),
-                        file_types=["image"],
-                    ),
-                    gr.Checkbox(
-                        label=_L("导出原格式"),
-                        value=False,
-                    ),
-                ],
-                outputs=[
-                    gr.Textbox(label=_L("状态栏"), show_copy_button=True),
-                    gr.Image(
-                        label=_L("下载清理 EXIF 后的图片"),
-                        type="filepath",
-                        show_share_button=False,
-                    ),
-                    gr.Textbox(label="EXIF", show_copy_button=True),
-                ],
-                flagging_mode="never",
-            )
-        with gr.Tab(_L("批量处理")):
-            gr.Interface(
-                fn=batch_infer,
-                inputs=[
-                    gr.File(
-                        label=_L(
-                            "上传包含多图片的 zip 压缩包 (确保上传进度至 100% 后再提交)"
-                        ),
-                        file_types=[".zip"],
-                    ),
-                    gr.Checkbox(
-                        label=_L("导出原格式"),
-                        value=False,
-                    ),
-                ],
-                outputs=[
-                    gr.Textbox(label=_L("状态栏"), show_copy_button=True),
-                    gr.File(
-                        label=_L("下载清理 EXIF 后的多图片压缩包"),
-                        type="filepath",
-                    ),
-                    gr.Dataframe(label=_L("EXIF 列表")),
-                ],
-                flagging_mode="never",
-            )
-    iface.launch()

+import csv
+import random
 import pandas as pd
+import gradio as gr
+from utils import clean_dir, TMP_DIR, EN_US
 ZH2EN = {
+    "输入参与者数量": "Number of participants",
+    "输入分组比率 (格式为用:隔开的数字，生成随机分组数据)": "Grouping ratio (numbers separated by : to generate randomized controlled trial)",
     "状态栏": "Status",
+    "下载随机分组数据 CSV": "Download data CSV",
+    "随机分组数据预览": "Data preview",
 }
     return ZH2EN[zh_txt] if EN_US else zh_txt
+def list_to_csv(list_of_dicts: list, filename: str):
+    keys = dict(list_of_dicts[0]).keys()
+    # 将列表中的字典写入 CSV 文件
+    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames=keys)
+        writer.writeheader()
+        for data in list_of_dicts:
+            writer.writerow(data)
+def random_allocate(participants: int, ratio: list, out_csv: str):
+    splits = [0]
+    total = sum(ratio)
+    for i, r in enumerate(ratio):
+        splits.append(splits[i] + int(1.0 * r / total * participants))
+    splits[-1] = participants
+    partist = list(range(1, participants + 1))
+    random.shuffle(partist)
+    allocation = []
+    groups = len(ratio)
+    for i in range(groups):
+        start = splits[i]
+        end = splits[i + 1]
+        for participant in partist[start:end]:
+            allocation.append({"id": participant, "group": i + 1})
+    sorted_data = sorted(allocation, key=lambda x: x["id"])
+    list_to_csv(sorted_data, out_csv)
+    return out_csv, pd.DataFrame(sorted_data)
 # outer func
+def infer(participants: float, ratios: str, cache=f"{TMP_DIR}/rct"):
+    ratio = []
     status = "Success"
+    out_csv = previews = None
     try:
+        ratio_list = ratios.split(":")
         clean_dir(cache)
+        for r in ratio_list:
+            current_ratio = float(r.strip())
+            if current_ratio > 0:
+                ratio.append(current_ratio)
+        out_csv, previews = random_allocate(
+            int(participants), ratio, f"{cache}/output.csv"
+        )
     except Exception as e:
         status = f"{e}"
+    return status, out_csv, previews
 if __name__ == "__main__":
+    gr.Interface(
+        fn=infer,
+        inputs=[
+            gr.Number(label=_L("输入参与者数量"), value=10),
+            gr.Textbox(
+                label=_L("输入分组比率 (格式为用:隔开的数字，生成随机分组数据)"),
+                value="8:1:1",
+            ),
+        ],
+        outputs=[
+            gr.Textbox(label=_L("状态栏"), show_copy_button=True),
+            gr.File(label=_L("下载随机分组数据 CSV")),
+            gr.Dataframe(label=_L("随机分组数据预览")),
+        ],
+        flagging_mode="never",
+    ).launch()

requirements.txt DELETED Viewed

	@@ -1 +0,0 @@
1	- exifread

utils.py CHANGED Viewed

@@ -1,45 +1,12 @@
 import os
 import shutil
-import zipfile
 EN_US = os.getenv("LANG") != "zh_CN.UTF-8"
 TMP_DIR = "./__pycache__"
-def mk_dir(dir_path: str):
-    if not os.path.exists(dir_path):
-        os.makedirs(dir_path)
 def clean_dir(dir_path: str):
     if os.path.exists(dir_path):
         shutil.rmtree(dir_path)
     os.makedirs(dir_path)
-def unzip(zip_path: str, extract_to: str):
-    mk_dir(extract_to)
-    # 打开ZIP文件
-    with zipfile.ZipFile(zip_path, "r") as zip_ref:
-        # 解压文件
-        zip_ref.extractall(extract_to)
-def compress(folder_path: str, zip_file: str):
-    # 确保文件夹存在
-    if not os.path.exists(folder_path):
-        raise ValueError(f"错误: 文件夹 '{folder_path}' 不存在")
-    # 打开 ZIP 文件，使用 'w' 模式表示写入
-    with zipfile.ZipFile(zip_file, "w", zipfile.ZIP_DEFLATED) as zipf:
-        # 遍历文件夹中的文件和子文件夹
-        for root, _, files in os.walk(folder_path):
-            for file in files:
-                file_path = os.path.join(root, file)
-                # 计算相对路径，保留文件夹的根目录
-                relative_path = os.path.relpath(file_path, folder_path)
-                zipf.write(
-                    file_path,
-                    arcname=os.path.join(os.path.basename(
-                        folder_path), relative_path),
-                )

 import os
 import shutil
 EN_US = os.getenv("LANG") != "zh_CN.UTF-8"
 TMP_DIR = "./__pycache__"
 def clean_dir(dir_path: str):
     if os.path.exists(dir_path):
         shutil.rmtree(dir_path)
     os.makedirs(dir_path)