Spaces:
Runtime error
Runtime error
Maki commited on
Commit ·
08d41d2
unverified ·
0
Parent(s):
Initial commit
Browse files- .SourceSageignore +54 -0
- .dockerignore +56 -0
- .github/workflows/sync-to-hf.yml +32 -0
- .github/workflows/sync-to-report-gh.yml +52 -0
- .gitignore +208 -0
- Dockerfile +28 -0
- LICENSE +21 -0
- README.md +174 -0
- app.py +431 -0
- docker-compose.dev.yml +25 -0
- docker-compose.yml +27 -0
- requirements.txt +4 -0
- theme.py +44 -0
.SourceSageignore
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# バージョン管理システム関連
|
| 2 |
+
.git/
|
| 3 |
+
.gitignore
|
| 4 |
+
|
| 5 |
+
# キャッシュファイル
|
| 6 |
+
__pycache__/
|
| 7 |
+
.pytest_cache/
|
| 8 |
+
**/__pycache__/**
|
| 9 |
+
*.pyc
|
| 10 |
+
|
| 11 |
+
# ビルド・配布関連
|
| 12 |
+
build/
|
| 13 |
+
dist/
|
| 14 |
+
*.egg-info/
|
| 15 |
+
|
| 16 |
+
# 一時ファイル・出力
|
| 17 |
+
output/
|
| 18 |
+
output.md
|
| 19 |
+
test_output/
|
| 20 |
+
.SourceSageAssets/
|
| 21 |
+
.SourceSageAssetsDemo/
|
| 22 |
+
|
| 23 |
+
# アセット
|
| 24 |
+
*.png
|
| 25 |
+
*.svg
|
| 26 |
+
*.jpg
|
| 27 |
+
*.jpeg
|
| 28 |
+
assets/
|
| 29 |
+
|
| 30 |
+
# その他
|
| 31 |
+
LICENSE
|
| 32 |
+
example/
|
| 33 |
+
package-lock.json
|
| 34 |
+
.DS_Store
|
| 35 |
+
|
| 36 |
+
# 特定のディレクトリを除外
|
| 37 |
+
tests/temp/
|
| 38 |
+
docs/drafts/
|
| 39 |
+
|
| 40 |
+
# パターンの例外(除外対象から除外)
|
| 41 |
+
!docs/important.md
|
| 42 |
+
!.github/workflows/
|
| 43 |
+
repository_summary.md
|
| 44 |
+
|
| 45 |
+
# Terraform関連
|
| 46 |
+
.terraform
|
| 47 |
+
*.terraform.lock.hcl
|
| 48 |
+
*.backup
|
| 49 |
+
*.tfstate
|
| 50 |
+
|
| 51 |
+
# Python仮想環境
|
| 52 |
+
venv
|
| 53 |
+
.venv
|
| 54 |
+
|
.dockerignore
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Git関連
|
| 2 |
+
.git
|
| 3 |
+
.gitignore
|
| 4 |
+
|
| 5 |
+
# Python関連
|
| 6 |
+
__pycache__/
|
| 7 |
+
*.py[cod]
|
| 8 |
+
*$py.class
|
| 9 |
+
*.so
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
*.egg-info/
|
| 24 |
+
.installed.cfg
|
| 25 |
+
*.egg
|
| 26 |
+
|
| 27 |
+
# 仮想環境
|
| 28 |
+
venv/
|
| 29 |
+
env/
|
| 30 |
+
ENV/
|
| 31 |
+
|
| 32 |
+
# IDE関連
|
| 33 |
+
.vscode/
|
| 34 |
+
.idea/
|
| 35 |
+
*.swp
|
| 36 |
+
*.swo
|
| 37 |
+
|
| 38 |
+
# OS関連
|
| 39 |
+
.DS_Store
|
| 40 |
+
Thumbs.db
|
| 41 |
+
|
| 42 |
+
# ログファイル
|
| 43 |
+
*.log
|
| 44 |
+
|
| 45 |
+
# 一時ファイル
|
| 46 |
+
*.tmp
|
| 47 |
+
*.temp
|
| 48 |
+
|
| 49 |
+
# Docker関連
|
| 50 |
+
Dockerfile*
|
| 51 |
+
docker-compose*
|
| 52 |
+
.dockerignore
|
| 53 |
+
|
| 54 |
+
# ドキュメント
|
| 55 |
+
README.md
|
| 56 |
+
LICENSE
|
.github/workflows/sync-to-hf.yml
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Sync to Hugging Face
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- main
|
| 7 |
+
- master
|
| 8 |
+
workflow_dispatch:
|
| 9 |
+
|
| 10 |
+
jobs:
|
| 11 |
+
sync-to-hf:
|
| 12 |
+
runs-on: ubuntu-latest
|
| 13 |
+
steps:
|
| 14 |
+
- name: Checkout repository
|
| 15 |
+
uses: actions/checkout@v4
|
| 16 |
+
with:
|
| 17 |
+
fetch-depth: 0
|
| 18 |
+
lfs: true
|
| 19 |
+
|
| 20 |
+
- name: Push to Hugging Face Hub
|
| 21 |
+
env:
|
| 22 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 23 |
+
run: |
|
| 24 |
+
# Git設定
|
| 25 |
+
git config --global user.email "action@github.com"
|
| 26 |
+
git config --global user.name "GitHub Action"
|
| 27 |
+
|
| 28 |
+
# Hugging Face Hubにリモートを追加
|
| 29 |
+
git remote add hf https://huggingface.co/spaces/MakiAi/wikipedia-to-markdown
|
| 30 |
+
|
| 31 |
+
# 強制プッシュでHugging Faceに同期
|
| 32 |
+
git push --force https://user:$HF_TOKEN@huggingface.co/spaces/MakiAi/wikipedia-to-markdown HEAD:main
|
.github/workflows/sync-to-report-gh.yml
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: 📊 デイリーレポートハブ同期 v2.3 (YUKIHIKO PR版 - 完全リモート実行)
|
| 2 |
+
on:
|
| 3 |
+
push:
|
| 4 |
+
branches: [main, master]
|
| 5 |
+
pull_request:
|
| 6 |
+
types: [opened, synchronize, closed]
|
| 7 |
+
|
| 8 |
+
env:
|
| 9 |
+
WEEK_START_DAY: 1
|
| 10 |
+
AUTO_APPROVE: true
|
| 11 |
+
AUTO_MERGE: true
|
| 12 |
+
CREATE_PR: true
|
| 13 |
+
# リモートスクリプトの設定
|
| 14 |
+
SCRIPTS_BASE_URL: https://raw.githubusercontent.com/Sunwood-ai-labsII/daily-report-hub_dev/main/.github/scripts
|
| 15 |
+
|
| 16 |
+
jobs:
|
| 17 |
+
sync-data:
|
| 18 |
+
runs-on: ubuntu-latest
|
| 19 |
+
steps:
|
| 20 |
+
- name: 📥 現在のリポジトリをチェックアウト
|
| 21 |
+
uses: actions/checkout@v4
|
| 22 |
+
with:
|
| 23 |
+
fetch-depth: 0
|
| 24 |
+
|
| 25 |
+
- name: 📅 週情報を計算
|
| 26 |
+
run: curl -LsSf ${SCRIPTS_BASE_URL}/calculate-week-info.sh | sh -s -- ${{ env.WEEK_START_DAY }}
|
| 27 |
+
|
| 28 |
+
- name: 🔍 Git活動を分析
|
| 29 |
+
run: curl -LsSf ${SCRIPTS_BASE_URL}/analyze-git-activity.sh | sh
|
| 30 |
+
|
| 31 |
+
- name: 📝 Markdownレポートを生成
|
| 32 |
+
run: curl -LsSf ${SCRIPTS_BASE_URL}/generate-markdown-reports.sh | sh
|
| 33 |
+
|
| 34 |
+
- name: 📂 レポートハブをクローン
|
| 35 |
+
env:
|
| 36 |
+
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
|
| 37 |
+
REPORT_HUB_REPO: ${{ vars.REPORT_HUB_REPO || 'Sunwood-ai-labsII/daily-report-hub' }}
|
| 38 |
+
run: |
|
| 39 |
+
git config --global user.name "GitHub Actions Bot"
|
| 40 |
+
git config --global user.email "actions@github.com"
|
| 41 |
+
git clone https://x-access-token:${GITHUB_TOKEN}@github.com/${REPORT_HUB_REPO}.git daily-report-hub
|
| 42 |
+
|
| 43 |
+
- name: 🏗️ Docusaurus構造を作成
|
| 44 |
+
run: curl -LsSf ${SCRIPTS_BASE_URL}/create-docusaurus-structure.sh | sh
|
| 45 |
+
|
| 46 |
+
- name: 🚀 YUKIHIKO権限でPR作成&自動承認
|
| 47 |
+
env:
|
| 48 |
+
GITHUB_TOKEN_ORIGINAL: ${{ secrets.GH_PAT }} # 承認用
|
| 49 |
+
YUKIHIKO_TOKEN: ${{ secrets.GH_PAT_YUKIHIKO }} # PR作成用
|
| 50 |
+
GITHUB_TOKEN: ${{ secrets.GH_PAT }} # デフォルト
|
| 51 |
+
REPORT_HUB_REPO: ${{ vars.REPORT_HUB_REPO || 'Sunwood-ai-labsII/daily-report-hub' }}
|
| 52 |
+
run: curl -LsSf ${SCRIPTS_BASE_URL}/sync-to-hub-gh.sh | sh
|
.gitignore
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[codz]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# PyInstaller
|
| 30 |
+
# Usually these files are written by a python script from a template
|
| 31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Unit test / coverage reports
|
| 40 |
+
htmlcov/
|
| 41 |
+
.tox/
|
| 42 |
+
.nox/
|
| 43 |
+
.coverage
|
| 44 |
+
.coverage.*
|
| 45 |
+
.cache
|
| 46 |
+
nosetests.xml
|
| 47 |
+
coverage.xml
|
| 48 |
+
*.cover
|
| 49 |
+
*.py.cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
cover/
|
| 53 |
+
|
| 54 |
+
# Translations
|
| 55 |
+
*.mo
|
| 56 |
+
*.pot
|
| 57 |
+
|
| 58 |
+
# Django stuff:
|
| 59 |
+
*.log
|
| 60 |
+
local_settings.py
|
| 61 |
+
db.sqlite3
|
| 62 |
+
db.sqlite3-journal
|
| 63 |
+
|
| 64 |
+
# Flask stuff:
|
| 65 |
+
instance/
|
| 66 |
+
.webassets-cache
|
| 67 |
+
|
| 68 |
+
# Scrapy stuff:
|
| 69 |
+
.scrapy
|
| 70 |
+
|
| 71 |
+
# Sphinx documentation
|
| 72 |
+
docs/_build/
|
| 73 |
+
|
| 74 |
+
# PyBuilder
|
| 75 |
+
.pybuilder/
|
| 76 |
+
target/
|
| 77 |
+
|
| 78 |
+
# Jupyter Notebook
|
| 79 |
+
.ipynb_checkpoints
|
| 80 |
+
|
| 81 |
+
# IPython
|
| 82 |
+
profile_default/
|
| 83 |
+
ipython_config.py
|
| 84 |
+
|
| 85 |
+
# pyenv
|
| 86 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
+
# .python-version
|
| 89 |
+
|
| 90 |
+
# pipenv
|
| 91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
+
# install all needed dependencies.
|
| 95 |
+
#Pipfile.lock
|
| 96 |
+
|
| 97 |
+
# UV
|
| 98 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
| 99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 100 |
+
# commonly ignored for libraries.
|
| 101 |
+
#uv.lock
|
| 102 |
+
|
| 103 |
+
# poetry
|
| 104 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 105 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 106 |
+
# commonly ignored for libraries.
|
| 107 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 108 |
+
#poetry.lock
|
| 109 |
+
#poetry.toml
|
| 110 |
+
|
| 111 |
+
# pdm
|
| 112 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 113 |
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
| 114 |
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
| 115 |
+
#pdm.lock
|
| 116 |
+
#pdm.toml
|
| 117 |
+
.pdm-python
|
| 118 |
+
.pdm-build/
|
| 119 |
+
|
| 120 |
+
# pixi
|
| 121 |
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
| 122 |
+
#pixi.lock
|
| 123 |
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
| 124 |
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
| 125 |
+
.pixi
|
| 126 |
+
|
| 127 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 128 |
+
__pypackages__/
|
| 129 |
+
|
| 130 |
+
# Celery stuff
|
| 131 |
+
celerybeat-schedule
|
| 132 |
+
celerybeat.pid
|
| 133 |
+
|
| 134 |
+
# SageMath parsed files
|
| 135 |
+
*.sage.py
|
| 136 |
+
|
| 137 |
+
# Environments
|
| 138 |
+
.env
|
| 139 |
+
.envrc
|
| 140 |
+
.venv
|
| 141 |
+
env/
|
| 142 |
+
venv/
|
| 143 |
+
ENV/
|
| 144 |
+
env.bak/
|
| 145 |
+
venv.bak/
|
| 146 |
+
|
| 147 |
+
# Spyder project settings
|
| 148 |
+
.spyderproject
|
| 149 |
+
.spyproject
|
| 150 |
+
|
| 151 |
+
# Rope project settings
|
| 152 |
+
.ropeproject
|
| 153 |
+
|
| 154 |
+
# mkdocs documentation
|
| 155 |
+
/site
|
| 156 |
+
|
| 157 |
+
# mypy
|
| 158 |
+
.mypy_cache/
|
| 159 |
+
.dmypy.json
|
| 160 |
+
dmypy.json
|
| 161 |
+
|
| 162 |
+
# Pyre type checker
|
| 163 |
+
.pyre/
|
| 164 |
+
|
| 165 |
+
# pytype static type analyzer
|
| 166 |
+
.pytype/
|
| 167 |
+
|
| 168 |
+
# Cython debug symbols
|
| 169 |
+
cython_debug/
|
| 170 |
+
|
| 171 |
+
# PyCharm
|
| 172 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 173 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 174 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 175 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 176 |
+
#.idea/
|
| 177 |
+
|
| 178 |
+
# Abstra
|
| 179 |
+
# Abstra is an AI-powered process automation framework.
|
| 180 |
+
# Ignore directories containing user credentials, local state, and settings.
|
| 181 |
+
# Learn more at https://abstra.io/docs
|
| 182 |
+
.abstra/
|
| 183 |
+
|
| 184 |
+
# Visual Studio Code
|
| 185 |
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
| 186 |
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
| 187 |
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
| 188 |
+
# you could uncomment the following to ignore the entire vscode folder
|
| 189 |
+
# .vscode/
|
| 190 |
+
|
| 191 |
+
# Ruff stuff:
|
| 192 |
+
.ruff_cache/
|
| 193 |
+
|
| 194 |
+
# PyPI configuration file
|
| 195 |
+
.pypirc
|
| 196 |
+
|
| 197 |
+
# Cursor
|
| 198 |
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
| 199 |
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
| 200 |
+
# refer to https://docs.cursor.com/context/ignore-files
|
| 201 |
+
.cursorignore
|
| 202 |
+
.cursorindexingignore
|
| 203 |
+
|
| 204 |
+
# Marimo
|
| 205 |
+
marimo/_static/
|
| 206 |
+
marimo/_lsp/
|
| 207 |
+
__marimo__/
|
| 208 |
+
.SourceSageAssets/
|
Dockerfile
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python 3.11をベースイメージとして使用
|
| 2 |
+
FROM python:3.11-slim
|
| 3 |
+
|
| 4 |
+
# 作業ディレクトリを設定
|
| 5 |
+
WORKDIR /app
|
| 6 |
+
|
| 7 |
+
# システムパッケージの更新とクリーンアップ
|
| 8 |
+
RUN apt-get update && apt-get install -y \
|
| 9 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 10 |
+
|
| 11 |
+
# 依存関係ファイルをコピー
|
| 12 |
+
COPY requirements.txt .
|
| 13 |
+
|
| 14 |
+
# Python依存関係をインストール
|
| 15 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 16 |
+
|
| 17 |
+
# アプリケーションファイルをコピー
|
| 18 |
+
COPY . .
|
| 19 |
+
|
| 20 |
+
# ポート7861を公開
|
| 21 |
+
EXPOSE 7861
|
| 22 |
+
|
| 23 |
+
# 非rootユーザーを作成してセキュリティを向上
|
| 24 |
+
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
|
| 25 |
+
USER appuser
|
| 26 |
+
|
| 27 |
+
# アプリケーションを起動
|
| 28 |
+
CMD ["python", "app.py"]
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2025 Maki
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
title: wikipedia to markdown
|
| 4 |
+
sdk: gradio
|
| 5 |
+
emoji: 📚
|
| 6 |
+
colorFrom: yellow
|
| 7 |
+
colorTo: gray
|
| 8 |
+
thumbnail: >-
|
| 9 |
+
https://cdn-uploads.huggingface.co/production/uploads/64e0ef4a4c78e1eba5178d7a/vJQZ24fctExV3dax_BGU-.jpeg
|
| 10 |
+
sdk_version: 5.42.0
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
<div align="center">
|
| 14 |
+
|
| 15 |
+

|
| 16 |
+
|
| 17 |
+
# 📚 Wikipedia to Markdown Converter
|
| 18 |
+
|
| 19 |
+
*WikipediaページをMarkdown形式に変換するWebアプリケーション*
|
| 20 |
+
|
| 21 |
+
[](https://python.org)
|
| 22 |
+
[](https://gradio.app)
|
| 23 |
+
[](LICENSE)
|
| 24 |
+
[](https://huggingface.co/spaces/MakiAi/wikipedia-to-markdown)
|
| 25 |
+
|
| 26 |
+
</div>
|
| 27 |
+
|
| 28 |
+
---
|
| 29 |
+
|
| 30 |
+
## 🌟 概要
|
| 31 |
+
|
| 32 |
+
**Wikipedia to Markdown Converter** は、Wikipediaの記事を整形されたMarkdownドキュメントに変換するWebアプリケーションです。単体処理と一括処理に対応し、複数のダウンロード形式を提供します。
|
| 33 |
+
|
| 34 |
+
### ✨ **主要機能**
|
| 35 |
+
|
| 36 |
+
- 🔄 **単体・一括処理** - 1つまたは複数のWikipediaページを同時変換
|
| 37 |
+
- 📊 **詳細分析** - 文字数、成功率、ファイル情報を表示
|
| 38 |
+
- 🗜️ **複数形式** - 個別ファイル、結合文書、ZIPダウンロード
|
| 39 |
+
- 🌐 **多言語対応** - 全てのWikipedia言語版に対応
|
| 40 |
+
- 🎨 **使いやすいUI** - 直感的で美しいインターフェース
|
| 41 |
+
|
| 42 |
+
---
|
| 43 |
+
|
| 44 |
+
## 🚀 使い方
|
| 45 |
+
|
| 46 |
+
### 🌐 **オンラインで試す(推奨)**
|
| 47 |
+
**[🚀 デモサイトはこちら](https://huggingface.co/spaces/MakiAi/wikipedia-to-markdown)**
|
| 48 |
+
|
| 49 |
+
### 💻 **ローカルで実行**
|
| 50 |
+
|
| 51 |
+
```bash
|
| 52 |
+
# リポジトリをクローン
|
| 53 |
+
git clone https://github.com/your-username/wikipedia-to-markdown.git
|
| 54 |
+
cd wikipedia-to-markdown
|
| 55 |
+
|
| 56 |
+
# 依存関係をインストール
|
| 57 |
+
pip install -r requirements.txt
|
| 58 |
+
|
| 59 |
+
# アプリケーションを起動
|
| 60 |
+
python app.py
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
### 🐳 **Dockerで実行**
|
| 64 |
+
|
| 65 |
+
```bash
|
| 66 |
+
# Docker Composeを使用
|
| 67 |
+
docker-compose up -d
|
| 68 |
+
|
| 69 |
+
# ブラウザで http://localhost:7860 にアクセス
|
| 70 |
+
```
|
| 71 |
+
|
| 72 |
+
---
|
| 73 |
+
|
| 74 |
+
## 📋 操作方法
|
| 75 |
+
|
| 76 |
+
### 🔗 **単体処理**
|
| 77 |
+
1. WikipediaのURLを入力
|
| 78 |
+
2. 「✨ 変換する」ボタンをクリック
|
| 79 |
+
3. 生成されたMarkdownをコピーまたはダウンロード
|
| 80 |
+
|
| 81 |
+
### 📚 **一括処理**
|
| 82 |
+
1. 複数のURLを1行に1つずつ入力
|
| 83 |
+
2. 「🚀 一括変換する」ボタンをクリック
|
| 84 |
+
3. 処理結果を確認し、必要な形式でダウンロード
|
| 85 |
+
|
| 86 |
+
### 📊 **処理結果の表示例**
|
| 87 |
+
```
|
| 88 |
+
============================================================
|
| 89 |
+
📊 処理結果サマリー
|
| 90 |
+
============================================================
|
| 91 |
+
🔗 処理対象URL数: 3
|
| 92 |
+
✅ 成功: 2
|
| 93 |
+
❌ 失敗: 1
|
| 94 |
+
|
| 95 |
+
✅ 処理成功: https://ja.wikipedia.org/wiki/Python
|
| 96 |
+
📄 ページタイトル: Python
|
| 97 |
+
📊 文字数: 15,432 文字
|
| 98 |
+
💾 ファイル名: Python.md
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
---
|
| 102 |
+
|
| 103 |
+
## 📦 ダウンロード形式
|
| 104 |
+
|
| 105 |
+
| 形式 | 説明 | 用途 |
|
| 106 |
+
|------|------|------|
|
| 107 |
+
| **📄 個別ファイル** | 各ページを別々のMarkdownファイル | 個別編集・管理 |
|
| 108 |
+
| **📚 結合文書** | 全ページを1つのファイルに結合 | 一括閲覧・印刷 |
|
| 109 |
+
| **🗜️ ZIPアーカイブ** | 全ファイルを圧縮してまとめて | 大量ファイルの管理 |
|
| 110 |
+
|
| 111 |
+
---
|
| 112 |
+
|
| 113 |
+
## 🔧 技術仕様
|
| 114 |
+
|
| 115 |
+
### **使用技術**
|
| 116 |
+
- **Python 3.8+** - メイン言語
|
| 117 |
+
- **Gradio** - Webインターフェース
|
| 118 |
+
- **BeautifulSoup4** - HTML解析
|
| 119 |
+
- **html2text** - Markdown変換
|
| 120 |
+
- **Requests** - HTTP通信
|
| 121 |
+
|
| 122 |
+
### **処理フロー**
|
| 123 |
+
1. **URL検証** - 入力URLの妥当性チェック
|
| 124 |
+
2. **HTML取得** - Wikipediaページの取得
|
| 125 |
+
3. **コンテンツ抽出** - 主要コンテンツの抽出
|
| 126 |
+
4. **クリーンアップ** - 不要部分(脚注、編集リンク等)の削除
|
| 127 |
+
5. **Markdown変換** - 整形されたMarkdownに変換
|
| 128 |
+
6. **ファイル生成** - 各種形式でのファイル出力
|
| 129 |
+
|
| 130 |
+
---
|
| 131 |
+
|
| 132 |
+
## 📁 プロジェクト構成
|
| 133 |
+
|
| 134 |
+
```
|
| 135 |
+
wikipedia-to-markdown/
|
| 136 |
+
├── app.py # メインアプリケーション
|
| 137 |
+
├── theme.py # UIテーマ設定
|
| 138 |
+
├── requirements.txt # Python依存関係
|
| 139 |
+
├── docker-compose.yml # Docker設定
|
| 140 |
+
├── .github/workflows/ # CI/CD設定
|
| 141 |
+
└── README.md # このファイル
|
| 142 |
+
```
|
| 143 |
+
|
| 144 |
+
---
|
| 145 |
+
|
| 146 |
+
## 🛠️ カスタマイズ
|
| 147 |
+
|
| 148 |
+
### **テーマ変更**
|
| 149 |
+
`theme.py`を編集してUIの色やスタイルを変更できます。
|
| 150 |
+
|
| 151 |
+
### **処理ロジック拡張**
|
| 152 |
+
`app.py`の`scrape_wikipedia_to_markdown_final()`関数を編集して、変換処理をカスタマイズできます。
|
| 153 |
+
|
| 154 |
+
---
|
| 155 |
+
|
| 156 |
+
## 📄 ライセンス
|
| 157 |
+
|
| 158 |
+
このプロジェクトは[MITライセンス](LICENSE)の下で公開されています。
|
| 159 |
+
|
| 160 |
+
---
|
| 161 |
+
|
| 162 |
+
## 🤝 コントリビューション
|
| 163 |
+
|
| 164 |
+
バグ報告や機能提案は[GitHub Issues](https://github.com/your-username/wikipedia-to-markdown/issues)でお願いします。
|
| 165 |
+
|
| 166 |
+
---
|
| 167 |
+
|
| 168 |
+
<div align="center">
|
| 169 |
+
|
| 170 |
+
**🌟 このプロジェクトが役に立ったらスターをお願いします!**
|
| 171 |
+
|
| 172 |
+
*© 2025 Wikipedia to Markdown Converter*
|
| 173 |
+
|
| 174 |
+
</div>
|
app.py
ADDED
|
@@ -0,0 +1,431 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
from bs4 import BeautifulSoup
|
| 3 |
+
import html2text
|
| 4 |
+
import re
|
| 5 |
+
import gradio as gr
|
| 6 |
+
from theme import create_zen_theme
|
| 7 |
+
import tempfile
|
| 8 |
+
import os
|
| 9 |
+
import zipfile
|
| 10 |
+
from urllib.parse import urlparse, unquote
|
| 11 |
+
|
| 12 |
+
def scrape_wikipedia_to_markdown_final(url: str) -> str:
    """
    Scrape a Wikipedia page, clean it up and convert it to titled Markdown.

    Processing flow:
    1. Fetch the page and use the text of ``h1#firstHeading`` as the H1 title.
    2. Turn ``<dt>`` tags (e.g. character lists) into ``<h4>`` headings.
    3. Convert the ``div.mw-parser-output`` content area to Markdown.
    4. Drop everything from the "## 脚注" (footnotes) heading onwards.
    5. Remove "[編集]" (edit) links.
    6. Join the title and the cleaned body.

    Args:
        url (str): URL of the Wikipedia page to scrape.

    Returns:
        str: The final Markdown content. No exception is raised on failure;
        instead a message starting with "エラー:" or "HTTP" is returned, so
        callers can tell failures from content by checking those prefixes.
    """
    try:
        # 1. Fetch and parse the HTML.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # Without a timeout an unresponsive server would block this call
        # (and the UI request that triggered it) indefinitely.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()  # raise on HTTP error status codes
        response.encoding = response.apparent_encoding  # auto-detect the charset
        soup = BeautifulSoup(response.text, 'html.parser')

        # --- Page title ---
        title_tag = soup.find('h1', id='firstHeading')
        page_title = title_tag.get_text(strip=True) if title_tag else "Wikipedia ページ"

        # 2. Locate the main content area.
        content_div = soup.find('div', class_='mw-parser-output')
        if not content_div:
            return "エラー: コンテンツエリアが見つかりませんでした。"

        # 3. Pre-process the HTML: promote <dt> entries to <h4> headings.
        for dt_tag in content_div.find_all('dt'):
            h4_tag = soup.new_tag('h4')
            h4_tag.extend(dt_tag.contents)
            dt_tag.replace_with(h4_tag)

        # 4. First-pass conversion from HTML to Markdown.
        h = html2text.HTML2Text()
        h.body_width = 0  # disable hard line wrapping
        full_markdown_text = h.handle(str(content_div))

        # 5. Cut the Markdown at the footnotes heading, if present.
        footnote_marker = "\n## 脚注"
        footnote_index = full_markdown_text.find(footnote_marker)
        body_text = full_markdown_text[:footnote_index] if footnote_index != -1 else full_markdown_text

        # 6. Strip every "[編集]" edit link in one regex pass.
        cleaned_body = re.sub(r'\[\[編集\]\(.+?\)]\n', '', body_text)

        # 7. Join the title and the cleaned body.
        final_markdown = f"# {page_title}\n\n{cleaned_body.strip()}"

        return final_markdown

    except requests.exceptions.RequestException as e:
        return f"HTTPリクエストエラー: {e}"
    except Exception as e:
        # Prefixed with "エラー:" so the prefix-based failure check used by
        # callers recognises this as an error rather than as page content.
        return f"エラー: 予期せぬエラーが発生しました: {e}"
|
| 77 |
+
|
| 78 |
+
def get_filename_from_url(url):
    """Build a filesystem-safe ``.md`` filename from a page URL.

    The last path segment of the URL is percent-decoded and every character
    in ``<>:"/\\|?*`` is replaced with an underscore.

    Args:
        url: Page URL, e.g. ``https://ja.wikipedia.org/wiki/Python``.

    Returns:
        str: ``"<page name>.md"``, or ``"wikipedia_page.md"`` when the URL
        cannot be parsed or its last path segment is empty (for example a
        URL ending in ``/``).
    """
    fallback = "wikipedia_page.md"
    try:
        # Extract the page name (last path segment) and URL-decode it.
        page_name = unquote(urlparse(url).path.split('/')[-1])
    except (ValueError, TypeError, AttributeError):
        # Malformed URL (e.g. invalid IPv6 literal) or a non-string input.
        return fallback

    # Replace characters that are not allowed in file names.
    safe_filename = re.sub(r'[<>:"/\\|?*]', '_', page_name)
    if not safe_filename:
        # Avoid producing a bare ".md" file name.
        return fallback
    return f"{safe_filename}.md"
|
| 91 |
+
|
| 92 |
+
def create_download_file(content, filename):
    """Write ``content`` to a UTF-8 file in the system temp directory.

    Only the final path component of ``filename`` is used, so a name that
    contains directory parts (absolute path, ``..`` segments) cannot place
    the file outside the temp directory.

    Args:
        content: Text to write.
        filename: Desired file name for the download.

    Returns:
        The path of the written file, or ``None`` if it could not be
        created (the error is printed, not raised).
    """
    try:
        # Create the file inside the temp directory; strip any directory
        # components from the requested name first.
        safe_name = os.path.basename(filename)
        file_path = os.path.join(tempfile.gettempdir(), safe_name)

        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)

        return file_path
    except Exception as e:
        # Deliberate best-effort: callers treat None as "no download file".
        print(f"ファイル作成エラー: {e}")
        return None
|
| 106 |
+
|
| 107 |
+
def create_zip_file(file_paths, zip_filename="wikipedia_export.zip"):
    """Bundle several files into one ZIP archive in the system temp directory.

    Each existing file is stored under its base name only. A path listed
    more than once is added a single time, and different files that share a
    base name get a ``_2``, ``_3``, ... suffix so the archive never contains
    duplicate member names. Paths that do not exist are skipped.

    Args:
        file_paths: Iterable of file paths to include.
        zip_filename: Name of the archive file to create.

    Returns:
        The path of the created archive, or ``None`` if it could not be
        created (the error is printed, not raised).
    """
    try:
        zip_path = os.path.join(tempfile.gettempdir(), zip_filename)
        used_names = set()

        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            # dict.fromkeys drops repeated paths while keeping their order.
            for file_path in dict.fromkeys(file_paths):
                if not os.path.exists(file_path):
                    continue

                # Store only the file name, not the directory structure.
                arcname = os.path.basename(file_path)
                if arcname in used_names:
                    stem, ext = os.path.splitext(arcname)
                    counter = 2
                    while f"{stem}_{counter}{ext}" in used_names:
                        counter += 1
                    arcname = f"{stem}_{counter}{ext}"

                used_names.add(arcname)
                zipf.write(file_path, arcname)

        return zip_path
    except Exception as e:
        # Deliberate best-effort: callers treat None as "no ZIP available".
        print(f"ZIP作成エラー: {e}")
        return None
|
| 124 |
+
|
| 125 |
+
def process_wikipedia_url(url):
    """Gradio handler: convert one Wikipedia URL to Markdown.

    The URL is validated before any request is made: the scheme must be
    http or https, and the host must be ``wikipedia.org`` or one of its
    subdomains. The host is checked on the parsed URL, not by substring,
    so a foreign host that merely mentions "wikipedia.org" in its path or
    query is rejected.

    Args:
        url: URL entered by the user.

    Returns:
        tuple: ``(text, file_path)``. ``text`` is the Markdown on success or
        a user-facing message on failure. ``file_path`` is the path of the
        download file, or ``None`` on failure or if the file could not be
        written.
    """
    if not url:
        return "URLを入力してください。", None

    url = url.strip()

    # Parse once and validate scheme and host on the parsed parts.
    try:
        parsed = urlparse(url)
        scheme = parsed.scheme
        host = (parsed.hostname or "").lower()
    except ValueError:
        # e.g. malformed IPv6 literal; treat as an invalid URL.
        scheme, host = "", ""

    if scheme not in ("http", "https"):
        return "有効なURLを入力してください(http://またはhttps://から始まるURL)。", None

    if host != "wikipedia.org" and not host.endswith(".wikipedia.org"):
        return "WikipediaのURLを入力してください。", None

    # Run the scraper; it reports failures as prefixed message strings.
    markdown_content = scrape_wikipedia_to_markdown_final(url)

    # "予期せぬエラー" covers the scraper's unexpected-error message, which
    # would otherwise be saved to a file as if it were page content.
    if markdown_content.startswith(("エラー:", "HTTP", "予期せぬエラー")):
        return markdown_content, None

    # Create the download file for the successful conversion.
    filename = get_filename_from_url(url)
    file_path = create_download_file(markdown_content, filename)
    return markdown_content, file_path
|
| 148 |
+
|
| 149 |
+
def process_multiple_urls(urls_text, progress=gr.Progress()):
    """Convert a newline-separated list of Wikipedia URLs to Markdown in one batch.

    Args:
        urls_text: Text containing one URL per line; blank lines are ignored.
        progress: Gradio progress tracker. It is called once per URL with the
            fraction processed so far and a status message.

    Returns:
        A 4-tuple ``(report, batch_file_path, individual_files, zip_file_path)``:
        the human-readable report (summary followed by per-URL results), the
        path of the combined Markdown file (None when no page succeeded), the
        list of per-page Markdown file paths, and the path of the ZIP archive
        of those files (None when there are none).
    """
    # Imported locally so this function does not rely on the file's import header.
    from urllib.parse import urlparse

    if not urls_text or not urls_text.strip():
        return "URLリストを入力してください。", None, [], None

    # One URL per line. Non-blank input always yields at least one entry, so
    # no further emptiness check is needed.
    urls = [line.strip() for line in urls_text.splitlines() if line.strip()]

    results = []
    all_content = []
    individual_files = []
    total_urls = len(urls)
    success_count = 0

    for i, url in enumerate(urls):
        progress((i + 1) / total_urls, f"処理中: {i + 1}/{total_urls}")

        # Validate the parsed URL instead of searching for substrings: the
        # URL is fetched by the server, so "wikipedia.org" appearing in the
        # path or query of some other host must not be enough to pass.
        try:
            parsed = urlparse(url)
            host = (parsed.hostname or "").lower()
        except ValueError:
            parsed, host = None, ""

        if parsed is None or parsed.scheme not in ("http", "https") or not host:
            results.append(f"❌ 無効なURL: {url}")
            continue

        if host != "wikipedia.org" and not host.endswith(".wikipedia.org"):
            results.append(f"❌ Wikipedia以外のURL: {url}")
            continue

        # Scrape the page. Any failure is recorded against this URL only so
        # the remaining URLs are still processed.
        try:
            markdown_content = scrape_wikipedia_to_markdown_final(url)
            if markdown_content.startswith(("エラー:", "HTTP")):
                # Text with these prefixes is treated as a failure report.
                results.append(f"❌ 処理失敗: {url}\n エラー: {markdown_content}")
            else:
                # Extract the page title from the leading Markdown heading.
                title_match = re.match(r'^# (.+)', markdown_content)
                page_title = title_match.group(1) if title_match else "不明なページ"

                # Report the size and the file name for this page.
                char_count = len(markdown_content)
                filename = get_filename_from_url(url)

                results.append(f"✅ 処理成功: {url}")
                results.append(f" 📄 ページタイトル: {page_title}")
                results.append(f" 📊 文字数: {char_count:,} 文字")
                results.append(f" 💾 ファイル名: {filename}")

                all_content.append(markdown_content)
                success_count += 1

                # Create the per-page download file.
                file_path = create_download_file(markdown_content, filename)
                if file_path:
                    individual_files.append(file_path)
        except Exception as e:
            results.append(f"❌ 処理エラー: {url}")
            results.append(f" エラー内容: {str(e)}")

    # Summary block shown above the per-URL results.
    summary = [
        "=" * 60,
        "📊 処理結果サマリー",
        "=" * 60,
        f"🔗 処理対象URL数: {total_urls}",
        f"✅ 成功: {success_count}",
        f"❌ 失敗: {total_urls - success_count}",
        "",
    ]

    final_result = "\n".join(summary + results)

    # Combined download: all pages in one file, separated by a ruled line.
    batch_file_path = None
    if all_content:
        # The separator is built first so that join() uses the whole ruled
        # separator between pages, not just its trailing newlines.
        separator = "\n\n" + "=" * 80 + "\n\n"
        combined_content = separator.join(all_content)
        batch_file_path = create_download_file(combined_content, "wikipedia_batch_export.md")

    # ZIP archive of the per-page files.
    zip_file_path = None
    if individual_files:
        zip_file_path = create_zip_file(individual_files, "wikipedia_export.zip")

    return final_result, batch_file_path, individual_files, zip_file_path
|
| 234 |
+
|
| 235 |
+
# Gradioインターフェースの作成
|
| 236 |
+
def create_interface():
    """Build the Gradio Blocks UI for the Wikipedia-to-Markdown converter.

    The UI consists of a header, a single-URL tab, a batch tab and usage
    notes. The event handlers delegate to ``process_wikipedia_url`` and
    ``process_multiple_urls``.

    Returns:
        The constructed ``gr.Blocks`` app; launching it is left to the caller.
    """
    # Number of per-page download slots shown in the batch tab (the usage
    # notes below tell the user that at most five are displayed).
    max_individual_files = 5

    theme = create_zen_theme()

    # NOTE(review): confirm the component nesting below against the rendered
    # layout of the app.
    with gr.Blocks(theme=theme, title="Wikipedia to Markdown Converter") as demo:
        # Header
        gr.HTML("""
        <div style='text-align: center; margin-bottom: 2rem; padding: 2rem; background: linear-gradient(135deg, #d4a574 0%, #ffffff 50%, #f5f2ed 100%); color: #3d405b; border-radius: 12px;'>
        <h1 style='font-size: 3rem; margin-bottom: 0.5rem; text-shadow: 1px 1px 2px rgba(0,0,0,0.1);'>📚 Wikipedia to Markdown Converter</h1>
        <p style='font-size: 1.2rem; opacity: 0.8;'>WikipediaのURLを入力して、Markdown形式に変換します</p>
        </div>
        """)

        with gr.Tabs():
            # Single-URL tab
            with gr.TabItem("🔗 単体処理"):
                with gr.Row():
                    with gr.Column(scale=1):
                        url_input = gr.Textbox(
                            label="🔗 Wikipedia URL",
                            placeholder="https://ja.wikipedia.org/wiki/...",
                            value="https://ja.wikipedia.org/wiki/Python"
                        )
                        convert_btn = gr.Button("✨ 変換する", variant="primary")

                    with gr.Column(scale=1):
                        output_text = gr.Textbox(
                            label="📝 変換されたMarkdown",
                            lines=20,
                            max_lines=50,
                            show_copy_button=True
                        )
                        download_file = gr.File(
                            label="📥 マークダウンファイルをダウンロード",
                            visible=False
                        )

                def update_single_output(url):
                    """Convert one URL; reveal the download only when a file exists."""
                    content, file_path = process_wikipedia_url(url)
                    if file_path:
                        return content, gr.update(value=file_path, visible=True)
                    return content, gr.update(visible=False)

                convert_btn.click(
                    fn=update_single_output,
                    inputs=url_input,
                    outputs=[output_text, download_file]
                )

                def example_process(url):
                    """Convert an example URL, returning only the Markdown text."""
                    content, _ = process_wikipedia_url(url)
                    return content

                # Usage examples
                gr.Examples(
                    examples=[
                        ["https://ja.wikipedia.org/wiki/Python"],
                        ["https://ja.wikipedia.org/wiki/JavaScript"],
                        ["https://ja.wikipedia.org/wiki/HTML"]
                    ],
                    inputs=url_input,
                    outputs=output_text,
                    fn=example_process,
                    cache_examples=False
                )

            # Batch tab
            with gr.TabItem("📋 一括処理"):
                with gr.Row():
                    with gr.Column(scale=1):
                        urls_input = gr.Textbox(
                            label="📋 Wikipedia URLリスト(1行に1つずつ)",
                            placeholder="https://ja.wikipedia.org/wiki/Python\nhttps://ja.wikipedia.org/wiki/JavaScript\nhttps://ja.wikipedia.org/wiki/HTML",
                            lines=10,
                            value="https://ja.wikipedia.org/wiki/Python\nhttps://ja.wikipedia.org/wiki/JavaScript"
                        )
                        batch_convert_btn = gr.Button("🚀 一括変換する", variant="primary")

                    with gr.Column(scale=1):
                        batch_output_text = gr.Textbox(
                            label="📝 一括変換結果",
                            lines=15,
                            max_lines=30,
                            show_copy_button=True
                        )
                        batch_download_file = gr.File(
                            label="📥 全体をまとめてダウンロード",
                            visible=False
                        )
                        zip_download_file = gr.File(
                            label="🗜️ ZIPファイルでダウンロード",
                            visible=False
                        )

                # Per-page download area, hidden until a batch produces files.
                individual_downloads = gr.Column(visible=False)
                with individual_downloads:
                    gr.Markdown("### 📥 個別ダウンロード")
                    individual_file_slots = [
                        gr.File(label="", visible=False)
                        for _ in range(max_individual_files)
                    ]

                def update_batch_output(urls_text, progress=gr.Progress()):
                    """Run the batch conversion and build one update per output.

                    The ``gr.Progress`` default lets Gradio inject a tracker
                    for this event; it is forwarded so the per-URL progress
                    reported by ``process_multiple_urls`` reaches the UI.
                    """
                    content, batch_file_path, individual_files, zip_file_path = (
                        process_multiple_urls(urls_text, progress)
                    )

                    def file_update(path):
                        # Show the component with its file, or hide it.
                        if path:
                            return gr.update(value=path, visible=True)
                        return gr.update(visible=False)

                    # Build the list of return values, in `outputs=` order.
                    outputs = [
                        content,
                        file_update(batch_file_path),
                        file_update(zip_file_path),
                        # Show the per-page area only when there are files.
                        gr.update(visible=bool(individual_files)),
                    ]

                    # Per-page files (only the first few are displayed).
                    for i in range(max_individual_files):
                        if i < len(individual_files):
                            filename = os.path.basename(individual_files[i])
                            outputs.append(gr.update(value=individual_files[i], visible=True, label=f"📄 {filename}"))
                        else:
                            outputs.append(gr.update(visible=False))

                    return outputs

                batch_convert_btn.click(
                    fn=update_batch_output,
                    inputs=urls_input,
                    outputs=[
                        batch_output_text,
                        batch_download_file,
                        zip_download_file,
                        individual_downloads,
                        *individual_file_slots
                    ]
                )

                gr.Markdown("### 💡 一括処理の使い方")
                gr.Markdown("1. テキストエリアに変換したいWikipediaのURLを1行に1つずつ入力します")
                gr.Markdown("2. 「🚀 一括変換する」ボタンをクリックします")
                gr.Markdown("3. 処理の進行状況が表示され、完了後に結果が表示されます")
                gr.Markdown("4. 各URLの処理結果(成功/失敗)が明確に表示されます")

        gr.Markdown("---")
        gr.Markdown("### 🎯 基本的な使用方法")
        gr.Markdown("- **単体処理**: 1つのWikipediaページを変換したい場合")
        gr.Markdown("- **一括処理**: 複数のWikipediaページを一度に変換したい場合")
        gr.Markdown("- 生成されたMarkdownは右側のテキストエリアからコピーできます")
        gr.Markdown("- **📥 ダウンロード機能**: 変換が成功すると、マークダウンファイルとして直接ダウンロードできます")
        gr.Markdown(" - 単体処理: ページ名に基づいたファイル名で個別ダウンロード")
        gr.Markdown(" - 一括処理: 各URLごとの個別ダウンロード + 全体をまとめた一括ダウンロード + **🗜️ ZIPファイル**")
        gr.Markdown(" - 個別ダウンロード: 成功した各ページを個別のファイルとしてダウンロード可能(最大5つまで表示)")
        gr.Markdown(" - **ZIPダウンロード**: 複数のMarkdownファイルを1つのZIPファイルにまとめてダウンロード")

        # About the ZEN theme
        gr.HTML("""
        <div style='text-align: center; margin-top: 2rem; padding: 1.5rem; background: #ffffff; border-radius: 12px;'>
        <h3 style='color: #3d405b; margin-top: 0;'>🧘♀️ ZENテーマ</h3>
        <p style='color: #8b7355;'>和モダンなデザインで、使いやすさと美しさを追求しました</p>
        </div>
        """)

    return demo
|
| 420 |
+
|
| 421 |
+
if __name__ == "__main__":
    # Settings for serving the app: listen on all interfaces at port 7860,
    # without a public share link, with debug output enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }

    # Build the interface, then start the application.
    demo = create_interface()
    demo.launch(**launch_options)
|
docker-compose.dev.yml
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: '3.8'

services:
  wikipedia-converter-dev:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      # Host port 7861 -> container port 7860.
      - "7861:7860"
    environment:
      - PYTHONUNBUFFERED=1
      - GRADIO_SERVER_NAME=0.0.0.0
      # Must match the container side of the port mapping above.
      - GRADIO_SERVER_PORT=7860
    volumes:
      # Mount the source tree so code changes are reflected during development
      - .:/app
      - /app/__pycache__
    restart: unless-stopped
    command: python app.py
    networks:
      - wikipedia-dev-network

networks:
  wikipedia-dev-network:
    driver: bridge
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: '3.8'

services:
  wikipedia-converter:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      # Host port 7861 -> container port 7860.
      - "7861:7860"
    environment:
      - PYTHONUNBUFFERED=1
    # volumes:
      # Uncomment to mount the source tree and pick up code changes during development
      # - .:/app
    restart: unless-stopped
    healthcheck:
      # The check runs inside the container, so it must probe the container
      # port (7860), not the published host port (7861).
      test: ["CMD", "curl", "-f", "http://localhost:7860"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    networks:
      - wikipedia-network

networks:
  wikipedia-network:
    driver: bridge
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
requests>=2.31.0
|
| 2 |
+
beautifulsoup4>=4.12.0
|
| 3 |
+
html2text>=2020.1.16
|
| 4 |
+
gradio>=5.42.0
|
theme.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
|
| 3 |
+
def create_zen_theme():
    """Create the ZEN theme.

    A Japanese-modern design that aims for both usability and beauty.

    Returns:
        The result of building a ``gr.Theme`` and applying the colour and
        border overrides below with ``.set()``.
    """
    # Font stacks: Japanese-capable sans-serif fonts first, generic fallbacks last.
    sans_fonts = [
        "Hiragino Sans",
        "Noto Sans JP",
        "Yu Gothic",
        "system-ui",
        "sans-serif",
    ]
    mono_fonts = [
        "SF Mono",
        "Monaco",
        "monospace",
    ]

    zen_base = gr.Theme(
        primary_hue="amber",
        secondary_hue="stone",
        neutral_hue="slate",
        text_size="md",
        spacing_size="lg",
        radius_size="sm",
        font=sans_fonts,
        font_mono=mono_fonts,
    )

    # Individual style variables overridden on top of the base theme.
    overrides = {
        "body_background_fill": "#ffffff",
        "body_text_color": "#3d405b",
        "button_primary_background_fill": "#d4a574",
        "button_primary_background_fill_hover": "#c19660",
        "button_primary_text_color": "#ffffff",
        "button_secondary_background_fill": "#f5f2ed",
        "button_secondary_text_color": "#3d405b",
        "input_background_fill": "#ffffff",
        "input_border_color": "#d4c4a8",
        "input_border_color_focus": "#d4a574",
        "block_background_fill": "#ffffff",
        "block_border_color": "#e8e2d5",
        "block_border_width": "3px",
        "panel_background_fill": "#ffffff",
        "panel_border_color": "#e8e2d5",
        "slider_color": "#d4a574",
    }
    return zen_base.set(**overrides)
|