Simonlob committed on
Commit
e6708ef
·
1 Parent(s): 431a25b
.DS_Store ADDED
Binary file (10.2 kB). View file
 
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ venv/
2
+ env/
3
+ .env
4
+ *.zip
5
+ draft.zip
6
+ draft.docx
7
+ *.DS_Store
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: Acceptance Certificate Maker
3
  emoji: ⚡
4
- colorFrom: blue
5
- colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 6.10.0
8
  app_file: app.py
 
1
  ---
2
  title: Acceptance Certificate Maker
3
  emoji: ⚡
4
+ colorFrom: green
5
+ colorTo: blue
6
  sdk: gradio
7
  sdk_version: 6.10.0
8
  app_file: app.py
app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ACT Maker - Gradio Web Interface for Hugging Face Spaces
4
+
5
+ Web interface for generating ACT documents from ESFS XML receipts.
6
+ """
7
+
8
+ import os
9
+ import shutil
10
+ import tempfile
11
+ import gradio as gr
12
+ from utils import NumberToWords, DocumentProcessor, ReceiptParser, DocumentMerger
13
+
14
+
15
def process_receipt(xml_file, user_name):
    """
    Process an uploaded XML receipt and generate a merged ACT document.

    Args:
        xml_file: Uploaded receipt as delivered by the Gradio ``File``
            component. Accepted forms are a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing the path via ``.name``.
        user_name: User name string (e.g., "ИП Пупкин Василий Алибабаевич")

    Returns:
        Tuple ``(path, message)``. ``path`` is the generated DOCX file, or
        ``None`` when validation or processing failed; ``message`` is the
        status text shown to the user.
    """
    if xml_file is None:
        return None, "⚠️ Please upload an XML receipt file"

    if not user_name or not user_name.strip():
        return None, "⚠️ Please enter your name"

    user_name = user_name.strip()

    try:
        # The File component is configured with type="filepath", so the value
        # is normally a path string; older file-like wrappers carry the path
        # in ``.name``. Path objects must go through fspath because their
        # ``.name`` is only the basename.
        if isinstance(xml_file, (str, os.PathLike)):
            xml_source = os.fspath(xml_file)
        else:
            xml_source = xml_file.name

        # Create temporary directory for processing
        with tempfile.TemporaryDirectory() as temp_dir:
            # Create esfs folder for XML files
            esfs_folder = os.path.join(temp_dir, 'esfs')
            os.makedirs(esfs_folder)

            # Create temp docs folder
            temp_docs_folder = os.path.join(temp_dir, '.temp_docs')
            os.makedirs(temp_docs_folder)

            # Copy uploaded XML file to esfs folder
            xml_path = os.path.join(esfs_folder, os.path.basename(xml_source))
            shutil.copy(xml_source, xml_path)

            # Initialize components
            receipt_parser = ReceiptParser(esfs_folder)
            number_converter = NumberToWords()
            doc_processor = DocumentProcessor('draft.docx', user_name)
            doc_merger = DocumentMerger()

            # Parse receipts
            receipts_data = receipt_parser.get_all_receipt_data()

            if not receipts_data:
                return None, "⚠️ No receipt data found in XML file"

            # Process each receipt into its own intermediate document
            temp_files = []
            for idx, receipt_data in enumerate(receipts_data, 1):
                full_data = {
                    'contract_date': receipt_data['contract_date'],
                    'price': receipt_data['price'],
                    'price_words': number_converter.convert(receipt_data['price']),
                    'today_date': receipt_data['today_date']
                }

                doc = doc_processor.process_document(full_data)

                # Zero-padded index keeps the merger's sorted() order equal
                # to the receipt order.
                temp_filename = os.path.join(temp_docs_folder, f"act_{idx:03d}.docx")
                doc.save(temp_filename)
                temp_files.append(temp_filename)

            # Merge all documents
            output_path = os.path.join(temp_dir, 'result.docx')
            doc_merger.merge_files(temp_files, output_path)

            # Reserve a file that outlives temp_dir for the download. The
            # handle is closed immediately so no descriptor is leaked and the
            # copy below also works on platforms that lock open files.
            with tempfile.NamedTemporaryFile(
                delete=False,
                suffix='.docx',
                prefix='act_'
            ) as final_output:
                final_path = final_output.name
            shutil.copy(output_path, final_path)

            success_msg = f"✅ Successfully processed {len(receipts_data)} receipt(s)"
            return final_path, success_msg

    except Exception as e:
        # UI boundary: report any failure as a status message instead of
        # raising into the Gradio event handler.
        error_msg = f"❌ Error processing: {str(e)}"
        return None, error_msg
101
+
102
+
103
# Create Gradio interface
# Components are laid out in the order they are created inside the nested
# context managers: a header, a two-column row (inputs | outputs), and a
# usage guide below the row.
with gr.Blocks(title="ACT Maker - Acceptance Certificate Generator") as demo:
    gr.Markdown(
        """
        # 📄 ACT Maker
        ### Automated Acceptance Certificate Generation from ESFS XML Receipts

        Upload an XML receipt file and enter your name to generate an acceptance certificate.
        """
    )

    with gr.Row():
        # Left column: user inputs and the submit button
        with gr.Column():
            # type="filepath" makes Gradio hand the upload to the callback as a path
            xml_input = gr.File(
                label="📁 ESFS XML Receipt",
                file_types=[".xml"],
                type="filepath"
            )

            user_name_input = gr.Textbox(
                label="👤 User Name",
                placeholder="e.g., ИП Иванов Иван Иванович",
                lines=1
            )

            submit_btn = gr.Button("🚀 Generate Certificate", variant="primary", size="lg")

        # Right column: read-only status text and the generated file
        with gr.Column():
            status_output = gr.Textbox(
                label="📊 Status",
                lines=2,
                interactive=False
            )

            file_output = gr.File(
                label="📥 Download Document",
                interactive=False
            )

    gr.Markdown(
        """
        ---
        ### 📖 How to Use:
        1. **Upload XML file** - ESFS receipt in XML format
        2. **Enter your name** - Full name (e.g., "ИП Иванов Иван Иванович")
        3. **Click "Generate Certificate"** - Document will be created automatically
        4. **Download result** - Ready acceptance certificate in DOCX format

        ℹ️ If the XML file contains multiple receipts, they will be processed and merged into one document.
        """
    )

    # Connect the button to the processing function.
    # process_receipt returns (file_path, status_message), matching the
    # order of the outputs list below.
    submit_btn.click(
        fn=process_receipt,
        inputs=[xml_input, user_name_input],
        outputs=[file_output, status_output]
    )

# Launch the app only when executed as a script (not when imported)
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ python-docx==1.1.2
2
+ gradio==4.44.0
utils/.DS_Store ADDED
Binary file (6.15 kB). View file
 
utils/README.md ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ACT Maker
2
+
3
+ Automated document generation tool for creating ACT (acceptance certificate) documents from ESFS XML receipts.
4
+
5
+ ## Features
6
+
7
+ - 📄 **Automated Document Generation**: Process multiple receipts at once
8
+ - 🔄 **Template-based**: Uses customizable DOCX templates
9
+ - 🌍 **Russian Language Support**: Converts numbers to Russian words with proper grammar
10
+ - 📊 **Batch Processing**: Handles multiple XML files and receipts
11
+ - ⚙️ **Configurable**: Command-line arguments for flexibility
12
+
13
+ ## Project Structure
14
+
15
+ ```
16
+ act_maker/
17
+ ├── main.py # Main entry point
18
+ ├── merge_documents.py # Standalone document merger utility
19
+ ├── requirements.txt # Python dependencies
20
+ ├── README.md # This file
21
+ ├── .user_name # User configuration (your name)
22
+ ├── draft.docx # DOCX template file
23
+ ├── result.docx # Final merged output (auto-generated)
24
+ ├── esfs/ # Input folder for XML receipts
25
+ │ └── *.xml # ESFS receipt XML files
26
+ ├── .temp_docs/ # Temporary buffer (auto-created, auto-deleted)
27
+ │ └── act_*.docx # Individual documents (only with --keep-temp)
28
+ └── utils/ # Utility modules
29
+ ├── __init__.py
30
+ ├── number_converter.py # Number to Russian words converter
31
+ ├── receipt_parser.py # XML receipt parser
32
+ ├── document_processor.py # DOCX template processor
33
+ └── document_merger.py # Document merging logic
34
+ ```
35
+
36
+ ## Installation
37
+
38
+ 1. **Clone or download this project**
39
+
40
+ 2. **Create a virtual environment** (recommended):
41
+ ```bash
42
+ python3 -m venv venv
43
+ source venv/bin/activate # On Windows: venv\Scripts\activate
44
+ ```
45
+
46
+ 3. **Install dependencies**:
47
+ ```bash
48
+ pip install -r requirements.txt
49
+ ```
50
+
51
+ ## Configuration
52
+
53
+ 1. **Create `.user_name` file** with your name:
54
+ ```
55
+ ИП Пупкин Василий Алибабаевич
56
+ ```
57
+
58
+ 2. **Prepare your template** (`draft.docx`) with the following tags:
59
+ - `<my-name-caps>` - Your name in UPPERCASE (bold)
60
+ - `<my-name>` - Your name in original case with last word on new line (bold)
61
+ - `<contract-date>` - Contract date (bold)
62
+ - `<price-som>` - Price in numbers (bold)
63
+ - `<price-leters>` - Price in words (normal)
64
+ - `<today-date>` - Today's date (bold)
65
+
66
+ 3. **Place XML receipts** in the `esfs/` folder
67
+
68
+ ## Usage
69
+
70
+ ### Basic Usage
71
+
72
+ Process all receipts and create a merged document:
73
+
74
+ ```bash
75
+ python main.py
76
+ ```
77
+
78
+ This will:
79
+ 1. Parse all XML files from `esfs/` folder
80
+ 2. Generate individual documents in `.temp_docs/` folder
81
+ 3. Merge all documents into `result.docx`
82
+ 4. Clean up temporary files
83
+
84
+ ### Advanced Usage
85
+
86
+ Keep temporary files for inspection:
87
+
88
+ ```bash
89
+ python main.py --keep-temp
90
+ ```
91
+
92
+ Customize paths and output file:
93
+
94
+ ```bash
95
+ python main.py --template my_template.docx \
96
+ --esfs-folder input_xmls \
97
+ --output my_result.docx \
98
+ --user-config .my_name
99
+ ```
100
+
101
+ ### Command-Line Arguments
102
+
103
+ | Argument | Default | Description |
104
+ |----------|---------|-------------|
105
+ | `--template` | `draft.docx` | Path to DOCX template file |
106
+ | `--esfs-folder` | `esfs` | Folder containing XML receipt files |
107
+ | `--output` | `result.docx` | Output merged document file |
108
+ | `--user-config` | `.user_name` | Config file containing user name |
109
+ | `--keep-temp` | `false` | Keep temporary files after merging |
110
+
111
+ ### Standalone Document Merger
112
+
113
+ You can also merge existing DOCX files using the standalone merger:
114
+
115
+ ```bash
116
+ python merge_documents.py --input-folder my_docs --output merged.docx
117
+ ```
118
+
119
+ ## How It Works
120
+
121
+ 1. **Parse XML Receipts**: Scans the ESFS folder for XML files and extracts receipt data
122
+ 2. **Extract Data**: Gets contract dates, prices, and creation dates from each receipt
123
+ 3. **Convert Numbers**: Converts prices to Russian words (e.g., 87000 → "восемьдесят семь тысяч")
124
+ 4. **Fill Template**: Replaces tags in the DOCX template with actual data
125
+ 5. **Generate Individual Documents**: Saves each filled document to `.temp_docs/` buffer
126
+ 6. **Merge Documents**: Combines all documents into a single `result.docx` file with page breaks
127
+ 7. **Cleanup**: Removes temporary files (unless `--keep-temp` is specified)
128
+
129
+ ## Template Tags
130
+
131
+ All tags are replaced with proper formatting (bold where specified):
132
+
133
+ - **`<my-name-caps>`**: Full name in UPPERCASE and bold
134
+ - **`<my-name>`**: Full name with last word on new line, bold
135
+ - **`<contract-date>`**: Date in format "2 ноября 2024", bold
136
+ - **`<price-som>`**: Integer price (e.g., "87000"), bold
137
+ - **`<price-leters>`**: Price in words (e.g., "восемьдесят семь тысяч"), regular font
138
+ - **`<today-date>`**: Date in format "1 апреля 2026", bold
139
+
140
+ ## Example Output
141
+
142
+ Input receipt with price `87000` and date `2024-11-02` will generate:
143
+
144
+ - Contract date: **2 ноября 2024**
145
+ - Price: **87000** сом (восемьдесят семь тысяч)
146
+ - Name: **ИП ИВАНОВ ИВАН ИВАНОВИЧ** / **ИП Иванов Иван**
147
+ **Иванович**
148
+
149
+ ## Development
150
+
151
+ ### Running Tests
152
+
153
+ Test the number converter:
154
+
155
+ ```bash
156
+ python -c "from utils import NumberToWords; c = NumberToWords(); print(c.convert(87000))"
157
+ ```
158
+
159
+ ### Project Components
160
+
161
+ - **NumberToWords**: Converts integers to Russian words with proper grammar
162
+ - **ReceiptParser**: Extracts data from ESFS XML receipts
163
+ - **DocumentProcessor**: Fills DOCX templates while preserving formatting
164
+ - **DocumentMerger**: Merges multiple DOCX files into a single document with page breaks
165
+
166
+ ## Requirements
167
+
168
+ - Python 3.12+
169
+ - python-docx 1.1.2
170
+
171
+ ## License
172
+
173
+ This is a utility tool for internal use.
174
+
175
+ ## Support
176
+
177
+ For issues or questions, please check the code documentation or modify the code according to your needs.
utils/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ """Utility modules for ACT document generation"""
2
+
3
+ from .number_converter import NumberToWords
4
+ from .document_processor import DocumentProcessor
5
+ from .receipt_parser import ReceiptParser
6
+ from .document_merger import DocumentMerger
7
+
8
+ __all__ = ['NumberToWords', 'DocumentProcessor', 'ReceiptParser', 'DocumentMerger']
utils/document_merger.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Document merger for combining multiple DOCX files into one"""
2
+
3
+ import os
4
+ import glob
5
+ from docx import Document
6
+ from docx.enum.text import WD_BREAK
7
+ from typing import List
8
+
9
+
10
class DocumentMerger:
    """Merge multiple DOCX files into a single document with page breaks"""

    def __init__(self):
        pass

    def _add_page_break(self, doc: "Document"):
        """
        Add a page break at the end of document

        Args:
            doc: DOCX document object
        """
        paragraph = doc.add_paragraph()
        run = paragraph.add_run()
        run.add_break(WD_BREAK.PAGE)

    @staticmethod
    def _copy_runs(source_para, target_para):
        """
        Append a copy of every run of source_para to target_para

        Only the run text and the bold / italic / underline / font size /
        font name properties are carried over.

        Args:
            source_para: Paragraph whose runs are read
            target_para: Paragraph that receives the copied runs
        """
        for run in source_para.runs:
            new_run = target_para.add_run(run.text)
            new_run.bold = run.bold
            new_run.italic = run.italic
            new_run.underline = run.underline

            # Size and name are only written when explicitly set on the
            # source run, so inherited (None) values stay inherited.
            if run.font.size:
                new_run.font.size = run.font.size
            if run.font.name:
                new_run.font.name = run.font.name

    def _copy_paragraph(self, source_para, target_doc: "Document"):
        """
        Copy paragraph with its style, alignment and runs to target document

        Args:
            source_para: Source paragraph to copy
            target_doc: Target document to copy to
        """
        new_para = target_doc.add_paragraph(style=source_para.style)
        new_para.alignment = source_para.alignment
        self._copy_runs(source_para, new_para)

    def _copy_table(self, source_table, target_doc: "Document"):
        """
        Copy table (style and cell paragraphs) to target document

        Args:
            source_table: Source table to copy
            target_doc: Target document to copy to
        """
        rows = len(source_table.rows)
        cols = len(source_table.columns)

        new_table = target_doc.add_table(rows=rows, cols=cols)

        if source_table.style:
            new_table.style = source_table.style

        for i, row in enumerate(source_table.rows):
            for j, cell in enumerate(row.cells):
                new_cell = new_table.rows[i].cells[j]

                # Reset the cell to a single empty paragraph
                new_cell.text = ''

                for para in cell.paragraphs:
                    new_para = new_cell.add_paragraph(style=para.style)
                    new_para.alignment = para.alignment
                    self._copy_runs(para, new_para)

                # Drop the leftover empty first paragraph so the cell holds
                # exactly the copied paragraphs.
                if len(new_cell.paragraphs) > len(cell.paragraphs):
                    p = new_cell.paragraphs[0]._element
                    p.getparent().remove(p)

    def merge_files(self, docx_files: List[str], output_file: str):
        """
        Merge multiple DOCX files into a single document

        Files are merged in sorted path order. The first file is used as the
        base document; the body of every following file is appended after a
        page break, keeping paragraphs and tables in their original order.

        Args:
            docx_files: List of paths to DOCX files to merge
            output_file: Path to output merged document

        Raises:
            ValueError: If docx_files is empty
        """
        if not docx_files:
            raise ValueError("No DOCX files provided to merge")

        # Local import: Table is only needed here to tell tables apart from
        # paragraphs while walking the body in document order.
        from docx.table import Table

        # Sort files to ensure consistent order
        docx_files = sorted(docx_files)

        # Start with the first document as base
        merged_doc = Document(docx_files[0])

        for docx_file in docx_files[1:]:
            self._add_page_break(merged_doc)

            sub_doc = Document(docx_file)

            # Walk paragraphs and tables interleaved, as they appear in the
            # source body, so tables are not pushed to the end of the section.
            for item in sub_doc.iter_inner_content():
                if isinstance(item, Table):
                    self._copy_table(item, merged_doc)
                else:
                    self._copy_paragraph(item, merged_doc)

        merged_doc.save(output_file)

    def merge_from_folder(self, input_folder: str, output_file: str, pattern: str = '*.docx') -> int:
        """
        Merge all DOCX files from a folder into a single document

        Args:
            input_folder: Folder containing DOCX files
            output_file: Path to output merged document
            pattern: File pattern to match (default: *.docx)

        Returns:
            Number of documents merged

        Raises:
            ValueError: If no file in input_folder matches pattern
        """
        search_pattern = os.path.join(input_folder, pattern)
        docx_files = glob.glob(search_pattern)

        if not docx_files:
            raise ValueError(f"No DOCX files found in {input_folder} matching pattern {pattern}")

        self.merge_files(docx_files, output_file)

        return len(docx_files)
utils/document_processor.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Document processor for filling DOCX templates with data"""
2
+
3
+ from docx import Document
4
+ from typing import Dict
5
+
6
+
7
class DocumentProcessor:
    """Process DOCX templates and fill them with data"""

    # Supported template tags mapped to whether the replacement is bold
    _TAG_BOLD = {
        '<my-name-caps>': True,
        '<my-name>': True,
        '<contract-date>': True,
        '<price-som>': True,
        '<price-leters>': False,
        '<today-date>': True,
    }

    # Tags whose replacement comes from the data dictionary, with its key
    _TAG_DATA_KEY = {
        '<contract-date>': 'contract_date',
        '<price-som>': 'price',
        '<price-leters>': 'price_words',
        '<today-date>': 'today_date',
    }

    def __init__(self, template_path: str, user_name: str):
        """
        Initialize document processor

        Args:
            template_path: Path to DOCX template file
            user_name: User name to fill in the document
        """
        self.template_path = template_path
        self.user_name_original = user_name
        self.user_name_caps = user_name.upper()

    def _name_with_line_break(self) -> str:
        """Return the user name with its last word moved to a new line"""
        name_parts = self.user_name_original.rsplit(' ', 1)
        if len(name_parts) == 2:
            return name_parts[0] + '\n' + name_parts[1]
        return self.user_name_original

    def _replacement_for(self, tag: str, data: Dict[str, object]) -> str:
        """
        Return the text that replaces the given tag

        The data dictionary is read lazily, so only keys for tags that are
        actually present in the template are required.
        """
        if tag == '<my-name-caps>':
            return self.user_name_caps
        if tag == '<my-name>':
            return self._name_with_line_break()
        value = data[self._TAG_DATA_KEY[tag]]
        # The price is stored as a number and must be rendered as text
        return str(value) if tag == '<price-som>' else value

    def _first_tag(self, text: str):
        """
        Find the supported tag that occurs earliest in text

        Returns:
            Tuple (tag, make_bold), or None if text contains no tag
        """
        best = None
        best_pos = -1
        for tag, make_bold in self._TAG_BOLD.items():
            pos = text.find(tag)
            if pos != -1 and (best is None or pos < best_pos):
                best, best_pos = (tag, make_bold), pos
        return best

    @staticmethod
    def _insert_run_after(paragraph, anchor_run, text, bold, font_size, font_name):
        """Create a run with the given text/formatting directly after anchor_run"""
        new_run = paragraph.add_run(text)
        new_run.bold = bold
        if font_size:
            new_run.font.size = font_size
        if font_name:
            new_run.font.name = font_name
        # add_run() appends at the end of the paragraph; re-inserting the
        # element moves it to the position right behind the anchor run.
        anchor_index = paragraph._element.index(anchor_run._element)
        paragraph._element.insert(anchor_index + 1, new_run._element)
        return new_run

    def _split_run_with_tag(self, paragraph, run, tag: str, replacement: str, make_bold: bool = True):
        """
        Split a run containing a tag into three parts to preserve formatting

        The run keeps the text before the tag; a new run holds the
        replacement and another new run holds the text after the tag. Only
        the first occurrence of the tag in the run is replaced.

        Args:
            paragraph: DOCX paragraph object
            run: DOCX run object
            tag: Tag to find and replace
            replacement: Text to replace the tag with
            make_bold: Whether to make the replacement text bold

        Returns:
            The run holding the text after the tag, or None if the tag does
            not occur in the run
        """
        text = run.text
        tag_pos = text.find(tag)
        if tag_pos == -1:
            return None

        before_text = text[:tag_pos]
        after_text = text[tag_pos + len(tag):]

        # Formatting carried over to the new runs
        font_size = run.font.size
        font_name = run.font.name
        original_bold = run.bold

        # Current run becomes the "before" part with its original formatting
        run.text = before_text
        run.bold = original_bold
        if font_size:
            run.font.size = font_size
        if font_name:
            run.font.name = font_name

        new_run = self._insert_run_after(
            paragraph, run, replacement, make_bold, font_size, font_name
        )
        return self._insert_run_after(
            paragraph, new_run, after_text, original_bold, font_size, font_name
        )

    def _process_paragraph(self, paragraph, data: Dict[str, object]):
        """
        Process a single paragraph and replace all tags

        Every tag occurrence inside a run is replaced, including several
        tags in the same run. A tag that is split across several runs is
        not detected.

        Args:
            paragraph: DOCX paragraph object
            data: Dictionary containing data to fill (contract_date, price, price_words, today_date)
        """
        if not any(tag in paragraph.text for tag in self._TAG_BOLD):
            return

        # Snapshot because runs are added while processing
        for run in list(paragraph.runs):
            current = run
            while current is not None:
                found = self._first_tag(current.text)
                if found is None:
                    break
                tag, make_bold = found
                # Continue with the "after" run only, so inserted replacement
                # text is never re-scanned for tags.
                current = self._split_run_with_tag(
                    paragraph, current, tag,
                    self._replacement_for(tag, data),
                    make_bold=make_bold,
                )

    def process_document(self, data: Dict[str, object]) -> "Document":
        """
        Process template document with given data

        Body paragraphs and paragraphs inside the cells of top-level tables
        are processed.

        Args:
            data: Dictionary containing:
                - contract_date: Contract date string
                - price: Price as integer
                - price_words: Price in words
                - today_date: Today's date string

        Returns:
            Processed DOCX document
        """
        # A fresh copy of the template is loaded for every call
        doc = Document(self.template_path)

        for para in doc.paragraphs:
            self._process_paragraph(para, data)

        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    for para in cell.paragraphs:
                        self._process_paragraph(para, data)

        return doc
utils/number_converter.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Number to Russian words converter"""
2
+
3
+
4
class NumberToWords:
    """Convert numbers to Russian words in nominative case"""

    # Plural forms (one, few, many) of each 10^3 scale word, indexed by the
    # position of the three-digit group; index 0 is the units group.
    _SCALE_FORMS = (
        None,
        ('тысяча', 'тысячи', 'тысяч'),
        ('миллион', 'миллиона', 'миллионов'),
        ('миллиард', 'миллиарда', 'миллиардов'),
        ('триллион', 'триллиона', 'триллионов'),
    )

    def __init__(self):
        self.ones = ['', 'один', 'два', 'три', 'четыре', 'пять', 'шесть', 'семь', 'восемь', 'девять']
        self.tens = ['', '', 'двадцать', 'тридцать', 'сорок', 'пятьдесят', 'шестьдесят', 'семьдесят', 'восемьдесят', 'девяносто']
        self.teens = ['десять', 'одиннадцать', 'двенадцать', 'тринадцать', 'четырнадцать', 'пятнадцать',
                      'шестнадцать', 'семнадцать', 'восемнадцать', 'девятнадцать']
        self.hundreds = ['', 'сто', 'двести', 'триста', 'четыреста', 'пятьсот', 'шестьсот', 'семьсот', 'восемьсот', 'девятьсот']
        # Same as ones, but with the feminine forms of 1 and 2 used before "тысяча"
        self.thousands = ['', 'одна', 'две', 'три', 'четыре', 'пять', 'шесть', 'семь', 'восемь', 'девять']

    def _convert_group(self, num: int, feminine: bool = False) -> str:
        """
        Convert a three-digit group (0-999) to words

        Args:
            num: Group value in the range 0-999
            feminine: Use feminine forms for the final digit

        Returns:
            Words for the group, or an empty string for 0
        """
        if num == 0:
            return ''

        result = []

        hundred = num // 100
        if hundred > 0:
            result.append(self.hundreds[hundred])

        remainder = num % 100
        if 10 <= remainder <= 19:
            # 10-19 are single words
            result.append(self.teens[remainder - 10])
        else:
            ten = remainder // 10
            one = remainder % 10
            if ten > 0:
                result.append(self.tens[ten])
            if one > 0:
                units = self.thousands if feminine else self.ones
                result.append(units[one])

        return ' '.join(result)

    def _convert_hundreds(self, num: int) -> str:
        """Convert number 0-999 to words"""
        return self._convert_group(num)

    def _convert_thousands(self, num: int) -> str:
        """Convert thousands part (0-999) to words with feminine forms"""
        return self._convert_group(num, feminine=True)

    @staticmethod
    def _plural_form(num: int, forms) -> str:
        """
        Pick the form of a counted word that agrees with num

        Args:
            num: The count
            forms: Tuple (one, few, many), e.g. ('тысяча', 'тысячи', 'тысяч')
        """
        # 11-14 take the "many" form although they end in 1-4
        if num % 100 in (11, 12, 13, 14):
            return forms[2]
        last_digit = num % 10
        if last_digit == 1:
            return forms[0]
        if last_digit in (2, 3, 4):
            return forms[1]
        return forms[2]

    def _thousand_word(self, num: int) -> str:
        """Get correct grammatical form of 'thousand' word"""
        return self._plural_form(num, self._SCALE_FORMS[1])

    def convert(self, num: int) -> str:
        """
        Convert integer to Russian words in nominative case

        Args:
            num: Integer number to convert

        Returns:
            String representation of the number in Russian

        Raises:
            ValueError: If the absolute value is 10**15 or larger
        """
        if num == 0:
            return 'ноль'

        if num < 0:
            return 'минус ' + self.convert(-num)

        # Split into three-digit groups, least significant first
        groups = []
        remaining = num
        while remaining:
            groups.append(remaining % 1000)
            remaining //= 1000

        if len(groups) > len(self._SCALE_FORMS):
            raise ValueError(f"Number is too large to convert: {num}")

        result = []
        for scale in range(len(groups) - 1, -1, -1):
            group = groups[scale]
            if group == 0:
                continue
            # Only the thousands group counts a feminine noun
            result.append(self._convert_group(group, feminine=(scale == 1)))
            if scale > 0:
                result.append(self._plural_form(group, self._SCALE_FORMS[scale]))

        return ' '.join(result)
utils/receipt_parser.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """XML Receipt parser for ESFS documents"""
2
+
3
+ import xml.etree.ElementTree as ET
4
+ from datetime import datetime
5
+ from typing import List, Dict
6
+ import glob
7
+
8
+
9
class ReceiptParser:
    """Parser for extracting receipt data from XML files"""

    # Month names in Russian (genitive case for dates)
    MONTHS_GENITIVE = {
        1: 'января', 2: 'февраля', 3: 'марта', 4: 'апреля',
        5: 'мая', 6: 'июня', 7: 'июля', 8: 'августа',
        9: 'сентября', 10: 'октября', 11: 'ноября', 12: 'декабря'
    }

    def __init__(self, esfs_folder: str = 'esfs'):
        """
        Initialize receipt parser

        Args:
            esfs_folder: Path to folder containing XML files
        """
        self.esfs_folder = esfs_folder

    def find_xml_files(self) -> List[str]:
        """
        Find all XML files in the ESFS folder

        Returns:
            Sorted list of XML file paths
        """
        pattern = f"{self.esfs_folder}/*.xml"
        # glob order is arbitrary; sort so receipts always come out in the
        # same order for the same folder contents.
        return sorted(glob.glob(pattern))

    def parse_receipts(self) -> List[ET.Element]:
        """
        Parse all receipts from all XML files in the folder

        Returns:
            List of receipt XML elements
        """
        all_receipts = []

        for xml_file in self.find_xml_files():
            # NOTE(review): xml.etree.ElementTree is not hardened against
            # maliciously constructed XML; if these files are untrusted
            # uploads, consider a hardened parser.
            root = ET.parse(xml_file).getroot()
            all_receipts.extend(root.findall('.//receipt'))

        return all_receipts

    @staticmethod
    def _required_text(receipt: ET.Element, path: str) -> str:
        """
        Return the stripped text of the element at path inside receipt

        Raises:
            ValueError: If the element is missing or has no text
        """
        element = receipt.find(path)
        text = element.text if element is not None else None
        if text is None or not text.strip():
            raise ValueError(f"Receipt is missing required field '{path}'")
        return text.strip()

    @staticmethod
    def _parse_iso_datetime(value: str) -> datetime:
        """Parse an ISO 8601 date/time string, with or without a UTC offset"""
        # A trailing "Z" is rewritten to an explicit offset because
        # fromisoformat() does not accept it on every Python version.
        if value.endswith(('Z', 'z')):
            value = value[:-1] + '+00:00'
        return datetime.fromisoformat(value)

    def _format_date(self, moment: datetime) -> str:
        """Format a date as e.g. '2 ноября 2024' using the value's own wall-clock date"""
        return f"{moment.day} {self.MONTHS_GENITIVE[moment.month]} {moment.year}"

    def extract_receipt_data(self, receipt: ET.Element) -> Dict[str, object]:
        """
        Extract data from a single receipt element

        Args:
            receipt: XML element containing receipt data

        Returns:
            Dictionary with keys contract_date (str), price (int) and
            today_date (str)

        Raises:
            ValueError: If a required field is missing, empty or malformed
        """
        # Contract date
        contract_date = self._parse_iso_datetime(
            self._required_text(receipt, 'deliveryContractDate')
        )

        # Price: any fractional part is truncated
        price_int = int(float(self._required_text(receipt, './/goods/good/price')))

        # Today's date (creation date)
        today_date = self._parse_iso_datetime(
            self._required_text(receipt, 'createdDate')
        )

        return {
            'contract_date': self._format_date(contract_date),
            'price': price_int,
            'today_date': self._format_date(today_date)
        }

    def get_all_receipt_data(self) -> List[Dict[str, object]]:
        """
        Get data for all receipts in the folder

        Returns:
            List of dictionaries containing receipt data
        """
        receipts = self.parse_receipts()
        return [self.extract_receipt_data(receipt) for receipt in receipts]