Spaces:

aiacademy-kg
/

acceptance-certificate-maker

Sleeping

App Files Files Community

Simonlob committed on Apr 1

Commit

e6708ef

1 Parent(s): 431a25b

zip

Browse files

Files changed (12) hide show

.DS_Store +0 -0
.gitignore +7 -0
README.md +2 -2
app.py +164 -0
requirements.txt +2 -0
utils/.DS_Store +0 -0
utils/README.md +177 -0
utils/__init__.py +8 -0
utils/document_merger.py +158 -0
utils/document_processor.py +147 -0
utils/number_converter.py +121 -0
utils/receipt_parser.py +94 -0

.DS_Store ADDED Viewed

Binary file (10.2 kB). View file

.gitignore ADDED Viewed

	@@ -0,0 +1,7 @@

+venv/
+env/
+.env
+*.zip
+draft.zip
+draft.docx
+*.DS_Store

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
 title: Acceptance Certificate Maker
 emoji: ⚡
-colorFrom: blue
-colorTo: yellow
 sdk: gradio
 sdk_version: 6.10.0
 app_file: app.py

 ---
 title: Acceptance Certificate Maker
 emoji: ⚡
+colorFrom: green
+colorTo: blue
 sdk: gradio
 sdk_version: 6.10.0
 app_file: app.py

app.py ADDED Viewed

	@@ -0,0 +1,164 @@

+#!/usr/bin/env python3
+"""
+ACT Maker - Gradio Web Interface for Hugging Face Spaces
+Web interface for generating ACT documents from ESFS XML receipts.
+"""
+import os
+import shutil
+import tempfile
+import gradio as gr
+from utils import NumberToWords, DocumentProcessor, ReceiptParser, DocumentMerger
def process_receipt(xml_file, user_name):
    """
    Process an uploaded XML receipt and generate the merged ACT document.

    Args:
        xml_file: Upload handed over by Gradio. The File component is declared
            with ``type="filepath"``, so this is normally a plain path string;
            objects that expose the path through a ``.name`` attribute are
            accepted as well.
        user_name: User name string (e.g., "ИП Пупкин Василий Алибабаевич")

    Returns:
        Tuple ``(path, message)``: ``path`` is the generated DOCX file
        (``None`` on failure) and ``message`` is the status text for the UI.
    """
    if xml_file is None:
        return None, "⚠️ Please upload an XML receipt file"
    if not user_name or not user_name.strip():
        return None, "⚠️ Please enter your name"
    user_name = user_name.strip()
    try:
        # A filepath-typed upload is a str; calling ``.name`` on it would fail,
        # so only fall back to the attribute for file-like wrappers.
        if isinstance(xml_file, (str, os.PathLike)):
            source_path = os.fspath(xml_file)
        else:
            source_path = xml_file.name

        # All intermediate artefacts live in a directory that is removed on exit
        with tempfile.TemporaryDirectory() as temp_dir:
            # Folder scanned by ReceiptParser for XML files
            esfs_folder = os.path.join(temp_dir, 'esfs')
            os.makedirs(esfs_folder)
            # Folder for the per-receipt documents
            temp_docs_folder = os.path.join(temp_dir, '.temp_docs')
            os.makedirs(temp_docs_folder)
            # Copy the uploaded XML file into the esfs folder
            xml_path = os.path.join(esfs_folder, os.path.basename(source_path))
            shutil.copy(source_path, xml_path)

            # Initialize components
            receipt_parser = ReceiptParser(esfs_folder)
            number_converter = NumberToWords()
            doc_processor = DocumentProcessor('draft.docx', user_name)
            doc_merger = DocumentMerger()

            # Parse receipts
            receipts_data = receipt_parser.get_all_receipt_data()
            if not receipts_data:
                return None, "⚠️ No receipt data found in XML file"

            # Produce one filled-in document per receipt
            temp_files = []
            for idx, receipt_data in enumerate(receipts_data, 1):
                full_data = {
                    'contract_date': receipt_data['contract_date'],
                    'price': receipt_data['price'],
                    'price_words': number_converter.convert(receipt_data['price']),
                    'today_date': receipt_data['today_date']
                }
                doc = doc_processor.process_document(full_data)
                temp_filename = os.path.join(temp_docs_folder, f"act_{idx:03d}.docx")
                doc.save(temp_filename)
                temp_files.append(temp_filename)

            # Merge all documents
            output_path = os.path.join(temp_dir, 'result.docx')
            doc_merger.merge_files(temp_files, output_path)

            # The download must outlive temp_dir, so copy it to a standalone
            # temp file. mkstemp returns an open descriptor; close it right
            # away so no handle is leaked per request.
            fd, final_output = tempfile.mkstemp(suffix='.docx', prefix='act_')
            os.close(fd)
            shutil.copy(output_path, final_output)

            success_msg = f"✅ Successfully processed {len(receipts_data)} receipt(s)"
            return final_output, success_msg
    except Exception as e:
        # UI boundary: report any failure in the status box instead of crashing
        error_msg = f"❌ Error processing: {str(e)}"
        return None, error_msg
# Assemble the Gradio Blocks UI: inputs in the first column, results in the second
with gr.Blocks(title="ACT Maker - Acceptance Certificate Generator") as demo:
    # Page header
    gr.Markdown(
        """
        # 📄 ACT Maker
        ### Automated Acceptance Certificate Generation from ESFS XML Receipts
        Upload an XML receipt file and enter your name to generate an acceptance certificate.
        """
    )
    with gr.Row():
        # Input column: receipt upload, user name and the trigger button
        with gr.Column():
            xml_input = gr.File(label="📁 ESFS XML Receipt", file_types=[".xml"], type="filepath")
            user_name_input = gr.Textbox(
                label="👤 User Name", placeholder="e.g., ИП Иванов Иван Иванович", lines=1
            )
            submit_btn = gr.Button("🚀 Generate Certificate", variant="primary", size="lg")
        # Output column: status message and the generated document
        with gr.Column():
            status_output = gr.Textbox(label="📊 Status", lines=2, interactive=False)
            file_output = gr.File(label="📥 Download Document", interactive=False)
    # Usage instructions shown below the form
    gr.Markdown(
        """
        ---
        ### 📖 How to Use:
        1. **Upload XML file** - ESFS receipt in XML format
        2. **Enter your name** - Full name (e.g., "ИП Иванов Иван Иванович")
        3. **Click "Generate Certificate"** - Document will be created automatically
        4. **Download result** - Ready acceptance certificate in DOCX format
        ℹ️ If the XML file contains multiple receipts, they will be processed and merged into one document.
        """
    )
    # process_receipt returns (file path, status message), matching the outputs order
    submit_btn.click(
        process_receipt,
        inputs=[xml_input, user_name_input],
        outputs=[file_output, status_output],
    )

# Start the server only when executed as a script
if __name__ == "__main__":
    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ python-docx==1.1.2
2	+ gradio==4.44.0

utils/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

utils/README.md ADDED Viewed

	@@ -0,0 +1,177 @@

+# ACT Maker
+Automated document generation tool for creating ACT (acceptance certificate) documents from ESFS XML receipts.
+## Features
+- 📄 **Automated Document Generation**: Process multiple receipts at once
+- 🔄 **Template-based**: Uses customizable DOCX templates
+- 🌍 **Russian Language Support**: Converts numbers to Russian words with proper grammar
+- 📊 **Batch Processing**: Handles multiple XML files and receipts
+- ⚙️ **Configurable**: Command-line arguments for flexibility
+## Project Structure
+```
+act_maker/
+├── main.py                 # Main entry point
+├── merge_documents.py      # Standalone document merger utility
+├── requirements.txt        # Python dependencies
+├── README.md              # This file
+├── .user_name             # User configuration (your name)
+├── draft.docx             # DOCX template file
+├── result.docx            # Final merged output (auto-generated)
+├── esfs/                  # Input folder for XML receipts
+│   └── *.xml             # ESFS receipt XML files
+├── .temp_docs/            # Temporary buffer (auto-created, auto-deleted)
+│   └── act_*.docx        # Individual documents (only with --keep-temp)
+└── utils/                 # Utility modules
+    ├── __init__.py
+    ├── number_converter.py    # Number to Russian words converter
+    ├── receipt_parser.py      # XML receipt parser
+    ├── document_processor.py  # DOCX template processor
+    └── document_merger.py     # Document merging logic
+```
+## Installation
+1. **Clone or download this project**
+2. **Create a virtual environment** (recommended):
+   ```bash
+   python3 -m venv venv
+   source venv/bin/activate  # On Windows: venv\Scripts\activate
+   ```
+3. **Install dependencies**:
+   ```bash
+   pip install -r requirements.txt
+   ```
+## Configuration
+1. **Create `.user_name` file** with your name:
+   ```
+   ИП Пупкин Василий Алибабаевич
+   ```
+2. **Prepare your template** (`draft.docx`) with the following tags:
+   - `<my-name-caps>` - Your name in UPPERCASE (bold)
+   - `<my-name>` - Your name in original case with last word on new line (bold)
+   - `<contract-date>` - Contract date (bold)
+   - `<price-som>` - Price in numbers (bold)
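+   - `<price-leters>` - Price in words (normal weight, not bold). The tag really is spelled `leters`; the template must use exactly this spelling because the code searches for it literally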
+   - `<today-date>` - Today's date (bold)
+3. **Place XML receipts** in the `esfs/` folder
+## Usage
+### Basic Usage
+Process all receipts and create a merged document:
+```bash
+python main.py
+```
+This will:
+1. Parse all XML files from `esfs/` folder
+2. Generate individual documents in `.temp_docs/` folder
+3. Merge all documents into `result.docx`
+4. Clean up temporary files
+### Advanced Usage
+Keep temporary files for inspection:
+```bash
+python main.py --keep-temp
+```
+Customize paths and output file:
+```bash
+python main.py --template my_template.docx \
+               --esfs-folder input_xmls \
+               --output my_result.docx \
+               --user-config .my_name
+```
+### Command-Line Arguments
+| Argument | Default | Description |
+|----------|---------|-------------|
+| `--template` | `draft.docx` | Path to DOCX template file |
+| `--esfs-folder` | `esfs` | Folder containing XML receipt files |
+| `--output` | `result.docx` | Output merged document file |
+| `--user-config` | `.user_name` | Config file containing user name |
+| `--keep-temp` | `false` | Keep temporary files after merging |
+### Standalone Document Merger
+You can also merge existing DOCX files using the standalone merger:
+```bash
+python merge_documents.py --input-folder my_docs --output merged.docx
+```
+## How It Works
+1. **Parse XML Receipts**: Scans the ESFS folder for XML files and extracts receipt data
+2. **Extract Data**: Gets contract dates, prices, and creation dates from each receipt
+3. **Convert Numbers**: Converts prices to Russian words (e.g., 87000 → "восемьдесят семь тысяч")
+4. **Fill Template**: Replaces tags in the DOCX template with actual data
+5. **Generate Individual Documents**: Saves each filled document to `.temp_docs/` buffer
+6. **Merge Documents**: Combines all documents into a single `result.docx` file with page breaks
+7. **Cleanup**: Removes temporary files (unless `--keep-temp` is specified)
+## Template Tags
+All tags are replaced with proper formatting (bold where specified):
+- **`<my-name-caps>`**: Full name in UPPERCASE and bold
+- **`<my-name>`**: Full name with last word on new line, bold
+- **`<contract-date>`**: Date in format "2 ноября 2024", bold
+- **`<price-som>`**: Integer price (e.g., "87000"), bold
+- **`<price-leters>`**: Price in words (e.g., "восемьдесят семь тысяч"), regular font
+- **`<today-date>`**: Date in format "1 апреля 2026", bold
+## Example Output
+Input receipt with price `87000` and date `2024-11-02` will generate:
+- Contract date: **2 ноября 2024**
+- Price: **87000** сом (восемьдесят семь тысяч)
+- Name: **ИП ИВАНОВ ИВАН ИВАНОВИЧ** / **ИП Иванов Иван**
+  **Иванович**
+## Development
+### Running Tests
+Test the number converter:
+```bash
+python -c "from utils import NumberToWords; c = NumberToWords(); print(c.convert(87000))"
+```
+### Project Components
+- **NumberToWords**: Converts integers to Russian words with proper grammar
+- **ReceiptParser**: Extracts data from ESFS XML receipts
+- **DocumentProcessor**: Fills DOCX templates while preserving formatting
+- **DocumentMerger**: Merges multiple DOCX files into a single document with page breaks
+## Requirements
+- Python 3.12+
+- python-docx 1.1.2
+## License
+This is a utility tool for internal use.
+## Support
+For issues or questions, please check the code documentation or modify according to your needs.

utils/__init__.py ADDED Viewed

	@@ -0,0 +1,8 @@

+"""Utility modules for ACT document generation"""
+from .number_converter import NumberToWords
+from .document_processor import DocumentProcessor
+from .receipt_parser import ReceiptParser
+from .document_merger import DocumentMerger
+__all__ = ['NumberToWords', 'DocumentProcessor', 'ReceiptParser', 'DocumentMerger']

utils/document_merger.py ADDED Viewed

	@@ -0,0 +1,158 @@

+"""Document merger for combining multiple DOCX files into one"""
+import os
+import glob
+from docx import Document
+from docx.enum.text import WD_BREAK
+from typing import List
class DocumentMerger:
    """Merge multiple DOCX files into a single document with page breaks"""

    def _add_page_break(self, doc):
        """
        Add a page break at the end of document
        Args:
            doc: DOCX document object
        """
        doc.add_paragraph().add_run().add_break(WD_BREAK.PAGE)

    def _copy_runs(self, source_para, target_para):
        """
        Append copies of all runs of one paragraph to another paragraph
        Only text, bold/italic/underline and font size/name are carried over.
        Args:
            source_para: Paragraph whose runs are read
            target_para: Paragraph receiving the copied runs
        """
        for run in source_para.runs:
            new_run = target_para.add_run(run.text)
            new_run.bold = run.bold
            new_run.italic = run.italic
            new_run.underline = run.underline
            # Only override size/name when the source sets them explicitly
            if run.font.size:
                new_run.font.size = run.font.size
            if run.font.name:
                new_run.font.name = run.font.name

    def _copy_paragraph(self, source_para, target_doc):
        """
        Copy paragraph (style, alignment and runs) to the end of target document
        Args:
            source_para: Source paragraph to copy
            target_doc: Target document to copy to
        """
        new_para = target_doc.add_paragraph(style=source_para.style)
        new_para.alignment = source_para.alignment
        self._copy_runs(source_para, new_para)

    def _copy_table(self, source_table, target_doc):
        """
        Copy table (style and cell paragraphs) to the end of target document
        Args:
            source_table: Source table to copy
            target_doc: Target document to copy to
        """
        new_table = target_doc.add_table(
            rows=len(source_table.rows),
            cols=len(source_table.columns),
        )
        if source_table.style:
            new_table.style = source_table.style
        for i, row in enumerate(source_table.rows):
            for j, cell in enumerate(row.cells):
                new_cell = new_table.rows[i].cells[j]
                # Reset the cell to a single empty paragraph
                new_cell.text = ''
                for para in cell.paragraphs:
                    new_para = new_cell.add_paragraph(style=para.style)
                    new_para.alignment = para.alignment
                    self._copy_runs(para, new_para)
                # Drop the leftover empty paragraph so the paragraph count
                # matches the source cell
                if len(new_cell.paragraphs) > len(cell.paragraphs):
                    p = new_cell.paragraphs[0]._element
                    p.getparent().remove(p)

    def merge_files(self, docx_files: List[str], output_file: str):
        """
        Merge multiple DOCX files into a single document
        The first file (in sorted order) is used as the base; the body of every
        further file is appended after a page break, keeping paragraphs and
        tables in the order in which they appear in that file.
        Args:
            docx_files: List of paths to DOCX files to merge
            output_file: Path to output merged document
        Raises:
            ValueError: If docx_files is empty
        """
        if not docx_files:
            raise ValueError("No DOCX files provided to merge")
        # Imported here because only this method needs the Table type
        from docx.table import Table

        # Sort files to ensure consistent order
        ordered_files = sorted(docx_files)
        merged_doc = Document(ordered_files[0])
        for docx_file in ordered_files[1:]:
            self._add_page_break(merged_doc)
            sub_doc = Document(docx_file)
            # Walk the body in document order; copying all paragraphs and
            # then all tables would push every table to the end.
            for item in sub_doc.iter_inner_content():
                if isinstance(item, Table):
                    self._copy_table(item, merged_doc)
                else:
                    self._copy_paragraph(item, merged_doc)
        merged_doc.save(output_file)

    def merge_from_folder(self, input_folder: str, output_file: str, pattern: str = '*.docx') -> int:
        """
        Merge all DOCX files from a folder into a single document
        Args:
            input_folder: Folder containing DOCX files
            output_file: Path to output merged document
            pattern: File pattern to match (default: *.docx)
        Returns:
            Number of documents merged
        Raises:
            ValueError: If no file in input_folder matches pattern
        """
        docx_files = glob.glob(os.path.join(input_folder, pattern))
        if not docx_files:
            raise ValueError(f"No DOCX files found in {input_folder} matching pattern {pattern}")
        self.merge_files(docx_files, output_file)
        return len(docx_files)

utils/document_processor.py ADDED Viewed

	@@ -0,0 +1,147 @@

+"""Document processor for filling DOCX templates with data"""
+from docx import Document
+from typing import Dict
class DocumentProcessor:
    """Process DOCX templates and fill them with data"""

    # Every tag the template may contain. No tag is a substring of another
    # ("<my-name>" ends in ">" where "<my-name-caps>" continues with "-caps").
    _TAGS = ('<my-name-caps>', '<my-name>', '<contract-date>',
             '<price-som>', '<price-leters>', '<today-date>')

    def __init__(self, template_path: str, user_name: str):
        """
        Initialize document processor
        Args:
            template_path: Path to DOCX template file
            user_name: User name to fill in the document
        """
        self.template_path = template_path
        self.user_name_original = user_name
        self.user_name_caps = user_name.upper()

    def _name_with_line_break(self) -> str:
        """Return the user name with its last word moved to a new line"""
        head, separator, tail = self.user_name_original.rpartition(' ')
        if not separator:
            # Single-word name: nothing to wrap
            return self.user_name_original
        return head + '\n' + tail

    def _replacement_for(self, tag: str, data: Dict[str, object]):
        """
        Resolve a tag to its replacement text and bold flag
        Values are read from data only for the requested tag, so a key that the
        template never references does not have to be present.
        Args:
            tag: One of the template tags
            data: Dictionary containing data to fill
        Returns:
            Tuple (replacement text, make_bold)
        """
        if tag == '<my-name-caps>':
            return self.user_name_caps, True
        if tag == '<my-name>':
            return self._name_with_line_break(), True
        if tag == '<contract-date>':
            return data['contract_date'], True
        if tag == '<price-som>':
            return str(data['price']), True
        if tag == '<price-leters>':
            # Price in words is the only value rendered in regular weight
            return data['price_words'], False
        if tag == '<today-date>':
            return data['today_date'], True
        raise KeyError(tag)

    def _split_run_with_tag(self, paragraph, run, tag: str, replacement: str, make_bold: bool = True):
        """
        Split a run containing a tag into three parts to preserve formatting
        The run keeps the text before the tag; two new runs (replacement and
        the text after the tag) are placed directly behind it. Only the first
        occurrence of the tag is handled per call.
        Args:
            paragraph: DOCX paragraph object
            run: DOCX run object
            tag: Tag to find and replace
            replacement: Text to replace the tag with
            make_bold: Whether to make the replacement text bold
        Returns:
            The run holding the text after the tag, or None if the tag is not
            in the run
        """
        text = run.text
        tag_pos = text.find(tag)
        if tag_pos < 0:
            return None
        before_text = text[:tag_pos]
        after_text = text[tag_pos + len(tag):]
        # Save original formatting before the run text is rewritten
        font_size = run.font.size
        font_name = run.font.name
        original_bold = run.bold
        run_index = paragraph._element.index(run._element)

        def apply_format(target, bold):
            target.bold = bold
            if font_size:
                target.font.size = font_size
            if font_name:
                target.font.name = font_name

        # Current run becomes the "before" part
        run.text = before_text
        apply_format(run, original_bold)
        # add_run appends at the paragraph end; insert() then moves the
        # element to its place right after the original run
        new_run = paragraph.add_run(replacement)
        apply_format(new_run, make_bold)
        paragraph._element.insert(run_index + 1, new_run._element)
        after_run = paragraph.add_run(after_text)
        apply_format(after_run, original_bold)
        paragraph._element.insert(run_index + 2, after_run._element)
        return after_run

    def _process_paragraph(self, paragraph, data: Dict[str, object]):
        """
        Process a single paragraph and replace all tags
        Args:
            paragraph: DOCX paragraph object
            data: Dictionary containing data to fill (contract_date, price, price_words, today_date)
        """
        if not any(tag in paragraph.text for tag in self._TAGS):
            return
        # NOTE: a tag is only found when it sits entirely inside one run; a
        # tag that the editor split across several runs is left untouched.
        # Work stack, reversed so that pop() yields runs in document order
        pending = list(paragraph.runs)
        pending.reverse()
        while pending:
            run = pending.pop()
            text = run.text
            hits = [(text.find(tag), tag) for tag in self._TAGS if tag in text]
            if not hits:
                continue
            # Replace the left-most tag first; the remaining text ends up in
            # the "after" run, which is queued so further tags (or a repeat of
            # the same tag) in the same run are replaced as well.
            _, tag = min(hits)
            replacement, make_bold = self._replacement_for(tag, data)
            after_run = self._split_run_with_tag(paragraph, run, tag, replacement, make_bold=make_bold)
            # The replacement run itself is never rescanned, so inserted text
            # that happens to look like a tag cannot cause endless replacement.
            if after_run is not None:
                pending.append(after_run)

    def process_document(self, data: Dict[str, object]):
        """
        Process template document with given data
        Args:
            data: Dictionary containing:
                - contract_date: Contract date string
                - price: Price as integer
                - price_words: Price in words
                - today_date: Today's date string
        Returns:
            Processed DOCX document
        """
        # A fresh copy of the template is loaded for every call
        doc = Document(self.template_path)
        # Body paragraphs
        for para in doc.paragraphs:
            self._process_paragraph(para, data)
        # Paragraphs inside top-level tables
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    for para in cell.paragraphs:
                        self._process_paragraph(para, data)
        return doc

utils/number_converter.py ADDED Viewed

	@@ -0,0 +1,121 @@

+"""Number to Russian words converter"""
class NumberToWords:
    """Convert numbers to Russian words in nominative case"""

    # Word tables indexed by digit; index 0 is empty so a zero digit adds nothing.
    ones = ('', 'один', 'два', 'три', 'четыре', 'пять', 'шесть', 'семь', 'восемь', 'девять')
    # Index 1 is empty as well: 10-19 are taken from ``teens``
    tens = ('', '', 'двадцать', 'тридцать', 'сорок', 'пятьдесят', 'шестьдесят', 'семьдесят',
            'восемьдесят', 'девяносто')
    teens = ('десять', 'одиннадцать', 'двенадцать', 'тринадцать', 'четырнадцать', 'пятнадцать',
             'шестнадцать', 'семнадцать', 'восемнадцать', 'девятнадцать')
    hundreds = ('', 'сто', 'двести', 'триста', 'четыреста', 'пятьсот', 'шестьсот', 'семьсот',
                'восемьсот', 'девятьсот')
    # Feminine unit forms used when counting thousands ("одна тысяча", "две тысячи")
    thousands = ('', 'одна', 'две', 'три', 'четыре', 'пять', 'шесть', 'семь', 'восемь', 'девять')

    _THOUSAND_FORMS = ('тысяча', 'тысячи', 'тысяч')
    # One entry per group of three digits, lowest group first:
    # ((form for 1, form for 2-4, form for the rest) or None, feminine units?)
    _SCALES = (
        (None, False),
        (_THOUSAND_FORMS, True),
        (('миллион', 'миллиона', 'миллионов'), False),
        (('миллиард', 'миллиарда', 'миллиардов'), False),
        (('триллион', 'триллиона', 'триллионов'), False),
    )

    def _group_words(self, num: int, feminine: bool) -> str:
        """Convert number 0-999 to words, optionally with feminine unit forms"""
        if num == 0:
            return ''
        units = self.thousands if feminine else self.ones
        words = []
        hundred, remainder = divmod(num, 100)
        if hundred:
            words.append(self.hundreds[hundred])
        if 10 <= remainder <= 19:
            words.append(self.teens[remainder - 10])
        else:
            ten, one = divmod(remainder, 10)
            if ten:
                words.append(self.tens[ten])
            if one:
                words.append(units[one])
        return ' '.join(words)

    @staticmethod
    def _plural_form(num: int, forms) -> str:
        """Pick the form of a counted noun that agrees with num"""
        # 11-14 take the "many" form although they end in 1-4
        if num % 100 in (11, 12, 13, 14):
            return forms[2]
        last_digit = num % 10
        if last_digit == 1:
            return forms[0]
        if last_digit in (2, 3, 4):
            return forms[1]
        return forms[2]

    def _convert_hundreds(self, num: int) -> str:
        """Convert number 0-999 to words"""
        return self._group_words(num, feminine=False)

    def _convert_thousands(self, num: int) -> str:
        """Convert thousands part (0-999) to words with feminine forms"""
        return self._group_words(num, feminine=True)

    def _thousand_word(self, num: int) -> str:
        """Get correct grammatical form of 'thousand' word"""
        return self._plural_form(num, self._THOUSAND_FORMS)

    def convert(self, num: int) -> str:
        """
        Convert integer to Russian words in nominative case
        Args:
            num: Integer number to convert
        Returns:
            String representation of the number in Russian
        Raises:
            ValueError: If the absolute value is 10**15 or larger
        """
        if num == 0:
            return 'ноль'
        if num < 0:
            return 'минус ' + self.convert(-num)
        # Split into groups of three digits, lowest group first
        groups = []
        rest = num
        while rest:
            rest, group = divmod(rest, 1000)
            groups.append(group)
        if len(groups) > len(self._SCALES):
            raise ValueError(f"Number is too large to convert: {num}")
        words = []
        # Emit from the highest group down; all-zero groups contribute nothing
        for power in range(len(groups) - 1, -1, -1):
            group = groups[power]
            if group == 0:
                continue
            forms, feminine = self._SCALES[power]
            words.append(self._group_words(group, feminine))
            if forms is not None:
                words.append(self._plural_form(group, forms))
        return ' '.join(words)

utils/receipt_parser.py ADDED Viewed

	@@ -0,0 +1,94 @@

+"""XML Receipt parser for ESFS documents"""
+import xml.etree.ElementTree as ET
+from datetime import datetime
+from typing import List, Dict
+import glob
class ReceiptParser:
    """Parser for extracting receipt data from XML files"""

    # Month names in Russian (genitive case for dates)
    MONTHS_GENITIVE = {
        1: 'января', 2: 'февраля', 3: 'марта', 4: 'апреля',
        5: 'мая', 6: 'июня', 7: 'июля', 8: 'августа',
        9: 'сентября', 10: 'октября', 11: 'ноября', 12: 'декабря'
    }

    def __init__(self, esfs_folder: str = 'esfs'):
        """
        Initialize receipt parser
        Args:
            esfs_folder: Path to folder containing XML files
        """
        self.esfs_folder = esfs_folder

    def find_xml_files(self) -> List[str]:
        """
        Find all XML files in the ESFS folder
        Returns:
            Sorted list of XML file paths
        """
        # Escape the folder so characters such as "[" in its name are not
        # treated as wildcards; sort because glob order is unspecified.
        pattern = f"{glob.escape(self.esfs_folder)}/*.xml"
        return sorted(glob.glob(pattern))

    def parse_receipts(self) -> List[ET.Element]:
        """
        Parse all receipts from all XML files in the folder
        Returns:
            List of receipt XML elements
        """
        all_receipts = []
        for xml_file in self.find_xml_files():
            # NOTE(security): xml.etree is not hardened against maliciously
            # constructed XML (e.g. entity-expansion bombs); the files parsed
            # here may be user uploads.
            root = ET.parse(xml_file).getroot()
            # iter() visits the root as well, so a file whose root element is
            # itself <receipt> is picked up too
            all_receipts.extend(root.iter('receipt'))
        return all_receipts

    @staticmethod
    def _required_text(receipt: ET.Element, path: str) -> str:
        """Return the stripped text of a mandatory child element"""
        node = receipt.find(path)
        text = node.text if node is not None else None
        if text is None or not text.strip():
            raise ValueError(f"Receipt is missing required element '{path}'")
        return text.strip()

    def _format_date(self, moment: datetime) -> str:
        """Format a date as '<day> <month in genitive> <year>'"""
        return f"{moment.day} {self.MONTHS_GENITIVE[moment.month]} {moment.year}"

    def extract_receipt_data(self, receipt: ET.Element) -> Dict[str, object]:
        """
        Extract data from a single receipt element
        Args:
            receipt: XML element containing receipt data
        Returns:
            Dictionary with extracted data
        Raises:
            ValueError: If a required element is missing or empty, or a value
                cannot be parsed
        """
        # Contract date. The "+06:00" suffix is removed before parsing so that
        # a date-only value carrying an offset ("2024-11-02+06:00") is accepted.
        contract_date_str = self._required_text(receipt, 'deliveryContractDate')
        contract_date = datetime.fromisoformat(contract_date_str.replace('+06:00', ''))
        # Price: taken from the first matching goods/good/price element and
        # truncated to whole units
        price_int = int(float(self._required_text(receipt, './/goods/good/price')))
        # Today's date (creation date)
        today_date = datetime.fromisoformat(self._required_text(receipt, 'createdDate'))
        return {
            'contract_date': self._format_date(contract_date),
            'price': price_int,
            'today_date': self._format_date(today_date)
        }

    def get_all_receipt_data(self) -> List[Dict[str, object]]:
        """
        Get data for all receipts in the folder
        Returns:
            List of dictionaries containing receipt data
        """
        return [self.extract_receipt_data(receipt) for receipt in self.parse_receipts()]