"""XML Receipt parser for ESFS documents""" import xml.etree.ElementTree as ET from datetime import datetime from typing import List, Dict import glob class ReceiptParser: """Parser for extracting receipt data from XML files""" # Month names in Russian (genitive case for dates) MONTHS_GENITIVE = { 1: 'января', 2: 'февраля', 3: 'марта', 4: 'апреля', 5: 'мая', 6: 'июня', 7: 'июля', 8: 'августа', 9: 'сентября', 10: 'октября', 11: 'ноября', 12: 'декабря' } def __init__(self, esfs_folder: str = 'esfs'): """ Initialize receipt parser Args: esfs_folder: Path to folder containing XML files """ self.esfs_folder = esfs_folder def find_xml_files(self) -> List[str]: """ Find all XML files in the ESFS folder Returns: List of XML file paths """ pattern = f"{self.esfs_folder}/*.xml" return glob.glob(pattern) def parse_receipts(self) -> List[ET.Element]: """ Parse all receipts from all XML files in the folder Returns: List of receipt XML elements """ xml_files = self.find_xml_files() all_receipts = [] for xml_file in xml_files: tree = ET.parse(xml_file) root = tree.getroot() receipts = root.findall('.//receipt') all_receipts.extend(receipts) return all_receipts def extract_receipt_data(self, receipt: ET.Element) -> Dict[str, any]: """ Extract data from a single receipt element Args: receipt: XML element containing receipt data Returns: Dictionary with extracted data """ # Contract date contract_date_str = receipt.find('deliveryContractDate').text contract_date = datetime.fromisoformat(contract_date_str.replace('+06:00', '')) contract_date_formatted = f"{contract_date.day} {self.MONTHS_GENITIVE[contract_date.month]} {contract_date.year}" # Price price_str = receipt.find('.//goods/good/price').text price_int = int(float(price_str)) # Today's date (creation date) today_date_str = receipt.find('createdDate').text today_date = datetime.fromisoformat(today_date_str) today_date_formatted = f"{today_date.day} {self.MONTHS_GENITIVE[today_date.month]} {today_date.year}" return { 'contract_date': contract_date_formatted, 'price': price_int, 'today_date': today_date_formatted } def get_all_receipt_data(self) -> List[Dict[str, any]]: """ Get data for all receipts in the folder Returns: List of dictionaries containing receipt data """ receipts = self.parse_receipts() return [self.extract_receipt_data(receipt) for receipt in receipts]