"""Document processor for filling DOCX templates with data"""

from typing import Any, Dict

from docx import Document


class DocumentProcessor:
    """Process DOCX templates and fill them with data"""

    def __init__(self, template_path: str, user_name: str):
        """
        Initialize document processor

        Args:
            template_path: Path to DOCX template file
            user_name: User name to fill in the document
        """
        self.template_path = template_path
        # Both spellings are kept: the upper-cased form and the name as given
        # are used by different tags in _process_paragraph.
        self.user_name_original = user_name
        self.user_name_caps = user_name.upper()

    @staticmethod
    def _apply_format(run, bold, font_size, font_name):
        """
        Set the bold flag and, when truthy, the font size and name on a run

        Args:
            run: DOCX run object to modify
            bold: Value assigned to run.bold (may be None)
            font_size: Font size to assign; skipped when falsy
            font_name: Font name to assign; skipped when falsy
        """
        run.bold = bold
        if font_size:
            run.font.size = font_size
        if font_name:
            run.font.name = font_name

    def _split_run_with_tag(self, paragraph, run, tag: str, replacement: str, make_bold: bool = True):
        """
        Split a run containing a tag into three runs: before / replacement / after

        Only the first occurrence of the tag in the run is replaced. The
        original run keeps the text before the tag; two new runs holding the
        replacement and the text after the tag are placed directly behind it.
        Only bold, font size and font name are carried over to the new runs.

        Args:
            paragraph: DOCX paragraph object that owns the run
            run: DOCX run object
            tag: Tag to find and replace
            replacement: Text to replace the tag with
            make_bold: Whether to make the replacement text bold

        Returns:
            The newly created run holding the text after the tag, or None
            when the tag does not occur in the run (nothing is changed then).
        """
        text = run.text
        tag_pos = text.find(tag)
        if tag_pos == -1:
            return None

        before_text = text[:tag_pos]
        after_text = text[tag_pos + len(tag):]

        # Save original formatting before the run is touched
        font_size = run.font.size
        font_name = run.font.name
        original_bold = run.bold

        # Position of the run among the paragraph element's children; the new
        # runs are inserted relative to it.
        run_index = paragraph._element.index(run._element)

        # The existing run becomes the "before" part
        run.text = before_text
        self._apply_format(run, original_bold, font_size, font_name)

        # add_run() creates the run at the end of the paragraph; inserting its
        # element at run_index + 1 puts it right after the original run.
        new_run = paragraph.add_run(replacement)
        self._apply_format(new_run, make_bold, font_size, font_name)
        paragraph._element.insert(run_index + 1, new_run._element)

        # The "after" part gets the original formatting and follows the
        # replacement run.
        after_run = paragraph.add_run(after_text)
        self._apply_format(after_run, original_bold, font_size, font_name)
        paragraph._element.insert(run_index + 2, after_run._element)

        return after_run

    def _name_with_newline(self) -> str:
        """Return the user name with its last space replaced by a newline"""
        # rsplit yields one part when there is no space, so the join then
        # returns the name unchanged.
        return '\n'.join(self.user_name_original.rsplit(' ', 1))

    def _replace_first_tag(self, paragraph, run, tag_specs):
        """
        Replace the earliest tag found in a run

        Args:
            paragraph: DOCX paragraph object that owns the run
            run: DOCX run object to scan
            tag_specs: Sequence of (tag, resolver, make_bold) entries in
                priority order; resolver is a zero-argument callable
                returning the replacement text

        Returns:
            The run holding the text after the replaced tag when that text
            still needs scanning, otherwise None.
        """
        text = run.text
        best = None
        for tag, resolver, make_bold in tag_specs:
            pos = text.find(tag)
            # Strict "<" keeps the earlier entry of tag_specs on a tie.
            if pos != -1 and (best is None or pos < best[0]):
                best = (pos, tag, resolver, make_bold)

        if best is None:
            return None

        _, tag, resolver, make_bold = best
        after_run = self._split_run_with_tag(
            paragraph, run, tag, resolver(), make_bold=make_bold
        )
        # An empty tag consumes no text, so scanning the remainder again would
        # never finish; stop after a single split in that case.
        return after_run if tag else None

    def _process_paragraph(self, paragraph, data: Dict[str, Any]):
        """
        Process a single paragraph and replace all tags

        Every run of the paragraph is scanned; tags are replaced left to
        right, including several tags inside the same run. A tag whose text is
        spread over more than one run is not detected.

        Args:
            paragraph: DOCX paragraph object
            data: Dictionary containing data to fill
                (contract_date, price, price_words, today_date). A key is
                only read when its tag is actually found.
        """
        # NOTE(review): every tag literal below is an empty string in this
        # copy of the file. An empty tag matches at offset 0 of any run, so
        # only the first entry can ever fire. Confirm the intended
        # placeholder strings.
        # Entries are (tag, replacement resolver, make_bold) in priority
        # order. Resolvers are lazy so a missing data key raises only when
        # its tag is present.
        tag_specs = (
            # User name in upper case
            ('', lambda: self.user_name_caps, True),
            # User name with last word on new line
            ('', self._name_with_newline, True),
            # Contract date
            ('', lambda: data['contract_date'], True),
            # Price
            ('', lambda: str(data['price']), True),
            # Price in words (NOT bold)
            ('', lambda: data['price_words'], False),
            # Today's date
            ('', lambda: data['today_date'], True),
        )

        paragraph_text = paragraph.text
        if not any(tag in paragraph_text for tag, _, _ in tag_specs):
            return

        # Snapshot the runs because splitting adds runs to the paragraph
        for run in list(paragraph.runs):
            current = run
            # Keep going into the trailing run so later tags that were in the
            # same original run are replaced as well.
            while current is not None:
                current = self._replace_first_tag(paragraph, current, tag_specs)

    def process_document(self, data: Dict[str, Any]) -> Document:
        """
        Process template document with given data

        The template is loaded from self.template_path. Paragraphs from
        doc.paragraphs and paragraphs inside the cells of doc.tables are
        processed. The document is returned, not saved.

        Args:
            data: Dictionary containing:
                - contract_date: Contract date string
                - price: Price as integer
                - price_words: Price in words
                - today_date: Today's date string

        Returns:
            Processed DOCX document
        """
        # Load template
        doc = Document(self.template_path)

        # Process all paragraphs
        for para in doc.paragraphs:
            self._process_paragraph(para, data)

        # Process tables
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    for para in cell.paragraphs:
                        self._process_paragraph(para, data)

        return doc