pharmaspine-backend / services /chunking-service /alembic /versions /20260521_0002_chunk_claim_foundation.py
| """chunk and claim foundation | |
| Revision ID: 20260521_0002 | |
| Revises: | |
| Create Date: 2026-05-21 13:40:00 | |
| """ | |
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# Alembic revision identifiers, read by Alembic when it loads this script.
revision = "20260521_0002"
# No parent revision is declared, so Alembic treats this script as a base.
# NOTE(review): upgrade() adds foreign keys to "sources" and "source_versions",
# which this script does not create — confirm those tables already exist
# (and whether an earlier revision should be named here) before running it.
down_revision = None
branch_labels = None
depends_on = None
# PostgreSQL enum types shared by upgrade() and downgrade().
#
# create_type=False keeps op.create_table() from emitting its own CREATE TYPE
# for these types. upgrade() already creates each type explicitly with
# checkfirst=True; without the flag, the table-create hook would issue a
# second, unconditional CREATE TYPE and fail with "type ... already exists".
# Explicit .create() / .drop() calls are not affected by the flag.

# Category of statement carried by a chunk or a claim.
claim_type_enum = postgresql.ENUM(
    "efficacy",
    "safety",
    "dose",
    "biomarker",
    "monitoring",
    "interaction",
    "line_of_therapy",
    "comparison_boundary",
    name="claimtype",
    create_type=False,
)

# Band stored in the strength_band columns of claims and evidence_assessments.
strength_band_enum = postgresql.ENUM(
    "High",
    "Medium",
    "Low",
    name="strengthband",
    create_type=False,
)

# Value stored in claim_evidence_links.support_type.
support_type_enum = postgresql.ENUM(
    "primary",
    "supporting",
    "conflicting",
    name="supporttype",
    create_type=False,
)

# Value stored in claim_relationships.relation_type.
graph_relation_type_enum = postgresql.ENUM(
    "TREATS",
    "EVIDENCE_FROM",
    "HAS_ENDPOINT",
    "HAS_RISK",
    "APPROVED_IN_REGION",
    "APPLIES_TO_POPULATION",
    "SUPPORTED_BY",
    "CONTRADICTED_BY",
    "REPLACES_VERSION",
    "EXTRACTED_FROM",
    "VISIBLE_TO_AUDIENCE",
    name="graphrelationtype",
    create_type=False,
)
def upgrade() -> None:
    """Create the enum types, the chunk/claim tables, and their indexes.

    Order of operations: the four enum types first, then the tables
    ``chunks``, ``claims``, ``claim_evidence_links``, ``evidence_assessments``
    and ``claim_relationships``, and finally the single-column indexes.
    """
    connection = op.get_bind()

    # Create each enum type up front, skipping any that already exists.
    for enum_type in (
        claim_type_enum,
        strength_band_enum,
        support_type_enum,
        graph_relation_type_enum,
    ):
        enum_type.create(connection, checkfirst=True)

    def cascade_fk(target: str) -> sa.ForeignKey:
        # Foreign key whose rows are removed together with the referenced row.
        return sa.ForeignKey(target, ondelete="CASCADE")

    def timestamp(column_name: str) -> sa.Column:
        # Required timezone-aware timestamp column.
        return sa.Column(column_name, sa.DateTime(timezone=True), nullable=False)

    op.create_table(
        "chunks",
        sa.Column("chunk_id", sa.String(length=36), primary_key=True),
        sa.Column(
            "source_id",
            sa.String(length=128),
            cascade_fk("sources.source_id"),
            nullable=False,
        ),
        sa.Column(
            "version_id",
            sa.String(length=36),
            cascade_fk("source_versions.version_id"),
            nullable=False,
        ),
        sa.Column("text", sa.Text(), nullable=False),
        sa.Column("claim_type", claim_type_enum, nullable=False),
        sa.Column("section_path", sa.String(length=512), nullable=False),
        sa.Column("page_start", sa.Integer(), nullable=False),
        sa.Column("page_end", sa.Integer(), nullable=False),
        sa.Column("token_count", sa.Integer(), nullable=False),
        sa.Column("audience_fit", sa.JSON(), nullable=False),
        sa.Column("geography_fit", sa.String(length=128), nullable=False),
        sa.Column("therapy_area", sa.String(length=128), nullable=False),
        sa.Column("embedding_id", sa.String(length=128), nullable=True),
        timestamp("created_at"),
    )

    op.create_table(
        "claims",
        sa.Column("claim_id", sa.String(length=36), primary_key=True),
        sa.Column("canonical_text", sa.Text(), nullable=False),
        sa.Column("claim_type", claim_type_enum, nullable=False),
        sa.Column("disease_id", sa.String(length=64), nullable=True),
        sa.Column("molecule_id", sa.String(length=64), nullable=True),
        sa.Column("population_id", sa.String(length=64), nullable=True),
        sa.Column("geography_id", sa.String(length=64), nullable=True),
        sa.Column("approval_state", sa.String(length=32), nullable=False),
        sa.Column(
            "sme_validated",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        ),
        # Deleting the referenced source nulls this column instead of
        # deleting the claim.
        sa.Column(
            "primary_source_id",
            sa.String(length=128),
            sa.ForeignKey("sources.source_id", ondelete="SET NULL"),
            nullable=True,
        ),
        sa.Column("current_evidence_score", sa.Float(), nullable=True),
        sa.Column("strength_band", strength_band_enum, nullable=True),
        timestamp("created_at"),
    )

    # Link table: the primary key is the (claim_id, chunk_id) pair.
    op.create_table(
        "claim_evidence_links",
        sa.Column(
            "claim_id",
            sa.String(length=36),
            cascade_fk("claims.claim_id"),
            primary_key=True,
        ),
        sa.Column(
            "chunk_id",
            sa.String(length=36),
            cascade_fk("chunks.chunk_id"),
            primary_key=True,
        ),
        sa.Column(
            "source_id",
            sa.String(length=128),
            cascade_fk("sources.source_id"),
            nullable=False,
        ),
        sa.Column("support_type", support_type_enum, nullable=False),
        sa.Column("extraction_confidence", sa.Float(), nullable=False),
        sa.Column(
            "is_primary_support",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        ),
    )

    # Every score column shares the same required NUMERIC(5, 4) definition.
    score_column_names = (
        "source_prior_score",
        "recency_score",
        "approval_score",
        "sme_score",
        "consistency_score",
        "audience_fit_score",
        "geography_fit_score",
        "penalty_score",
        "evidence_score",
    )
    op.create_table(
        "evidence_assessments",
        sa.Column("assessment_id", sa.String(length=36), primary_key=True),
        sa.Column(
            "claim_id",
            sa.String(length=36),
            cascade_fk("claims.claim_id"),
            nullable=False,
        ),
        *[
            sa.Column(score_name, sa.Numeric(5, 4), nullable=False)
            for score_name in score_column_names
        ],
        sa.Column("strength_band", strength_band_enum, nullable=False),
        sa.Column("explanation_json", sa.JSON(), nullable=False),
        timestamp("scored_at"),
    )

    op.create_table(
        "claim_relationships",
        sa.Column("relationship_id", sa.String(length=36), primary_key=True),
        sa.Column(
            "from_claim_id",
            sa.String(length=36),
            cascade_fk("claims.claim_id"),
            nullable=False,
        ),
        sa.Column(
            "to_claim_id",
            sa.String(length=36),
            cascade_fk("claims.claim_id"),
            nullable=False,
        ),
        sa.Column("relation_type", graph_relation_type_enum, nullable=False),
        sa.Column("relation_metadata", sa.JSON(), nullable=False),
        timestamp("created_at"),
    )

    # (index name, table, indexed column), in creation order.
    single_column_indexes = (
        ("ix_chunks_source_id", "chunks", "source_id"),
        ("ix_chunks_version_id", "chunks", "version_id"),
        ("ix_chunks_claim_type", "chunks", "claim_type"),
        ("ix_chunks_therapy_area", "chunks", "therapy_area"),
        ("ix_chunks_geography_fit", "chunks", "geography_fit"),
        ("ix_claims_claim_type", "claims", "claim_type"),
        ("ix_claims_molecule_id", "claims", "molecule_id"),
        ("ix_claims_geography_id", "claims", "geography_id"),
        ("ix_claims_current_evidence_score", "claims", "current_evidence_score"),
        ("ix_claim_evidence_links_source_id", "claim_evidence_links", "source_id"),
        ("ix_claim_evidence_links_support_type", "claim_evidence_links", "support_type"),
        ("ix_evidence_assessments_claim_id", "evidence_assessments", "claim_id"),
        ("ix_evidence_assessments_evidence_score", "evidence_assessments", "evidence_score"),
        ("ix_evidence_assessments_strength_band", "evidence_assessments", "strength_band"),
        ("ix_claim_relationships_from_claim_id", "claim_relationships", "from_claim_id"),
        ("ix_claim_relationships_to_claim_id", "claim_relationships", "to_claim_id"),
        ("ix_claim_relationships_relation_type", "claim_relationships", "relation_type"),
    )
    for index_name, owning_table, indexed_column in single_column_indexes:
        op.create_index(index_name, owning_table, [indexed_column])
def downgrade() -> None:
    """Drop the indexes, tables, and enum types created by upgrade().

    Everything is removed in the reverse of its creation order: indexes
    first, then the tables, and the enum types last, once no table column
    uses them.
    """
    # (index name, owning table), in drop order.
    indexes_to_drop = (
        ("ix_claim_relationships_relation_type", "claim_relationships"),
        ("ix_claim_relationships_to_claim_id", "claim_relationships"),
        ("ix_claim_relationships_from_claim_id", "claim_relationships"),
        ("ix_evidence_assessments_strength_band", "evidence_assessments"),
        ("ix_evidence_assessments_evidence_score", "evidence_assessments"),
        ("ix_evidence_assessments_claim_id", "evidence_assessments"),
        ("ix_claim_evidence_links_support_type", "claim_evidence_links"),
        ("ix_claim_evidence_links_source_id", "claim_evidence_links"),
        ("ix_claims_current_evidence_score", "claims"),
        ("ix_claims_geography_id", "claims"),
        ("ix_claims_molecule_id", "claims"),
        ("ix_claims_claim_type", "claims"),
        ("ix_chunks_geography_fit", "chunks"),
        ("ix_chunks_therapy_area", "chunks"),
        ("ix_chunks_claim_type", "chunks"),
        ("ix_chunks_version_id", "chunks"),
        ("ix_chunks_source_id", "chunks"),
    )
    for index_name, owning_table in indexes_to_drop:
        op.drop_index(index_name, table_name=owning_table)

    # Tables holding foreign keys to claims/chunks go before those two tables.
    for table_name in (
        "claim_relationships",
        "evidence_assessments",
        "claim_evidence_links",
        "claims",
        "chunks",
    ):
        op.drop_table(table_name)

    # Drop each enum type, skipping any that is already gone.
    connection = op.get_bind()
    for enum_type in (
        graph_relation_type_enum,
        support_type_enum,
        strength_band_enum,
        claim_type_enum,
    ):
        enum_type.drop(connection, checkfirst=True)