pharmaspine-backend / services /chunking-service /alembic /versions /20260521_0002_chunk_claim_foundation.py
ashish1265659565's picture
Upload folder using huggingface_hub
08fd094 verified
Raw
History Blame Contribute Delete
9.01 kB
"""chunk and claim foundation
Revision ID: 20260521_0002
Revises:
Create Date: 2026-05-21 13:40:00
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# Revision identifiers read by Alembic.
revision = "20260521_0002"
# NOTE(review): no parent revision is set, although the revision id ends in
# "_0002" and upgrade() adds foreign keys to "sources" and "source_versions",
# tables that this revision does not create. Confirm whether this should point
# at an earlier revision that creates them.
down_revision = None
branch_labels = None
depends_on = None
# Named enum types shared by the tables of this revision.
#
# They are declared as PostgreSQL ENUMs with ``create_type=False`` so that
# ``op.create_table()`` does not emit its own ``CREATE TYPE`` for them.
# upgrade() already creates every type explicitly (``checkfirst=True``) before
# creating the tables; with a plain ``sa.Enum`` the table-creation hook would
# issue a second, unchecked ``CREATE TYPE`` and fail on PostgreSQL because the
# type already exists. Calling ``.create()`` / ``.drop()`` directly still
# emits the DDL, so upgrade() and downgrade() keep working unchanged.

# Category of statement a chunk or claim makes.
claim_type_enum = postgresql.ENUM(
    "efficacy",
    "safety",
    "dose",
    "biomarker",
    "monitoring",
    "interaction",
    "line_of_therapy",
    "comparison_boundary",
    name="claimtype",
    create_type=False,
)

# Band stored in the ``strength_band`` columns of claims / evidence_assessments.
strength_band_enum = postgresql.ENUM(
    "High",
    "Medium",
    "Low",
    name="strengthband",
    create_type=False,
)

# Value stored in ``claim_evidence_links.support_type``.
support_type_enum = postgresql.ENUM(
    "primary",
    "supporting",
    "conflicting",
    name="supporttype",
    create_type=False,
)

# Value stored in ``claim_relationships.relation_type``.
graph_relation_type_enum = postgresql.ENUM(
    "TREATS",
    "EVIDENCE_FROM",
    "HAS_ENDPOINT",
    "HAS_RISK",
    "APPROVED_IN_REGION",
    "APPLIES_TO_POPULATION",
    "SUPPORTED_BY",
    "CONTRADICTED_BY",
    "REPLACES_VERSION",
    "EXTRACTED_FROM",
    "VISIBLE_TO_AUDIENCE",
    name="graphrelationtype",
    create_type=False,
)
def upgrade() -> None:
    """Create the enum types, tables, and indexes introduced by this revision.

    Order of operations: the four enum types are created first (with
    ``checkfirst=True``), then the tables ``chunks``, ``claims``,
    ``claim_evidence_links``, ``evidence_assessments`` and
    ``claim_relationships``, and finally the secondary indexes.
    """

    def cascade_fk(target: str) -> sa.ForeignKey:
        # Foreign key declared with ON DELETE CASCADE.
        return sa.ForeignKey(target, ondelete="CASCADE")

    def required_timestamp(name: str) -> sa.Column:
        # Non-nullable, timezone-aware timestamp column.
        return sa.Column(name, sa.DateTime(timezone=True), nullable=False)

    def flag_defaulting_to_false(name: str) -> sa.Column:
        # Non-nullable boolean whose server-side default is ``false``.
        return sa.Column(
            name,
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        )

    connection = op.get_bind()
    for enum_type in (
        claim_type_enum,
        strength_band_enum,
        support_type_enum,
        graph_relation_type_enum,
    ):
        enum_type.create(connection, checkfirst=True)

    op.create_table(
        "chunks",
        sa.Column("chunk_id", sa.String(length=36), primary_key=True),
        sa.Column(
            "source_id",
            sa.String(length=128),
            cascade_fk("sources.source_id"),
            nullable=False,
        ),
        sa.Column(
            "version_id",
            sa.String(length=36),
            cascade_fk("source_versions.version_id"),
            nullable=False,
        ),
        sa.Column("text", sa.Text(), nullable=False),
        sa.Column("claim_type", claim_type_enum, nullable=False),
        sa.Column("section_path", sa.String(length=512), nullable=False),
        sa.Column("page_start", sa.Integer(), nullable=False),
        sa.Column("page_end", sa.Integer(), nullable=False),
        sa.Column("token_count", sa.Integer(), nullable=False),
        sa.Column("audience_fit", sa.JSON(), nullable=False),
        sa.Column("geography_fit", sa.String(length=128), nullable=False),
        sa.Column("therapy_area", sa.String(length=128), nullable=False),
        sa.Column("embedding_id", sa.String(length=128), nullable=True),
        required_timestamp("created_at"),
    )

    op.create_table(
        "claims",
        sa.Column("claim_id", sa.String(length=36), primary_key=True),
        sa.Column("canonical_text", sa.Text(), nullable=False),
        sa.Column("claim_type", claim_type_enum, nullable=False),
        # Optional 64-character reference ids, in their original column order.
        *(
            sa.Column(ref_name, sa.String(length=64), nullable=True)
            for ref_name in (
                "disease_id",
                "molecule_id",
                "population_id",
                "geography_id",
            )
        ),
        sa.Column("approval_state", sa.String(length=32), nullable=False),
        flag_defaulting_to_false("sme_validated"),
        sa.Column(
            "primary_source_id",
            sa.String(length=128),
            sa.ForeignKey("sources.source_id", ondelete="SET NULL"),
            nullable=True,
        ),
        sa.Column("current_evidence_score", sa.Float(), nullable=True),
        sa.Column("strength_band", strength_band_enum, nullable=True),
        required_timestamp("created_at"),
    )

    # Link table: (claim_id, chunk_id) together form the primary key.
    op.create_table(
        "claim_evidence_links",
        sa.Column(
            "claim_id",
            sa.String(length=36),
            cascade_fk("claims.claim_id"),
            primary_key=True,
        ),
        sa.Column(
            "chunk_id",
            sa.String(length=36),
            cascade_fk("chunks.chunk_id"),
            primary_key=True,
        ),
        sa.Column(
            "source_id",
            sa.String(length=128),
            cascade_fk("sources.source_id"),
            nullable=False,
        ),
        sa.Column("support_type", support_type_enum, nullable=False),
        sa.Column("extraction_confidence", sa.Float(), nullable=False),
        flag_defaulting_to_false("is_primary_support"),
    )

    op.create_table(
        "evidence_assessments",
        sa.Column("assessment_id", sa.String(length=36), primary_key=True),
        sa.Column(
            "claim_id",
            sa.String(length=36),
            cascade_fk("claims.claim_id"),
            nullable=False,
        ),
        # Every score column is a required NUMERIC(5, 4).
        *(
            sa.Column(score_name, sa.Numeric(5, 4), nullable=False)
            for score_name in (
                "source_prior_score",
                "recency_score",
                "approval_score",
                "sme_score",
                "consistency_score",
                "audience_fit_score",
                "geography_fit_score",
                "penalty_score",
                "evidence_score",
            )
        ),
        sa.Column("strength_band", strength_band_enum, nullable=False),
        sa.Column("explanation_json", sa.JSON(), nullable=False),
        required_timestamp("scored_at"),
    )

    op.create_table(
        "claim_relationships",
        sa.Column("relationship_id", sa.String(length=36), primary_key=True),
        sa.Column(
            "from_claim_id",
            sa.String(length=36),
            cascade_fk("claims.claim_id"),
            nullable=False,
        ),
        sa.Column(
            "to_claim_id",
            sa.String(length=36),
            cascade_fk("claims.claim_id"),
            nullable=False,
        ),
        sa.Column("relation_type", graph_relation_type_enum, nullable=False),
        sa.Column("relation_metadata", sa.JSON(), nullable=False),
        required_timestamp("created_at"),
    )

    # Single-column indexes, named ix_<table>_<column>; creation order is
    # table by table, columns in the order listed.
    indexed_columns = (
        (
            "chunks",
            ("source_id", "version_id", "claim_type", "therapy_area", "geography_fit"),
        ),
        (
            "claims",
            ("claim_type", "molecule_id", "geography_id", "current_evidence_score"),
        ),
        ("claim_evidence_links", ("source_id", "support_type")),
        ("evidence_assessments", ("claim_id", "evidence_score", "strength_band")),
        ("claim_relationships", ("from_claim_id", "to_claim_id", "relation_type")),
    )
    for table_name, column_names in indexed_columns:
        for column_name in column_names:
            op.create_index(
                f"ix_{table_name}_{column_name}", table_name, [column_name]
            )
def downgrade() -> None:
    """Undo upgrade(): drop the indexes, then the tables, then the enum types.

    Everything is removed in the reverse of the order upgrade() created it,
    so tables holding foreign keys go before the tables they reference.
    """
    # (table, indexed columns) pairs, listed in drop order. Index names follow
    # the ix_<table>_<column> pattern.
    teardown_plan = (
        ("claim_relationships", ("relation_type", "to_claim_id", "from_claim_id")),
        ("evidence_assessments", ("strength_band", "evidence_score", "claim_id")),
        ("claim_evidence_links", ("support_type", "source_id")),
        (
            "claims",
            ("current_evidence_score", "geography_id", "molecule_id", "claim_type"),
        ),
        (
            "chunks",
            ("geography_fit", "therapy_area", "claim_type", "version_id", "source_id"),
        ),
    )

    for table_name, column_names in teardown_plan:
        for column_name in column_names:
            op.drop_index(f"ix_{table_name}_{column_name}", table_name=table_name)

    for table_name, _ in teardown_plan:
        op.drop_table(table_name)

    # The enum types can only go once no column uses them any more.
    connection = op.get_bind()
    for enum_type in (
        graph_relation_type_enum,
        support_type_enum,
        strength_band_enum,
        claim_type_enum,
    ):
        enum_type.drop(connection, checkfirst=True)