-- Database Schema for Lung Cancer Treatment Recommendation System (Version 1) -- Enable the pgvector extension CREATE EXTENSION IF NOT EXISTS vector; CREATE TABLE sources ( id SERIAL PRIMARY KEY, name VARCHAR(255), type VARCHAR(50), -- "fda_label", "guideline", "research_paper" disease VARCHAR(50), -- "nsclc" publication_date DATE, version VARCHAR(20), content_raw TEXT -- Full raw text ); CREATE TABLE chunks ( id SERIAL PRIMARY KEY, source_id INT REFERENCES sources(id), chunk_text TEXT, chunk_index INT, -- Position in document token_count INT, created_at TIMESTAMP DEFAULT NOW() ); CREATE TABLE embeddings ( chunk_id INT REFERENCES chunks(id), embedding vector(384), created_at TIMESTAMP DEFAULT NOW() ); CREATE TABLE entities ( id SERIAL PRIMARY KEY, name VARCHAR(255), entity_type VARCHAR(50), -- "drug", "disease", "symptom", "dosage" source_id INT REFERENCES sources(id), properties JSONB -- e.g., {"dosage": "500mg", "route": "IV"} ); CREATE TABLE relationships ( id SERIAL PRIMARY KEY, source_entity_id INT REFERENCES entities(id), target_entity_id INT REFERENCES entities(id), relationship_type VARCHAR(100), -- "treats", "causes", "contraindicated_with" confidence FLOAT, -- 0.0-1.0 source_id INT REFERENCES sources(id), properties JSONB ); CREATE TABLE search_cache ( id SERIAL PRIMARY KEY, query_hash VARCHAR(256), results JSONB, created_at TIMESTAMP DEFAULT NOW() ); -- Indexes for performance CREATE INDEX ON embeddings USING ivfflat (embedding vector_cosine_ops); CREATE INDEX ON chunks (source_id); CREATE INDEX ON entities (entity_type);