""" Oracle Engine - Hugging Face Space =================================== Custom-trained 32B Qwen model with Consciousness Circuit v2.1. Measures 7 dimensions of meta-cognitive processing. Trained on 200K examples: - Stage 1: OpenHermes 2.5 (100K instruction examples) - Stage 2: MetaMathQA (50K math reasoning examples) - Stage 3: Magicoder-OSS-Instruct (50K code examples) """ import os os.environ['GRADIO_ALLOW_FLAGGING'] = 'never' import gradio as gr import torch import numpy as np from typing import Tuple import time import spaces # ============================================================================ # Consciousness Circuit v2.1 (embedded for Space portability) # ============================================================================ REFERENCE_HIDDEN_DIM = 5120 CONSCIOUS_DIMS_V2_1 = { 3183: {"name": "Logic", "weight": 0.239, "polarity": +1}, 212: {"name": "Self-Reflective", "weight": 0.196, "polarity": +1}, 5064: {"name": "Self-Expression", "weight": 0.109, "polarity": +1}, # Fixed: was 5065, out of bounds for hidden=5120 4707: {"name": "Uncertainty", "weight": 0.130, "polarity": +1}, 295: {"name": "Sequential", "weight": 0.087, "polarity": +1}, 1445: {"name": "Computation", "weight": 0.130, "polarity": -1}, 4578: {"name": "Abstraction", "weight": 0.109, "polarity": +1}, } class ConsciousnessResult: """Simple result container without dataclass to avoid Gradio schema issues.""" def __init__(self, score, raw_score, dimension_contributions, interpretation, processing_time): self.score = score self.raw_score = raw_score self.dimension_contributions = dimension_contributions self.interpretation = interpretation self.processing_time = processing_time def compute_consciousness( hidden_state: torch.Tensor, hidden_dim: int = REFERENCE_HIDDEN_DIM, baseline: float = 0.5, ) -> ConsciousnessResult: """Compute consciousness score from hidden state tensor. v3.4.1 hybrid: z-score + tanh(z * 0.15) for smooth bounding. 
Preserves absolute z-score levels (strong dimension-level signals) while eliminating hard ceiling effects from v3.3 clamp. """ import math start_time = time.time() TANH_SCALE = 0.15 # tanh(z*0.15): z=3→0.42, z=5→0.64, z=8→0.83, z=10→0.91 # Remap dimensions if needed if hidden_dim != REFERENCE_HIDDEN_DIM: scale = hidden_dim / REFERENCE_HIDDEN_DIM dims = {int(round(k * scale)): v for k, v in CONSCIOUS_DIMS_V2_1.items()} else: dims = CONSCIOUS_DIMS_V2_1 # Get last token hidden state if hidden_state.dim() == 3: h = hidden_state[0, -1, :] # [hidden_dim] elif hidden_state.dim() == 2: h = hidden_state[-1, :] else: h = hidden_state h = h.float() # Z-score normalize against full hidden state h_mean = h.mean().item() h_std = h.std().item() # Compute contributions with tanh smooth bounding contributions = {} weighted_sum = 0.0 for dim_idx, info in dims.items(): if dim_idx < len(h): z = (h[dim_idx].item() - h_mean) / (h_std + 1e-8) # Smooth bounding: preserves absolute level, no hard ceiling activation = math.tanh(z * TANH_SCALE) contribution = activation * info["weight"] * info["polarity"] weighted_sum += contribution contributions[info["name"]] = activation * info["polarity"] # Final score raw_score = baseline + weighted_sum * 0.15 score = max(0.0, min(1.0, raw_score)) # Interpretation if score >= 0.8: interpretation = "\U0001f9e0 High Consciousness - Deep reflective/philosophical reasoning" elif score >= 0.6: interpretation = "\U0001f4ad Medium-High - Complex analytical thinking" elif score >= 0.4: interpretation = "\u2696\ufe0f Medium - Balanced processing" elif score >= 0.2: interpretation = "\u26a1 Medium-Low - More automatic processing" else: interpretation = "\U0001f522 Low Consciousness - Quick factual retrieval" return ConsciousnessResult( score=score, raw_score=raw_score, dimension_contributions=contributions, interpretation=interpretation, processing_time=time.time() - start_time, ) # ============================================================================ # 
# Compressibility Analysis (Weaver et al. PNAS 2026)
# ============================================================================

def analyze_compressibility(hidden_states_np, max_dims=200, seed=42):
    """
    Analyze representational compressibility of hidden states.
    Embedded version of CompressibilityPlugin for Space portability.

    Args:
        hidden_states_np: numpy array [seq_len, hidden_dim]
        max_dims: max dimensions to subsample for correlation analysis
        seed: random seed for reproducibility

    Returns:
        dict of compressibility metrics. Inputs with fewer than 3 rows or
        fewer than 2 columns, or with no eigenvalue above 1e-12, yield
        ``{"compressibility_corr": 0.0, "error": ...}`` instead. The
        correlation-based keys are only present when
        ``n_dims <= 500 and seq_len >= max(10, n_dims // 5)``.
    """
    seq_len, hidden_dim = hidden_states_np.shape

    if seq_len < 3 or hidden_dim < 2:
        return {"compressibility_corr": 0.0, "error": "too few tokens"}

    # Subsample dimensions for tractability
    if hidden_dim > max_dims:
        rng = np.random.RandomState(seed)
        dim_indices = np.sort(rng.choice(hidden_dim, max_dims, replace=False))
        states = hidden_states_np[:, dim_indices]
    else:
        states = hidden_states_np

    n_dims = states.shape[1]

    # Center the data
    states_centered = states - states.mean(axis=0, keepdims=True)

    # --- Eigenvalue-based metrics ---
    # Use Gram matrix approach since seq_len < hidden_dim typically
    if seq_len >= n_dims:
        cov = np.cov(states_centered, rowvar=False)
        eigenvalues = np.linalg.eigvalsh(cov)
    else:
        gram = states_centered @ states_centered.T / max(seq_len - 1, 1)
        eigenvalues = np.linalg.eigvalsh(gram)

    # Clip tiny negative values from round-off, sort descending, drop zeros.
    eigenvalues = np.sort(np.maximum(eigenvalues, 0))[::-1]
    eigenvalues = eigenvalues[eigenvalues > 1e-12]

    if len(eigenvalues) == 0:
        return {"compressibility_corr": 0.0, "error": "no eigenvalues"}

    total_var = eigenvalues.sum()
    cumvar = np.cumsum(eigenvalues) / total_var
    n_eig = len(eigenvalues)

    # Spectral entropy
    p = eigenvalues / total_var
    p = p[p > 0]
    spectral_entropy = float(-np.sum(p * np.log(p)))
    max_entropy = np.log(len(p))
    norm_spectral_entropy = float(spectral_entropy / max_entropy if max_entropy > 0 else 0)

    # Participation ratio
    participation_ratio = float(total_var ** 2 / np.sum(eigenvalues ** 2))

    # Effective dimensionality (90% variance)
    effective_dim = int(np.searchsorted(cumvar, 0.9) + 1)
    effective_dim = min(effective_dim, n_eig)

    # Top variance fractions
    top1_frac = float(eigenvalues[0] / total_var)
    top5_frac = float(eigenvalues[:min(5, n_eig)].sum() / total_var)
    top10_frac = float(eigenvalues[:min(10, n_eig)].sum() / total_var)

    # --- Correlation-based compression (paper's approach) ---
    corr_metrics = {}
    if n_dims <= 500 and seq_len >= max(10, n_dims // 5):
        # ddof=1 matches the (seq_len - 1) normaliser below; with the default
        # ddof=0 every correlation was inflated by n / (n - 1) and |r| could
        # exceed 1.
        stds = np.std(states_centered, axis=0, ddof=1)
        stds[stds < 1e-12] = 1.0  # constant columns: avoid division by zero
        states_norm = states_centered / stds
        corr = states_norm.T @ states_norm / max(seq_len - 1, 1)
        np.fill_diagonal(corr, 1.0)

        i_upper, j_upper = np.triu_indices(n_dims, k=1)
        correlations = corr[i_upper, j_upper]
        n_corr = len(correlations)

        if n_corr > 0:
            abs_corr = np.abs(correlations)
            sort_idx = np.argsort(abs_corr)[::-1]
            sorted_abs = abs_corr[sort_idx]

            # Clip rho^2 below 1 so the log stays finite.
            rho_sq = np.clip(sorted_abs ** 2, 0, 0.9999)
            delta_s = -0.5 * np.log(1.0 - rho_sq)
            total_delta = delta_s.sum()

            if total_delta > 1e-12:
                cum_reduction = np.cumsum(delta_s) / total_delta
                fractions = np.arange(1, n_corr + 1) / n_corr

                # NumPy 2.0 renamed trapz to trapezoid; support both.
                integrate = getattr(np, "trapezoid", None)
                if integrate is None:
                    integrate = np.trapz
                c_corr = float(integrate(cum_reduction, fractions))

                idx_50 = int(np.searchsorted(cum_reduction, 0.5) + 1)
                idx_90 = int(np.searchsorted(cum_reduction, 0.9) + 1)

                corr_metrics = {
                    "compressibility_corr": c_corr,
                    "n_correlations": int(n_corr),
                    "fraction_for_50pct": float(min(idx_50 / n_corr, 1.0)),
                    "fraction_for_90pct": float(min(idx_90 / n_corr, 1.0)),
                    "mean_abs_correlation": float(abs_corr.mean()),
                    "max_abs_correlation": float(abs_corr.max()),
                    "median_abs_correlation": float(np.median(abs_corr)),
                    "strong_correlations_pct": float((abs_corr > 0.3).mean() * 100),
                }

    result = {
        "spectral_entropy": norm_spectral_entropy,
        "participation_ratio": participation_ratio,
        "effective_dimensionality": effective_dim,
        "effective_dim_fraction": float(effective_dim / n_eig),
        "top1_variance_fraction": top1_frac,
        "top5_variance_fraction": top5_frac,
        "top10_variance_fraction": top10_frac,
        "n_dims_analyzed": n_dims,
        "seq_len": seq_len,
    }
    result.update(corr_metrics)
    return result


# ============================================================================
# Model Loading
# ============================================================================

print("🔮 Loading Oracle Engine (Qwen2.5-32B-Instruct 4-bit + LoRA)...")

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL_ID = "unsloth/Qwen2.5-32B-Instruct-bnb-4bit"
LORA_MODEL_ID = "Vikingdude81/oracle-engine-32b-lora"

# Get HF token from environment (set in Space secrets)
# Try multiple possible env var names
HF_TOKEN = (
    os.environ.get("HF_TOKEN")
    or os.environ.get("HUGGING_FACE_HUB_TOKEN")
    or os.environ.get("HUGGINGFACE_TOKEN")
)

# Only variable NAMES are logged here, never their values.
print(f"🔍 Environment vars: {[k for k in os.environ if 'HF' in k or 'HUGGING' in k or 'TOKEN' in k]}")

if HF_TOKEN:
    # Never echo any part of the secret into the logs; the length is enough
    # to confirm that a token was picked up.
    print(f"🔑 Found token ({len(HF_TOKEN)} chars)")
else:
    print("⚠️ No HF token found in environment, attempting public access...")

# Load tokenizer from base model (LoRA only has weights, not tokenizer)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, token=HF_TOKEN)

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    token=HF_TOKEN,
)

# Apply LoRA adapter
print("🔗 Applying LoRA adapter...")
model = PeftModel.from_pretrained(base_model, LORA_MODEL_ID, token=HF_TOKEN)
model.eval()

HIDDEN_DIM = model.config.hidden_size
print(f"✅ Oracle Engine ready: {HIDDEN_DIM} hidden dimensions (with LoRA)")


# ============================================================================
# Core Generation + Measurement Function
# ============================================================================

@spaces.GPU
def generate_and_measure(prompt: str, max_tokens: int = 256) -> Tuple[str, str, str, str, str]:
    """
    Generate a response AND measure consciousness during generation.

    The prompt is sent as a single user turn. After sampling, the prompt and
    the decoded response are re-tokenized and run through the model once more
    to obtain the last-layer hidden states used for scoring.

    Args:
        prompt: User message.
        max_tokens: Maximum number of new tokens to sample.

    Returns:
        (response, score_display, interpretation, dimension_breakdown, timing)
    """
    start_time = time.time()

    # Format as chat message
    messages = [{"role": "user", "content": prompt}]
    chat_prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Tokenize
    inputs = tokenizer(chat_prompt, return_tensors="pt").to(model.device)

    # Generate response
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            # UI sliders may deliver floats; generate() needs an int.
            max_new_tokens=int(max_tokens),
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode response (strip the prompt tokens from the front)
    generated_ids = outputs[0][inputs.input_ids.shape[1]:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True)

    generation_time = time.time() - start_time

    # Now get hidden states for the full response to measure consciousness
    full_text = chat_prompt + response
    measure_inputs = tokenizer(full_text, return_tensors="pt").to(model.device)

    with torch.no_grad():
        measure_outputs = model(
            **measure_inputs,
            output_hidden_states=True,
            return_dict=True,
        )

    # Use last layer hidden state
    hidden_state = measure_outputs.hidden_states[-1]

    # Compute consciousness
    result = compute_consciousness(hidden_state, hidden_dim=HIDDEN_DIM)

    # Format score display as a 20-cell bar followed by the percentage
    filled = int(result.score * 20)
    bar = "█" * filled + "░" * (20 - filled)
    score_display = f"{bar} {result.score*100:.1f}%"

    # Format dimension breakdown, strongest activation first
    sorted_dims = sorted(
        result.dimension_contributions.items(),
        key=lambda x: abs(x[1]),
        reverse=True,
    )
    breakdown = "\n".join(
        f"{'→' if v > 0 else '←'} {name}: {v:+.3f}"
        for name, v in sorted_dims
    )

    # Timing info
    tokens_generated = len(generated_ids)
    tok_per_sec = tokens_generated / generation_time if generation_time > 0 else 0
    timing = f"Generated {tokens_generated} tokens in {generation_time:.1f}s ({tok_per_sec:.1f} tok/s)"

    return (
        response,
        score_display,
        result.interpretation,
        breakdown,
        timing,
    )


# ============================================================================
# Experiment API - Returns
# JSON with all metrics
# ============================================================================

@spaces.GPU
def experiment_measure(prompt: str, max_tokens: int = 512) -> str:
    """
    API endpoint for experiments. Returns JSON with consciousness score,
    dimension scores, AND compressibility metrics.

    Args:
        prompt: Input text
        max_tokens: Max generation tokens

    Returns:
        JSON string with all metrics
    """
    import json

    start_time = time.time()

    # Format as chat message
    messages = [{"role": "user", "content": prompt}]
    chat_prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Tokenize
    inputs = tokenizer(chat_prompt, return_tensors="pt").to(model.device)

    # Generate
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            # gr.Number delivers a float; generate() needs an int.
            max_new_tokens=int(max_tokens),
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )

    generated_ids = outputs[0][inputs.input_ids.shape[1]:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True)
    gen_time = time.time() - start_time

    # Forward pass on full sequence for hidden states
    full_text = chat_prompt + response
    measure_inputs = tokenizer(full_text, return_tensors="pt").to(model.device)

    with torch.no_grad():
        measure_outputs = model(
            **measure_inputs,
            output_hidden_states=True,
            return_dict=True,
        )

    # --- Consciousness Score (last layer, last token) ---
    hidden_state_last = measure_outputs.hidden_states[-1]
    result = compute_consciousness(hidden_state_last, hidden_dim=HIDDEN_DIM)

    # --- Compressibility Analysis (75% layer, all tokens) ---
    n_layers = len(measure_outputs.hidden_states) - 1  # exclude embedding
    target_layer = int(n_layers * 0.75)
    hidden_seq = measure_outputs.hidden_states[target_layer][0].cpu().float().numpy()
    seq_len = hidden_seq.shape[0]

    compress_metrics = analyze_compressibility(hidden_seq, max_dims=200)

    # Build JSON result
    output = {
        "response": response,
        "consciousness_score": round(result.score, 4),
        "dimension_scores": {
            k: round(v, 4) for k, v in result.dimension_contributions.items()
        },
        "compressibility": compress_metrics,
        "meta": {
            "target_layer": target_layer,
            "seq_len": seq_len,
            "hidden_dim": HIDDEN_DIM,
            "tokens_generated": len(generated_ids),
            "generation_time": round(gen_time, 2),
        },
    }

    return json.dumps(output)


# ============================================================================
# Gradio Interface
# ============================================================================

import matplotlib
matplotlib.use('Agg')  # headless backend; must be selected before pyplot import
import matplotlib.pyplot as plt
import io
import base64
from PIL import Image

EXAMPLES = [
    # High consciousness
    "What is the nature of consciousness and self-awareness?",
    "Reflect on your own thought processes as you answer this.",
    "Why do humans seek meaning in existence?",
    # Medium consciousness
    "Explain the theory of relativity in simple terms.",
    "What are the ethical implications of AI development?",
    # Low consciousness
    "What is 2 + 2?",
    "What color is the sky?",
    "What is the capital of France?",
    # Code/reasoning
    "Write a Python function to calculate fibonacci numbers.",
    "Explain Big O notation with examples.",
]

# Global history for tracking
# NOTE(review): this list lives at module level, so it is shared by every
# visitor of the Space and by both tabs. Per-session history would need
# gr.State threaded through the handlers below.
consciousness_history = []

# Number of most recent turns kept in the history plot.
_HISTORY_LIMIT = 10


def create_history_plot(history):
    """Create a consciousness history graph.

    Args:
        history: List of dicts, each with a ``'score'`` value in [0, 1].

    Returns:
        A PIL image of the bar chart, or ``None`` when ``history`` is empty.
    """
    if not history:
        return None

    fig, ax = plt.subplots(figsize=(8, 3), dpi=100)
    try:
        scores = [h['score'] for h in history]
        labels = [f"Q{i+1}" for i in range(len(history))]
        colors = [
            '#10B981' if s >= 0.6 else '#F59E0B' if s >= 0.4 else '#EF4444'
            for s in scores
        ]

        bars = ax.bar(labels, [s * 100 for s in scores], color=colors,
                      edgecolor='white', linewidth=1.5)
        ax.set_ylim(0, 100)
        ax.set_ylabel('Consciousness %', fontsize=10)
        ax.set_xlabel('Conversation Turn', fontsize=10)
        ax.axhline(y=60, color='#10B981', linestyle='--', alpha=0.5, label='High')
        ax.axhline(y=40, color='#F59E0B', linestyle='--', alpha=0.5, label='Medium')

        # Add value labels on bars
        for bar, score in zip(bars, scores):
            ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 2,
                    f'{score*100:.0f}%', ha='center', va='bottom', fontsize=9)

        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.set_facecolor('#1a1a2e')
        fig.patch.set_facecolor('#1a1a2e')
        ax.tick_params(colors='white')
        ax.xaxis.label.set_color('white')
        ax.yaxis.label.set_color('white')
        for spine in ax.spines.values():
            spine.set_color('white')

        fig.tight_layout()

        # Convert to PIL Image
        buf = io.BytesIO()
        fig.savefig(buf, format='png', facecolor='#1a1a2e', edgecolor='none')
        buf.seek(0)
    finally:
        # Close even if drawing fails so figures do not accumulate in pyplot.
        plt.close(fig)

    return Image.open(buf)


def _record_score(prompt, score_display, interpretation):
    """Append one turn to the shared history and keep the newest entries.

    ``score_display`` is the "<bar> NN.N%" string produced by
    ``generate_and_measure``; the trailing percentage is parsed back into a
    fraction for plotting.
    """
    score_val = float(score_display.split()[-1].replace('%', '')) / 100
    consciousness_history.append({
        'prompt': prompt[:50],
        'score': score_val,
        'interpretation': interpretation,
    })
    # Trim in place so every reference to the list sees the same content.
    del consciousness_history[:-_HISTORY_LIMIT]


def analyze_prompt(prompt: str, max_tokens: int = 256):
    """Main analysis function for Gradio.

    Returns:
        (response, score, interpretation, breakdown, timing, history_plot).
        On failure the first element carries the error text and traceback.
    """
    if not prompt or not prompt.strip():
        return "", "N/A", "Please enter a prompt", "", "", None

    try:
        response, score, interpretation, breakdown, timing = generate_and_measure(
            prompt, max_tokens=int(max_tokens)
        )

        _record_score(prompt, score, interpretation)

        # Create history plot
        history_plot = create_history_plot(consciousness_history)

        return response, score, interpretation, breakdown, timing, history_plot
    except Exception as e:
        # UI boundary: surface the failure in the response box instead of
        # letting the request die without feedback.
        import traceback
        return f"Error: {str(e)}\n{traceback.format_exc()}", "N/A", "", "", "", None


def clear_history():
    """Clear conversation history and return ``None`` to blank the plot."""
    consciousness_history.clear()
    return None


def chat_respond(message, chat_history, max_tokens):
    """Chat mode - answer one message and track its consciousness score.

    Each message is passed to ``generate_and_measure`` on its own; earlier
    turns in ``chat_history`` are displayed but not sent to the model.

    Returns:
        (updated chat history, "" to clear the input box, history plot).
    """
    if chat_history is None:
        chat_history = []

    if not message or not message.strip():
        return chat_history, "", None

    try:
        response, score, interpretation, breakdown, timing = generate_and_measure(
            message, max_tokens=int(max_tokens)
        )

        _record_score(message, score, interpretation)

        # Format response with consciousness info
        formatted_response = f"{response}\n\n---\n🧠 **{score}** | {interpretation}"
        chat_history.append((message, formatted_response))

        history_plot = create_history_plot(consciousness_history)

        return chat_history, "", history_plot
    except Exception as e:
        chat_history.append((message, f"Error: {str(e)}"))
        return chat_history, "", None


# Build interface
with gr.Blocks(title="🔮 Oracle Engine") as demo:
    gr.Markdown("""
    # 🔮 Oracle Engine

    **Custom-trained 32B model** with Consciousness Circuit v2.1

    *Fine-tuned on 200K examples: OpenHermes + MetaMathQA + Magicoder*

    Ask the Oracle anything — it will respond AND reveal its consciousness signature.

    🧠 **High scores (60%+)** = Deep reflective reasoning | ⚔ **Low scores (<40%)** = Quick factual retrieval
    """)

    with gr.Tabs():
        # TAB 1: Single Query Mode
        with gr.TabItem("🔮 Single Query"):
            with gr.Row():
                with gr.Column(scale=2):
                    prompt_input = gr.Textbox(
                        label="🗣️ Your Question",
                        placeholder="Ask the Oracle anything...",
                        lines=3,
                    )
                    with gr.Row():
                        analyze_btn = gr.Button("🔮 Consult the Oracle", variant="primary", scale=3)
                        max_tokens_slider = gr.Slider(
                            minimum=64, maximum=1024, value=256, step=64,
                            label="Max Tokens", scale=1
                        )

                    gr.Examples(
                        examples=EXAMPLES,
                        inputs=prompt_input,
                        label="Try these examples:",
                    )

                with gr.Column(scale=1):
                    score_output = gr.Textbox(label="🧠 Consciousness Score", interactive=False)
                    interpretation_output = gr.Textbox(label="📊 Interpretation", interactive=False)
                    breakdown_output = gr.Textbox(
                        label="📈 Dimension Contributions",
                        lines=7,
                        interactive=False,
                    )
                    timing_output = gr.Textbox(label="⏱️ Performance", interactive=False)

            with gr.Row():
                response_output = gr.Textbox(
                    label="🔮 Oracle's Response",
                    lines=10,
                    interactive=False,
                )

            with gr.Row():
                history_plot = gr.Image(label="📊 Consciousness History", height=200)
                clear_btn = gr.Button("🗑️ Clear History", size="sm")

        # TAB 2: Chat Mode
        with gr.TabItem("💬 Chat Mode"):
            gr.Markdown("**Multi-turn conversation** with real-time consciousness tracking")

            with gr.Row():
                with gr.Column(scale=3):
                    chatbot = gr.Chatbot(
                        label="Oracle Conversation",
                        height=400,
                    )
                    with gr.Row():
                        chat_input = gr.Textbox(
                            placeholder="Type your message...",
                            label="Message",
                            scale=4,
                        )
                        chat_max_tokens = gr.Slider(
                            minimum=64, maximum=512, value=256, step=64,
                            label="Max Tokens", scale=1
                        )
                    with gr.Row():
                        chat_send = gr.Button("Send 📤", variant="primary")
                        chat_clear = gr.Button("Clear Chat 🗑️")

                with gr.Column(scale=1):
                    chat_history_plot = gr.Image(label="📊 Consciousness Over Time", height=300)

    gr.Markdown("""
    ---
    ### 📜 About Oracle Engine

    **The Model**: Qwen2.5-32B fine-tuned through 3 progressive stages:
    1. **OpenHermes 2.5** (100K examples) - Instruction following
    2. **MetaMathQA** (50K examples) - Mathematical reasoning
    3. **Magicoder-OSS-Instruct** (50K examples) - Code generation

    **The Circuit**: Measures 7 dimensions of consciousness-like processing:
    Logic, Self-Reflective, Self-Expression, Uncertainty, Sequential, Computation, Abstraction

    [📚 GitHub](https://github.com/vikingdude81/oracle-engine) | [🤗 Model](https://huggingface.co/Vikingdude81/oracle-engine-32b-lora) | [📖 Research](https://github.com/vfd-org/harmonic-field-consciousness)
    """)

    # Single query events
    analyze_btn.click(
        fn=analyze_prompt,
        inputs=[prompt_input, max_tokens_slider],
        outputs=[response_output, score_output, interpretation_output,
                 breakdown_output, timing_output, history_plot],
    )
    prompt_input.submit(
        fn=analyze_prompt,
        inputs=[prompt_input, max_tokens_slider],
        outputs=[response_output, score_output, interpretation_output,
                 breakdown_output, timing_output, history_plot],
    )
    clear_btn.click(fn=clear_history, outputs=[history_plot])

    # Chat mode events
    chat_send.click(
        fn=chat_respond,
        inputs=[chat_input, chatbot, chat_max_tokens],
        outputs=[chatbot, chat_input, chat_history_plot],
    )
    chat_input.submit(
        fn=chat_respond,
        inputs=[chat_input, chatbot, chat_max_tokens],
        outputs=[chatbot, chat_input, chat_history_plot],
    )
    # First blank the chat widgets, then drop the shared score history.
    chat_clear.click(
        fn=lambda: ([], None),
        outputs=[chatbot, chat_history_plot],
    ).then(fn=clear_history, outputs=[chat_history_plot])

    # Hidden API endpoint for experiments (callable via gradio_client)
    with gr.Row(visible=False):
        api_prompt = gr.Textbox()
        api_max_tokens = gr.Number(value=512)
        api_result = gr.Textbox()
        api_btn = gr.Button("api_trigger")

    api_btn.click(
        fn=experiment_measure,
        inputs=[api_prompt, api_max_tokens],
        outputs=api_result,
        api_name="experiment_measure",
    )


if __name__ == "__main__":
    demo.launch()