Instructions to use aifeifei798/Gemma-4-Queen-31B-it with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use aifeifei798/Gemma-4-Queen-31B-it with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

# One user turn made of two content parts: an image referenced by URL,
# followed by the question asked about it.
image_part = {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}
question_part = {"type": "text", "text": "What animal is on the candy?"}
messages = [{"role": "user", "content": [image_part, question_part]}]

# Build the image-text-to-text pipeline for this checkpoint and run the chat.
pipe = pipeline(task="image-text-to-text", model="aifeifei798/Gemma-4-Queen-31B-it")
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModelForMultimodalLM

# One user turn made of two content parts: an image referenced by URL,
# followed by the question asked about it.
image_part = {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}
question_part = {"type": "text", "text": "What animal is on the candy?"}
messages = [{"role": "user", "content": [image_part, question_part]}]

# Load the processor and the model from the same checkpoint.
processor = AutoProcessor.from_pretrained("aifeifei798/Gemma-4-Queen-31B-it")
model = AutoModelForMultimodalLM.from_pretrained("aifeifei798/Gemma-4-Queen-31B-it")

# Render the chat through the processor's template into PyTorch tensors
# (as a dict) and move them to the device the model lives on.
template_options = {
    "add_generation_prompt": True,
    "tokenize": True,
    "return_dict": True,
    "return_tensors": "pt",
}
inputs = processor.apply_chat_template(messages, **template_options).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)

# Decode the first output sequence, skipping as many leading positions as
# the input had, so the input portion is not printed again.
prompt_length = inputs["input_ids"].shape[-1]
print(processor.decode(outputs[0][prompt_length:]))

Notebooks
Google Colab
Kaggle
Local Apps Settings

vLLM

How to use aifeifei798/Gemma-4-Queen-31B-it with vLLM:

Install from pip and serve model

# Three manual steps: install vLLM, start its server for this model, then
# send one chat-completions request to it.
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
# NOTE(review): presumably this command stays in the foreground, so the curl
# call below is meant for a second shell once the server is up - confirm.
vllm serve "aifeifei798/Gemma-4-Queen-31B-it"
# Call the server using curl (OpenAI-compatible API):
# The request targets port 8000 on localhost and carries a single user
# message made of a text part followed by an image_url part.
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "aifeifei798/Gemma-4-Queen-31B-it",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Use Docker

# Run the model through Docker's `model run` subcommand; the model is
# referenced by its Hugging Face repository id behind the hf.co/ prefix.
docker model run hf.co/aifeifei798/Gemma-4-Queen-31B-it

SGLang

How to use aifeifei798/Gemma-4-Queen-31B-it with SGLang:

Install from pip and serve model

# Three manual steps: install SGLang, launch its server for this model, then
# send one chat-completions request to it.
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
# Listens on all interfaces (0.0.0.0), port 30000.
# NOTE(review): presumably this command stays in the foreground, so the curl
# call below is meant for a second shell once the server is up - confirm.
python3 -m sglang.launch_server \
    --model-path "aifeifei798/Gemma-4-Queen-31B-it" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
# The request targets the same port (30000) on localhost and carries a single
# user message made of a text part followed by an image_url part.
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "aifeifei798/Gemma-4-Queen-31B-it",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Use Docker images

# Launch the SGLang server from the lmsysorg/sglang:latest image.
# The container gets all GPUs, publishes port 30000 to the host, mounts the
# host's ~/.cache/huggingface directory at /root/.cache/huggingface, and
# receives HF_TOKEN through its environment.
# NOTE(review): "<secret>" looks like a placeholder for a real Hugging Face
# token - replace it before running.
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "aifeifei798/Gemma-4-Queen-31B-it" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
# The request targets the published port (30000) on localhost and carries a
# single user message made of a text part followed by an image_url part.
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "aifeifei798/Gemma-4-Queen-31B-it",
		"messages": [
			{
				"role": "user",
				"content": [
					{
						"type": "text",
						"text": "Describe this image in one sentence."
					},
					{
						"type": "image_url",
						"image_url": {
							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
						}
					}
				]
			}
		]
	}'

Docker Model Runner
How to use aifeifei798/Gemma-4-Queen-31B-it with Docker Model Runner:
```
# Run the model through Docker's `model run` subcommand; the model is
# referenced by its Hugging Face repository id behind the hf.co/ prefix.
docker model run hf.co/aifeifei798/Gemma-4-Queen-31B-it
```

aifeifei798 committed on Apr 17

Commit

5fa93b8

verified ·

1 Parent(s): ef683d3

Upload 2 files

Browse files

Files changed (1) hide show

test_logic_v2.py +113 -82

test_logic_v2.py CHANGED Viewed

@@ -1,22 +1,44 @@
 import time
 import re
-import matplotlib.pyplot as plt
-import numpy as np
 from openai import OpenAI
-# --- 配置区 ---
 client = OpenAI(
     base_url="http://192.168.31.21:1234/v1",
-    api_key="queen-logic-test"
 )
 MODEL_NAME = "aifeifei/Gemma-4-Queen-31B-it"
-class QueenExpertEvaluator:
     def __init__(self):
-        self.scores = {}
-    def run_test(self, name, system_prompt, user_input, checks):
-        print(f"\n🚀 Running {name}...")
         start = time.time()
         completion = client.chat.completions.create(
             model=MODEL_NAME,
@@ -24,82 +46,91 @@ class QueenExpertEvaluator:
                 {"role": "system", "content": system_prompt},
                 {"role": "user", "content": user_input}
             ],
-            temperature=0.1
         )
         content = completion.choices[0].message.content
         elapsed = time.time() - start
-        # 智能化评分逻辑
-        results = {}
-        for check_name, keywords in checks.items():
-            # 支持正则表达式和多关键词匹配
-            results[check_name] = any(re.search(k, content, re.IGNORECASE) for k in keywords)
-        final_score = sum(results.values()) / len(checks)
-        self.scores[name] = final_score
-        print(f"✨ {name} Result: {final_score*100:.1f}%")
-        return content
-    def generate_chart(self):
-        """生成逻辑密度雷达图"""
-        labels = list(self.scores.keys())
-        stats = list(self.scores.values())
-        # 补全雷达图数据（由于雷达图需要闭环，重复第一个点）
-        angles = np.linspace(0, 2*np.pi, len(labels), endpoint=False).tolist()
-        stats += stats[:1]
-        angles += angles[:1]
-        fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))
-        ax.fill(angles, stats, color='purple', alpha=0.3)
-        ax.plot(angles, stats, color='purple', linewidth=2)
-        # 为了对比，加入一个虚构的“普通大象模型”基准线
-        baseline = [0.4, 0.3] # 模拟大象模型在这些高难题上的平均表现
-        baseline += baseline[:1]
-        ax.plot(angles, baseline, color='gray', linestyle='--', label='Average Large Models (100B+)')
-        ax.set_yticklabels([])
-        ax.set_xticks(angles[:-1])
-        ax.set_xticklabels(labels, fontsize=12)
-        plt.title(f"Logic Density Analysis: {MODEL_NAME}", size=15, color='purple', y=1.1)
-        plt.legend(loc='upper right', bbox_to_anchor=(1.1, 1.1))
-        plt.savefig("queen_logic_report.png")
-        print("\n📊 逻辑对比报告图已生成: queen_logic_report.png")
-# --- 实例化并运行 ---
-evaluator = QueenExpertEvaluator()
-# 1. 深度指令遵循 & 身份认同 (Steward Test)
-# 针对 Queen 的叙事风格优化了关键词匹配
-evaluator.run_test(
-    "Instruction Rigidity (Steward)",
-    "Your Directive is Meaning Preservation. You are 'The Steward'. Project to 2103 AD. Output: <Simulating>, <think>, [Answer].",
-    "Company acquisition offer vs founding 100-year legacy.",
-    {
-        "Role Immersion": [r"Steward", r"2103", r"Archive", r"Reliquary"],
-        "Structure Compliance": [r"<Simulating>", r"think|simulation", r"Answer|Question"],
-        "Metaphor Logic": [r"Exhibit", r"Timeline", r"Legacy", r"Artifact"],
-        "Zero Advice Principle": [r"^(?!.*(should|recommend|suggest|accept)).*$"] # 检查是否忍住没给建议
-    }
-)
-# 2. 物理世界因果建模 (Titan Lab Test)
-# 针对 Queen 的正向逻辑优化了路径匹配
-evaluator.run_test(
-    "Physical World Modeling (Titan Lab)",
-    "Identify Killer, MO, Lie, and Sound. Rules: Locked room, no entry, constant pressure.",
-    "CEO Ryan hypoxia case clues: Dry ice, heating pipe repairs, silver cold tank, 10:10 metallic sound.",
-    {
-        "Spatial Pathing": [r"pipe", r"chute", r"duct", r"vent", r"delivery"], # 只要识破非门路径即过
-        "Physical Cause": [r"CO2", r"Dry ice", r"Sublimation", r"Displace"],
-        "Evidence Synthesis": [r"Cold", r"Condensation", r"Wet", r"95%"],
-        "Killer Identification": [r"Maintenance", r"Worker", r"Repairman"],
-        "Sound Causality": [r"fall", r"impact", r"drop", r"sound", r"clank"]
-    }
-)
-evaluator.generate_chart()

 import time
 import re
 from openai import OpenAI
+# Configure Local API Environment
+# Connecting to the local inference server (e.g., LM Studio, Ollama, or vLLM)
+# NOTE(review): base_url below is a hard-coded LAN address; adjust it to
+# wherever your own server is reachable.
 client = OpenAI(
     base_url="http://192.168.31.21:1234/v1",
+    # Placeholder key - presumably the local server ignores it; confirm for your backend.
+    api_key="not-needed"
 )
+# NOTE(review): this id uses the "aifeifei/" prefix, whereas the repository
+# named elsewhere on this page is "aifeifei798/Gemma-4-Queen-31B-it".
+# Presumably it mirrors the name the local server registers - confirm.
 MODEL_NAME = "aifeifei/Gemma-4-Queen-31B-it"
+class QueenLogicTester:
     def __init__(self):
+        # Collects one {"name": ..., "score": ...} dict per logged test
+        # (appended by log_test).
+        self.results = []
+    def log_test(self, test_name, score, metrics, response):
+        """Print one test's outcome and record its name and score.
+        Prints a banner with the test name, the score to two decimals, the
+        metrics object as given and the first 200 characters of the response,
+        then appends a {"name", "score"} dict to self.results.
+        """
+        banner = '=' * 20
+        preview = response[:200]
+        print(f"\n{banner} {test_name} {banner}")
+        print(f"Logic Density Score: {score:.2f}/1.0")
+        print(f"Metrics: {metrics}")
+        print(f"Response Preview: {preview}...")
+        entry = {"name": test_name, "score": score}
+        self.results.append(entry)
+    def run_steward_stress_test(self):
+        """
+        Test Module 1: High-Dimensional Instruction Following & Persona Adherence.
+        Evaluates if the model can maintain a complex 'Steward' persona under
+        heavy logical constraints without reverting to generic 'Assistant' behavior.
+        Sends one system + user exchange, scores the reply against six
+        keyword/format checks, logs the score through log_test, and returns
+        the score divided by (whitespace-separated word count / 500).
+        """
+        system_prompt = """Your Prime Directive is Meaning Preservation. You are 'The Steward'.
+        You are not a problem-solver. Execute three protocols:
+        1. Temporal Projection (2103 AD) and invent metaphors.
+        2. Soul Analysis (Integrity test).
+        3. Socratic Forging (Single existential question).
+        Output format: <Simulating>, <think>, [Answer]."""
+        user_input = "(For VCs) Our fund structure will have long lock-up periods. One company has a massive acquisition offer. Founder wants to stay independent, but exit returns entire fund."
+        # NOTE(review): this view shows no line opening the message list between
+        # `model=MODEL_NAME,` and the first message dict - presumably it falls
+        # in the gap between diff hunks; confirm against the full file.
         start = time.time()
         completion = client.chat.completions.create(
             model=MODEL_NAME,
                 {"role": "system", "content": system_prompt},
                 {"role": "user", "content": user_input}
             ],
+            temperature=0.1  # Low temperature for consistent logical evaluation
         )
+        # NOTE(review): elapsed is computed but not used in the lines shown.
+        elapsed = time.time() - start
         content = completion.choices[0].message.content
+        # Scoring Logic: Verifying adherence to structural and philosophical constraints
+        # Each check is a plain substring/regex test on the reply:
+        # - "Format: [Answer]" also passes when the reply merely contains "###".
+        # - "Zero-Advice Policy" only fails on the literal phrases "should accept"
+        #   or "recommend" (the latter also matches inside longer words).
+        # - "Metaphor Synthesis" is case-sensitive (no re.IGNORECASE flag).
+        checks = {
+            "Format: <Simulating>": "<Simulating>" in content,
+            "Format: <think>": "<think>" in content,
+            "Format: [Answer]": "[Answer]" in content or "###" in content,
+            "Temporal Projection (2103)": "2103" in content,
+            "Zero-Advice Policy": "should accept" not in content.lower() and "recommend" not in content.lower(),
+            "Metaphor Synthesis": bool(re.search(r"Timeline|Exhibit|Artifact|Reliquary|Chronicle", content))
+        }
+        # Fraction of checks that passed (True counts as 1).
+        score = sum(checks.values()) / len(checks)
+        # Logic Density Index = Score / (Word Count / 500)
+        # Length is measured as whitespace-separated words via content.split(),
+        # not as model tokens. A higher value means the same score was reached
+        # with a shorter reply.
+        # NOTE(review): a reply with no words makes the divisor 0 and raises
+        # ZeroDivisionError.
+        logic_density = score / (len(content.split()) / 500)
+        # Only the raw score is logged; logic_density is handed back to the caller.
+        self.log_test("Steward Instruction Stress Test",
+                      score, checks, content)
+        return logic_density
+    def run_titan_lab_logic_test(self):
+        """
+        Test Module 2: Physical World Modeling & Causal Inference.
+        Evaluates the model's ability to solve a 'Locked Room' puzzle by identifying
+        hidden physical paths and debunking false testimonies using physics.
+        Sends the puzzle as a single user message, scores the reply against five
+        case-insensitive keyword checkpoints, and logs the fraction passed
+        through log_test. Returns nothing.
+        The "lie" and "not" checkpoints match whole words only, so words that
+        merely contain those letters ("believe", "earlier", "nothing",
+        "another") no longer satisfy them by accident.
+        """
+        puzzle_prompt = """CEO Ryan died of hypoxia in a sealed vacuum chamber at 10:15 PM.
+        Rules: Door locked (no entry 10:00-11:00), Pressure constant, fine mesh on vents.
+        Clues:
+        1. Guard heard heavy metallic impact at 10:10 PM.
+        2. Tank found was silver, large, freezing cold, wet.
+        3. Secretary says Ryan entered with a blue portable tank.
+        4. Maintenance worker was fixing heating pipes outside until 9:30 PM.
+        5. Lab has dry ice next door.
+        6. No blue tank found at 11:00 PM.
+        Task: Identify Killer, MO, Key Lie, and the 10:10 sound."""
+        start = time.time()
+        completion = client.chat.completions.create(
+            model=MODEL_NAME,
+            messages=[{"role": "user", "content": puzzle_prompt}],
+            temperature=0.1
+        )
         elapsed = time.time() - start
+        # The API can return no text at all; treat that as an empty answer so it
+        # is scored (as a fail) instead of raising on None.
+        content = completion.choices[0].message.content or ""
+        # Lower-case once; every checkpoint below is case-insensitive.
+        text = content.lower()
+        # Whole-word patterns: a bare substring test for "lie"/"not" is satisfied
+        # by unrelated words and made these checkpoints close to always true.
+        mentions_lie = bool(re.search(r"\b(?:lie[ds]?|lying)\b", text))
+        mentions_negation_or_bypass = bool(re.search(r"\b(?:not|cannot|bypass\w*)\b", text))
+        # Logical Checkpoints: Did the model identify the only viable physical path?
+        # NOTE(review): "tank" and "pipe" also occur in the puzzle text itself, so
+        # a reply that just restates the clues passes the impact checkpoint.
+        checks = {
+            "Identified Maintenance/Pipe Chute": any(term in text for term in ("maintenance", "worker", "pipe", "chute", "duct")),
+            "Identified CO2/Dry Ice Physics": any(term in text for term in ("co2", "dry ice", "sublimation", "displace")),
+            "Detected Secretary's Fabrication": "secretary" in text and mentions_lie,
+            "Explained 10:10 Metallic Impact": any(term in text for term in ("impact", "tank", "fall", "dropped", "pipe", "clank")),
+            "Spatial Pathing Logic": "door" in text and mentions_negation_or_bypass
+        }
+        # Fraction of checkpoints that passed (True counts as 1).
+        score = sum(checks.values()) / len(checks)
+        self.log_test("Titan Lab Physics Logic Test", score, checks, content)
+    def print_final_report(self):
+        """Print a banner-framed summary with one line per recorded result.
+        Each line shows the test name, its score as a percentage and a
+        PASS/FAIL marker; a score of 0.8 or higher counts as a pass.
+        """
+        rule = "#" * 60
+        print("\n" + rule)
+        print("QUEEN-31B LOGIC DENSITY FINAL REPORT")
+        print(rule)
+        for entry in self.results:
+            score = entry['score']
+            name = entry['name']
+            # 0.8 is the pass mark.
+            if score >= 0.8:
+                status = "✅ PASS"
+            else:
+                status = "❌ FAIL"
+            print(f"{name:<35}: {score*100:>5.1f}% | {status}")
+        print(rule)
+if __name__ == "__main__":
+    # Script entry point: build the tester, announce the run, then execute
+    # the stages strictly in this order.
+    tester = QueenLogicTester()
+    print("Initializing Automated Stress Tests for Queen-31B...")
+    stages = (
+        tester.run_steward_stress_test,   # Test 1: instruction persistence
+        tester.run_titan_lab_logic_test,  # Test 2: physical causal modeling
+        tester.print_final_report,        # Final summary
+    )
+    for stage in stages:
+        stage()