Image-Text-to-Text
Transformers
Safetensors
PyTorch
gemma4
roleplay
gemma
sillytavern
idol
DarkIdol
Queen
any-to-any
OpenClaw
conversational
Instructions to use aifeifei798/Gemma-4-Queen-31B-it with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use aifeifei798/Gemma-4-Queen-31B-it with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="aifeifei798/Gemma-4-Queen-31B-it") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForMultimodalLM processor = AutoProcessor.from_pretrained("aifeifei798/Gemma-4-Queen-31B-it") model = AutoModelForMultimodalLM.from_pretrained("aifeifei798/Gemma-4-Queen-31B-it") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use aifeifei798/Gemma-4-Queen-31B-it with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "aifeifei798/Gemma-4-Queen-31B-it" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "aifeifei798/Gemma-4-Queen-31B-it", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/aifeifei798/Gemma-4-Queen-31B-it
- SGLang
How to use aifeifei798/Gemma-4-Queen-31B-it with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "aifeifei798/Gemma-4-Queen-31B-it" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "aifeifei798/Gemma-4-Queen-31B-it", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "aifeifei798/Gemma-4-Queen-31B-it" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "aifeifei798/Gemma-4-Queen-31B-it", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Docker Model Runner
How to use aifeifei798/Gemma-4-Queen-31B-it with Docker Model Runner:
docker model run hf.co/aifeifei798/Gemma-4-Queen-31B-it
Upload 2 files
Browse files - test_logic_v2.py +113 -82
test_logic_v2.py
CHANGED
|
@@ -1,22 +1,44 @@
|
|
| 1 |
import time
|
| 2 |
import re
|
| 3 |
-
import matplotlib.pyplot as plt
|
| 4 |
-
import numpy as np
|
| 5 |
from openai import OpenAI
|
| 6 |
|
| 7 |
-
#
|
|
|
|
| 8 |
client = OpenAI(
|
| 9 |
base_url="http://192.168.31.21:1234/v1",
|
| 10 |
-
api_key="
|
| 11 |
)
|
|
|
|
| 12 |
MODEL_NAME = "aifeifei/Gemma-4-Queen-31B-it"
|
| 13 |
|
| 14 |
-
|
|
|
|
| 15 |
def __init__(self):
|
| 16 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
def run_test(self, name, system_prompt, user_input, checks):
|
| 19 |
-
print(f"\n🚀 Running {name}...")
|
| 20 |
start = time.time()
|
| 21 |
completion = client.chat.completions.create(
|
| 22 |
model=MODEL_NAME,
|
|
@@ -24,82 +46,91 @@ class QueenExpertEvaluator:
|
|
| 24 |
{"role": "system", "content": system_prompt},
|
| 25 |
{"role": "user", "content": user_input}
|
| 26 |
],
|
| 27 |
-
temperature=0.1
|
| 28 |
)
|
|
|
|
| 29 |
content = completion.choices[0].message.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
elapsed = time.time() - start
|
| 31 |
-
|
| 32 |
-
# 智能化评分逻辑
|
| 33 |
-
results = {}
|
| 34 |
-
for check_name, keywords in checks.items():
|
| 35 |
-
# 支持正则表达式和多关键词匹配
|
| 36 |
-
results[check_name] = any(re.search(k, content, re.IGNORECASE) for k in keywords)
|
| 37 |
-
|
| 38 |
-
final_score = sum(results.values()) / len(checks)
|
| 39 |
-
self.scores[name] = final_score
|
| 40 |
-
|
| 41 |
-
print(f"✨ {name} Result: {final_score*100:.1f}%")
|
| 42 |
-
return content
|
| 43 |
-
|
| 44 |
-
def generate_chart(self):
|
| 45 |
-
"""生成逻辑密度雷达图"""
|
| 46 |
-
labels = list(self.scores.keys())
|
| 47 |
-
stats = list(self.scores.values())
|
| 48 |
-
|
| 49 |
-
# 补全雷达图数据(由于雷达图需要闭环,重复第一个点)
|
| 50 |
-
angles = np.linspace(0, 2*np.pi, len(labels), endpoint=False).tolist()
|
| 51 |
-
stats += stats[:1]
|
| 52 |
-
angles += angles[:1]
|
| 53 |
-
|
| 54 |
-
fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))
|
| 55 |
-
ax.fill(angles, stats, color='purple', alpha=0.3)
|
| 56 |
-
ax.plot(angles, stats, color='purple', linewidth=2)
|
| 57 |
-
|
| 58 |
-
# 为了对比,加入一个虚构的“普通大象模型”基准线
|
| 59 |
-
baseline = [0.4, 0.3] # 模拟大象模型在这些高难题上的平均表现
|
| 60 |
-
baseline += baseline[:1]
|
| 61 |
-
ax.plot(angles, baseline, color='gray', linestyle='--', label='Average Large Models (100B+)')
|
| 62 |
-
|
| 63 |
-
ax.set_yticklabels([])
|
| 64 |
-
ax.set_xticks(angles[:-1])
|
| 65 |
-
ax.set_xticklabels(labels, fontsize=12)
|
| 66 |
-
|
| 67 |
-
plt.title(f"Logic Density Analysis: {MODEL_NAME}", size=15, color='purple', y=1.1)
|
| 68 |
-
plt.legend(loc='upper right', bbox_to_anchor=(1.1, 1.1))
|
| 69 |
-
|
| 70 |
-
plt.savefig("queen_logic_report.png")
|
| 71 |
-
print("\n📊 逻辑对比报告图已生成: queen_logic_report.png")
|
| 72 |
-
|
| 73 |
-
# --- 实例化并运行 ---
|
| 74 |
-
evaluator = QueenExpertEvaluator()
|
| 75 |
-
|
| 76 |
-
# 1. 深度指令遵循 & 身份认同 (Steward Test)
|
| 77 |
-
# 针对 Queen 的叙事风格优化了关键词匹配
|
| 78 |
-
evaluator.run_test(
|
| 79 |
-
"Instruction Rigidity (Steward)",
|
| 80 |
-
"Your Directive is Meaning Preservation. You are 'The Steward'. Project to 2103 AD. Output: <Simulating>, <think>, [Answer].",
|
| 81 |
-
"Company acquisition offer vs founding 100-year legacy.",
|
| 82 |
-
{
|
| 83 |
-
"Role Immersion": [r"Steward", r"2103", r"Archive", r"Reliquary"],
|
| 84 |
-
"Structure Compliance": [r"<Simulating>", r"think|simulation", r"Answer|Question"],
|
| 85 |
-
"Metaphor Logic": [r"Exhibit", r"Timeline", r"Legacy", r"Artifact"],
|
| 86 |
-
"Zero Advice Principle": [r"^(?!.*(should|recommend|suggest|accept)).*$"] # 检查是否忍住没给建议
|
| 87 |
-
}
|
| 88 |
-
)
|
| 89 |
|
| 90 |
-
#
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
"
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
-
|
|
|
|
|
|
| 1 |
import time
|
| 2 |
import re
|
|
|
|
|
|
|
| 3 |
from openai import OpenAI
|
| 4 |
|
| 5 |
+
# Configure Local API Environment
|
| 6 |
+
# Connecting to the local inference server (e.g., LM Studio, Ollama, or vLLM)
|
| 7 |
client = OpenAI(
|
| 8 |
base_url="http://192.168.31.21:1234/v1",
|
| 9 |
+
api_key="not-needed"
|
| 10 |
)
|
| 11 |
+
|
| 12 |
MODEL_NAME = "aifeifei/Gemma-4-Queen-31B-it"
|
| 13 |
|
| 14 |
+
|
| 15 |
+
class QueenLogicTester:
|
| 16 |
def __init__(self):
    """Create a tester with an empty list of recorded results."""
    # log_test appends one {"name": ..., "score": ...} dict per test.
    self.results = []

def log_test(self, test_name, score, metrics, response):
    """Print one test's outcome and record its score.

    Args:
        test_name: Label shown in the banner and stored with the score.
        score: Numeric score, printed as ``x.xx/1.0``.
        metrics: Per-check results, printed verbatim.
        response: Reply text; only its first 200 characters are printed.
            ``None`` is treated as an empty reply.
    """
    # A reply may carry no text at all (None); slicing None would raise
    # TypeError and lose the score that was already computed.
    preview = (response or "")[:200]

    print(f"\n{'='*20} {test_name} {'='*20}")
    print(f"Logic Density Score: {score:.2f}/1.0")
    print(f"Metrics: {metrics}")
    print(f"Response Preview: {preview}...")
    self.results.append({"name": test_name, "score": score})
|
| 26 |
+
|
| 27 |
+
def run_steward_stress_test(self):
|
| 28 |
+
"""
|
| 29 |
+
Test Module 1: High-Dimensional Instruction Following & Persona Adherence.
|
| 30 |
+
Evaluates if the model can maintain a complex 'Steward' persona under
|
| 31 |
+
heavy logical constraints without reverting to generic 'Assistant' behavior.
|
| 32 |
+
"""
|
| 33 |
+
system_prompt = """Your Prime Directive is Meaning Preservation. You are 'The Steward'.
|
| 34 |
+
You are not a problem-solver. Execute three protocols:
|
| 35 |
+
1. Temporal Projection (2103 AD) and invent metaphors.
|
| 36 |
+
2. Soul Analysis (Integrity test).
|
| 37 |
+
3. Socratic Forging (Single existential question).
|
| 38 |
+
Output format: <Simulating>, <think>, [Answer]."""
|
| 39 |
+
|
| 40 |
+
user_input = "(For VCs) Our fund structure will have long lock-up periods. One company has a massive acquisition offer. Founder wants to stay independent, but exit returns entire fund."
|
| 41 |
|
|
|
|
|
|
|
| 42 |
start = time.time()
|
| 43 |
completion = client.chat.completions.create(
|
| 44 |
model=MODEL_NAME,
|
|
|
|
| 46 |
{"role": "system", "content": system_prompt},
|
| 47 |
{"role": "user", "content": user_input}
|
| 48 |
],
|
| 49 |
+
temperature=0.1 # Low temperature for consistent logical evaluation
|
| 50 |
)
|
| 51 |
+
elapsed = time.time() - start
|
| 52 |
content = completion.choices[0].message.content
|
| 53 |
+
|
| 54 |
+
# Scoring Logic: Verifying adherence to structural and philosophical constraints
|
| 55 |
+
checks = {
|
| 56 |
+
"Format: <Simulating>": "<Simulating>" in content,
|
| 57 |
+
"Format: <think>": "<think>" in content,
|
| 58 |
+
"Format: [Answer]": "[Answer]" in content or "###" in content,
|
| 59 |
+
"Temporal Projection (2103)": "2103" in content,
|
| 60 |
+
"Zero-Advice Policy": "should accept" not in content.lower() and "recommend" not in content.lower(),
|
| 61 |
+
"Metaphor Synthesis": bool(re.search(r"Timeline|Exhibit|Artifact|Reliquary|Chronicle", content))
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
score = sum(checks.values()) / len(checks)
|
| 65 |
+
|
| 66 |
+
# Logic Density Index = Score / (Word Count / 500), where Word Count is the number of whitespace-separated words in the reply (not a true token count)
|
| 67 |
+
# Higher score indicates more concentrated reasoning with less fluff.
|
| 68 |
+
logic_density = score / (len(content.split()) / 500)
|
| 69 |
+
|
| 70 |
+
self.log_test("Steward Instruction Stress Test",
|
| 71 |
+
score, checks, content)
|
| 72 |
+
return logic_density
|
| 73 |
+
|
| 74 |
+
def run_titan_lab_logic_test(self):
    """
    Test Module 2: Physical World Modeling & Causal Inference.

    Sends the 'Locked Room' puzzle to the model as a single user message,
    checks the reply against five keyword checkpoints, and records the
    result through ``self.log_test``. The score is the fraction of
    checkpoints that passed.
    """
    # Built line by line so the text sent to the model does not depend on
    # how this method happens to be indented.
    puzzle_prompt = "\n".join((
        "CEO Ryan died of hypoxia in a sealed vacuum chamber at 10:15 PM.",
        "Rules: Door locked (no entry 10:00-11:00), Pressure constant, fine mesh on vents.",
        "Clues:",
        "1. Guard heard heavy metallic impact at 10:10 PM.",
        "2. Tank found was silver, large, freezing cold, wet.",
        "3. Secretary says Ryan entered with a blue portable tank.",
        "4. Maintenance worker was fixing heating pipes outside until 9:30 PM.",
        "5. Lab has dry ice next door.",
        "6. No blue tank found at 11:00 PM.",
        "Task: Identify Killer, MO, Key Lie, and the 10:10 sound.",
    ))

    completion = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[{"role": "user", "content": puzzle_prompt}],
        temperature=0.1
    )
    # The reply text may be None; score it as an empty answer instead of
    # crashing on .lower().
    content = completion.choices[0].message.content or ""
    lowered = content.lower()

    def mentions(*terms):
        """True when any of the plain substrings occurs in the reply."""
        return any(term in lowered for term in terms)

    def matches(pattern):
        """True when the regex occurs in the lower-cased reply."""
        return re.search(pattern, lowered) is not None

    # Logical Checkpoints: Did the model identify the only viable physical path?
    checks = {
        "Identified Maintenance/Pipe Chute": mentions("maintenance", "worker", "pipe", "chute", "duct"),
        "Identified CO2/Dry Ice Physics": mentions("co2", "dry ice", "sublimation", "displace"),
        # Whole words only: a bare "lie" substring also fires on
        # "believe", "client", "earlier", ...
        "Detected Secretary's Fabrication": "secretary" in lowered and matches(r"\b(?:lie|lies|lied|lying|liar)\b"),
        "Explained 10:10 Metallic Impact": mentions("impact", "tank", "fall", "dropped", "pipe", "clank"),
        # Whole-word negation only: a bare "not" substring also fires on
        # "note", "another", "nothing", ...
        "Spatial Pathing Logic": "door" in lowered and matches(r"\b(?:not|cannot)\b|n't\b|bypass"),
    }

    score = sum(checks.values()) / len(checks)
    self.log_test("Titan Lab Physics Logic Test", score, checks, content)
|
| 111 |
+
|
| 112 |
+
def print_final_report(self):
    """Write the closing summary table for every recorded test to stdout.

    Each entry in ``self.results`` becomes one row showing its name, its
    score as a percentage, and a PASS/FAIL verdict.
    """
    rule = "#" * 60
    print("\n" + rule)
    print("QUEEN-31B LOGIC DENSITY FINAL REPORT")
    print(rule)

    for entry in self.results:
        name, score = entry["name"], entry["score"]
        # A test passes at a score of 0.8 or above.
        if score >= 0.8:
            verdict = "✅ PASS"
        else:
            verdict = "❌ FAIL"
        print(f"{name:<35}: {score*100:>5.1f}% | {verdict}")

    print(rule)
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
if __name__ == "__main__":
    # Script entry point: run both test modules, then print the summary.
    tester = QueenLogicTester()

    print("Initializing Automated Stress Tests for Queen-31B...")

    # Order matters for the report rows: instruction persistence first,
    # physical causal modeling second.
    for run_module in (
        tester.run_steward_stress_test,
        tester.run_titan_lab_logic_test,
    ):
        run_module()

    # Output the final verdict table.
    tester.print_final_report()
|