jmcinern committed on
Commit
43c9011
·
verified ·
1 Parent(s): 2499997

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -4
app.py CHANGED
@@ -2,8 +2,7 @@ import gradio as gr
2
  import torch
3
  import re
4
  import threading
5
- from llmcompressor.transformers import SparseAutoModelForCausalLM
6
- from transformers import AutoTokenizer
7
 
8
  # Model configuration
9
  MODEL_NAME = "jmcinern/qwen3-8B-cpt-sft-awq"
@@ -29,12 +28,12 @@ class ChatBot:
29
 
30
  def load_model():
31
  print("Loading model...")
32
- return SparseAutoModelForCausalLM.from_pretrained(
33
  MODEL_NAME,
34
  trust_remote_code=True,
35
  device_map="auto",
36
  torch_dtype="auto",
37
- max_workers=4 # Use 4 threads for model loading
38
  )
39
 
40
  try:
 
2
  import torch
3
  import re
4
  import threading
5
+ from transformers import AutoTokenizer, AutoModelForCausalLM
 
6
 
7
  # Model configuration
8
  MODEL_NAME = "jmcinern/qwen3-8B-cpt-sft-awq"
 
28
 
29
  def load_model():
30
  print("Loading model...")
31
+ return AutoModelForCausalLM.from_pretrained(
32
  MODEL_NAME,
33
  trust_remote_code=True,
34
  device_map="auto",
35
  torch_dtype="auto",
36
+ low_cpu_mem_usage=True
37
  )
38
 
39
  try: