Text Generation
Transformers
PyTorch
Norwegian
Norwegian Bokmål
Norwegian Nynorsk
text2text-generation
T5
NorT5
Norwegian
encoder-decoder
custom_code
Instructions to use ltg/nort5-base with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use ltg/nort5-base with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("text-generation", model="ltg/nort5-base", trust_remote_code=True)# Load model directly from transformers import AutoModelForSeq2SeqLM model = AutoModelForSeq2SeqLM.from_pretrained("ltg/nort5-base", trust_remote_code=True, dtype="auto") - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use ltg/nort5-base with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "ltg/nort5-base" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "ltg/nort5-base", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'Use Docker
docker model run hf.co/ltg/nort5-base
- SGLang
How to use ltg/nort5-base with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "ltg/nort5-base" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "ltg/nort5-base", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "ltg/nort5-base" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "ltg/nort5-base", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }' - Docker Model Runner
How to use ltg/nort5-base with Docker Model Runner:
docker model run hf.co/ltg/nort5-base
Update modeling_nort5.py
Browse files- modeling_nort5.py +9 -8
modeling_nort5.py
CHANGED
|
@@ -62,7 +62,7 @@ class Decoder(nn.Module):
|
|
| 62 |
self_relative_embedding = self.self_relative_embedding()
|
| 63 |
cross_relative_embedding = self.cross_relative_embedding()
|
| 64 |
|
| 65 |
-
if past_key_values is
|
| 66 |
autoreg_mask = torch.triu(
|
| 67 |
torch.full((x.size(0), x.size(0)), True, device=x.device),
|
| 68 |
diagonal=1
|
|
@@ -259,12 +259,12 @@ class Attention(nn.Module):
|
|
| 259 |
|
| 260 |
if past_key_value is not None:
|
| 261 |
if not self.is_cross_attention:
|
| 262 |
-
key = torch.cat([past_key_value[0], key], dim=1)
|
| 263 |
-
value = torch.cat([past_key_value[1], value], dim=1)
|
| 264 |
key_len = key.size(1)
|
| 265 |
elif past_key_value[0].size(1) == kv.size(0):
|
| 266 |
-
key = past_key_value[0]
|
| 267 |
-
value = past_key_value[1]
|
| 268 |
|
| 269 |
if self.position_indices.size(0) < max(query_len, key_len):
|
| 270 |
position_indices = torch.arange(max(query_len, key_len), dtype=torch.long).unsqueeze(1) \
|
|
@@ -306,7 +306,10 @@ class Attention(nn.Module):
|
|
| 306 |
context = self.post_layer_norm(context)
|
| 307 |
context = self.dropout(context)
|
| 308 |
|
| 309 |
-
|
|
|
|
|
|
|
|
|
|
| 310 |
|
| 311 |
|
| 312 |
class WordEmbedding(nn.Module):
|
|
@@ -662,9 +665,7 @@ class NorT5ForConditionalGeneration(NorT5Model):
|
|
| 662 |
reordered_layer_past_states = ()
|
| 663 |
for layer_past_state in layer_past_states:
|
| 664 |
# need to set correct `past` for each of the four key / value states
|
| 665 |
-
layer_past_state = layer_past_state.unflatten(0, (-1, self.config.num_attention_heads))
|
| 666 |
layer_past_state = layer_past_state.index_select(0, beam_idx.to(layer_past_state.device))
|
| 667 |
-
layer_past_state = layer_past_state.flatten(0, 1)
|
| 668 |
reordered_layer_past_states = reordered_layer_past_states + (layer_past_state,)
|
| 669 |
|
| 670 |
assert reordered_layer_past_states[0].shape == layer_past_states[0].shape
|
|
|
|
| 62 |
self_relative_embedding = self.self_relative_embedding()
|
| 63 |
cross_relative_embedding = self.cross_relative_embedding()
|
| 64 |
|
| 65 |
+
if past_key_values is None:
|
| 66 |
autoreg_mask = torch.triu(
|
| 67 |
torch.full((x.size(0), x.size(0)), True, device=x.device),
|
| 68 |
diagonal=1
|
|
|
|
| 259 |
|
| 260 |
if past_key_value is not None:
|
| 261 |
if not self.is_cross_attention:
|
| 262 |
+
key = torch.cat([past_key_value[0].flatten(0, 1), key], dim=1)
|
| 263 |
+
value = torch.cat([past_key_value[1].flatten(0, 1), value], dim=1)
|
| 264 |
key_len = key.size(1)
|
| 265 |
elif past_key_value[0].size(1) == kv.size(0):
|
| 266 |
+
key = past_key_value[0].flatten(0, 1)
|
| 267 |
+
value = past_key_value[1].flatten(0, 1)
|
| 268 |
|
| 269 |
if self.position_indices.size(0) < max(query_len, key_len):
|
| 270 |
position_indices = torch.arange(max(query_len, key_len), dtype=torch.long).unsqueeze(1) \
|
|
|
|
| 306 |
context = self.post_layer_norm(context)
|
| 307 |
context = self.dropout(context)
|
| 308 |
|
| 309 |
+
key = key.detach().unflatten(0, (-1, self.num_heads))
|
| 310 |
+
value = value.detach().unflatten(0, (-1, self.num_heads))
|
| 311 |
+
|
| 312 |
+
return context, attention_probs.detach(), (key, value)
|
| 313 |
|
| 314 |
|
| 315 |
class WordEmbedding(nn.Module):
|
|
|
|
| 665 |
reordered_layer_past_states = ()
|
| 666 |
for layer_past_state in layer_past_states:
|
| 667 |
# need to set correct `past` for each of the four key / value states
|
|
|
|
| 668 |
layer_past_state = layer_past_state.index_select(0, beam_idx.to(layer_past_state.device))
|
|
|
|
| 669 |
reordered_layer_past_states = reordered_layer_past_states + (layer_past_state,)
|
| 670 |
|
| 671 |
assert reordered_layer_past_states[0].shape == layer_past_states[0].shape
|