File size: 1,748 Bytes
38a0389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
841ab53
38a0389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
841ab53
38a0389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import torch

from transformers import PreTrainedModel
from transformers.generation import GenerationMixin
from transformers.modeling_outputs import CausalLMOutput

from .configuration_gpjtgpt2 import GPJTGPT2Config
from .gpt import GPTModel


class GPJTGPT2Model(PreTrainedModel):
    """Hugging Face ``PreTrainedModel`` wrapper around the project's ``GPTModel``.

    The wrapped network is built from ``config.cfg`` and stored as
    ``self.model``; ``forward`` simply delegates to it.
    """

    config_class = GPJTGPT2Config

    def __init__(self, config: GPJTGPT2Config):
        """Build the wrapped ``GPTModel`` from ``config.cfg``.

        Args:
            config: Configuration object whose ``cfg`` attribute is handed
                to ``GPTModel``.
        """
        super().__init__(config)
        self.model = GPTModel(config.cfg)
        self.post_init()

    def forward(self, input_ids, **kwargs):
        """Run the wrapped model on ``input_ids``.

        Args:
            input_ids: Input passed unchanged to the wrapped ``GPTModel``
                (presumably a tensor of token ids -- confirm against
                ``GPTModel``).
            **kwargs: Accepted and ignored, so callers that pass extra
                keyword arguments do not fail.

        Returns:
            Whatever the wrapped ``GPTModel`` returns.
        """
        # Call the module itself rather than ``.forward`` so that any hooks
        # registered on the wrapped module are executed.
        return self.model(input_ids)



class GPJTGPT2ModelForCausalLM(PreTrainedModel, GenerationMixin):
    """Causal-LM wrapper around ``GPTModel`` with optional loss computation.

    The wrapped network is built from ``config.cfg`` and stored as
    ``self.model``. ``forward`` returns its output as ``logits`` and, when
    ``labels`` are supplied, a next-token cross-entropy loss.
    """

    config_class = GPJTGPT2Config

    def __init__(self, config: GPJTGPT2Config):
        """Build the wrapped ``GPTModel`` from ``config.cfg``.

        Args:
            config: Configuration object whose ``cfg`` attribute is handed
                to ``GPTModel``.
        """
        super().__init__(config)
        self.model = GPTModel(config.cfg)
        self.post_init()

    def forward(
        self, input_ids, attention_mask=None, labels=None, **kwargs
    ) -> CausalLMOutput:
        """Compute logits and, if ``labels`` is given, the next-token loss.

        Args:
            input_ids: Input passed unchanged to the wrapped ``GPTModel``.
            attention_mask: Optional mask aligned with ``labels``. It is NOT
                forwarded to the wrapped model; it is only used to exclude
                positions from the loss (entries equal to 0 are ignored).
            labels: Optional target ids aligned with ``input_ids``. They are
                shifted inside this method, so callers pass them unshifted.
            **kwargs: Accepted and ignored.

        Returns:
            ``CausalLMOutput`` with the full (unshifted) ``logits`` and
            ``loss`` (``None`` when ``labels`` is not provided).
        """
        # Call the module itself rather than ``.forward`` so that any hooks
        # registered on the wrapped module are executed.
        logits = self.model(input_ids)

        loss = None
        if labels is not None:
            # Position t predicts token t + 1: drop the last logit and the
            # first label. The indexing assumes logits have three dims
            # (presumably batch, sequence, vocab -- confirm with GPTModel).
            shifted_logits = logits[:, :-1, :]
            shifted_labels = labels[:, 1:]

            if attention_mask is not None:
                # Masked-out target positions get the ignore index so they
                # do not contribute to the loss.
                shifted_mask = attention_mask[:, 1:]
                shifted_labels = shifted_labels.masked_fill(
                    shifted_mask == 0, -100
                )

            loss = torch.nn.functional.cross_entropy(
                shifted_logits.flatten(0, 1),
                shifted_labels.flatten(),
                ignore_index=-100,
            )

        return CausalLMOutput(logits=logits, loss=loss)

    def get_input_embeddings(self):
        """Return the wrapped model's ``tok_emb`` module."""
        return self.model.tok_emb

    def set_input_embeddings(self, value):
        """Replace the wrapped model's ``tok_emb`` module with ``value``.

        Provided as the counterpart of ``get_input_embeddings`` so code that
        swaps the input embedding has a setter to call.
        """
        self.model.tok_emb = value

    def get_output_embeddings(self):
        """Return the wrapped model's ``out_head`` module."""
        return self.model.out_head

    def set_output_embeddings(self, new_embeddings):
        """Replace the wrapped model's ``out_head`` module."""
        self.model.out_head = new_embeddings