import torch
import torch.nn as nn
from transformers import BertModel, BertConfig, PreTrainedModel

def get_device():
    """Return the CUDA device if available, otherwise fall back to the CPU."""
    if torch.cuda.is_available():
        return torch.device('cuda')
    return torch.device('cpu')

device = get_device()
USE_CUDA = device.type == 'cuda'
# Model hyperparameters
base_bert = 'indobenchmark/indobert-base-p2'
HIDDEN_DIM = 768
OUTPUT_DIM = 2        # 2 for binary classification
BIDIRECTIONAL = True
DROPOUT = 0.2
class IndoBERTBiLSTM(PreTrainedModel):
    """IndoBERT encoder followed by a BiLSTM and a linear classification head."""
    config_class = BertConfig

    def __init__(self, bert_config):
        super().__init__(bert_config)
        self.output_dim = OUTPUT_DIM
        self.n_layers = 1
        self.hidden_dim = HIDDEN_DIM
        self.bidirectional = BIDIRECTIONAL

        # Pretrained IndoBERT encoder
        self.bert = BertModel.from_pretrained(base_bert)
        # BiLSTM over the BERT token embeddings
        self.lstm = nn.LSTM(input_size=self.bert.config.hidden_size,
                            hidden_size=self.hidden_dim,
                            num_layers=self.n_layers,
                            bidirectional=self.bidirectional,
                            batch_first=True)
        self.dropout = nn.Dropout(DROPOUT)
        # Classification head; input width doubles when the LSTM is bidirectional
        self.output_layer = nn.Linear(
            self.hidden_dim * 2 if self.bidirectional else self.hidden_dim,
            self.output_dim)
    def forward(self, input_ids, attention_mask):
        hidden = self.init_hidden(input_ids.shape[0])
        output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = output.last_hidden_state  # [batch, seq_len, 768]
        lstm_output, (hidden_last, cn_last) = self.lstm(sequence_output, hidden)
        # Concatenate the final forward and backward hidden states
        # (this indexing assumes bidirectional=True)
        hidden_last_L = hidden_last[-2]   # forward direction
        hidden_last_R = hidden_last[-1]   # backward direction
        hidden_last_out = torch.cat([hidden_last_L, hidden_last_R], dim=-1)  # [batch, 1536]
        # Apply dropout, then the classification head
        out = self.dropout(hidden_last_out)
        logits = self.output_layer(out)
        return logits
    def init_hidden(self, batch_size):
        """Build zero-initialized (h_0, c_0) LSTM states on the model's device."""
        weight = next(self.parameters()).data
        number = 2 if self.bidirectional else 1
        shape = (self.n_layers * number, batch_size, self.hidden_dim)
        # new_zeros() inherits dtype and device from `weight`, so the states
        # always live on the same device as the model's parameters; no explicit
        # .cuda() call (or check of the USE_CUDA flag) is needed here.
        return (weight.new_zeros(shape), weight.new_zeros(shape))
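
# For reference, a minimal usage sketch: it builds the model from the
# pretrained IndoBERT config, tokenizes one sentence, and runs a forward
# pass. The tokenizer checkpoint and the example sentence are assumptions
# for illustration; they are not part of the original Space.
if __name__ == '__main__':
    from transformers import BertTokenizer

    config = BertConfig.from_pretrained(base_bert)
    model = IndoBERTBiLSTM(config).to(device)
    model.eval()

    tokenizer = BertTokenizer.from_pretrained(base_bert)
    batch = tokenizer(['saya suka film ini'],  # hypothetical example sentence
                      padding=True, truncation=True, return_tensors='pt')

    with torch.no_grad():
        logits = model(batch['input_ids'].to(device),
                       batch['attention_mask'].to(device))
    pred = logits.argmax(dim=-1)  # predicted class index, 0 or 1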