| | import json |
| | import torch |
| | import torch.nn as nn |
| | from dataclasses import dataclass |
| | from preprocessing import preprocess_single_string |
| |
|
# Vocabulary used to encode review text for the LSTM (presumably token -> integer id;
# confirm against preprocess_single_string). The encoding is pinned to UTF-8 so that
# loading does not depend on the platform's locale encoding.
with open('model_data/vocab_kinopoisk_lstm.json', 'r', encoding='utf-8') as file:
    vocab_to_int = json.load(file)
| |
|
@dataclass
class ConfigRNN:
    """Hyper-parameters of the recurrent text classifier.

    All fields are required and are set positionally or by keyword.
    """

    # Number of rows in the embedding table.
    vocab_size: int
    # Torch device name, e.g. 'cpu'.
    device: str
    # Number of stacked recurrent layers.
    n_layers: int
    # Size of each token embedding vector.
    embedding_dim: int
    # Size of the recurrent hidden state.
    hidden_size: int
    # Sequence length handed to the text preprocessing step.
    seq_len: int
    # Whether the recurrent layer runs in both directions. The previous
    # annotation `bool or int` already evaluated to plain `bool` at runtime.
    bidirectional: bool
| |
|
# Module-wide configuration used as the default for the classifier and for
# prediction. The values have to agree with the saved weights that are loaded
# into the model, so change them only together with the checkpoint.
net_config = ConfigRNN(
    # One more slot than there are vocabulary entries
    # (presumably index 0 is reserved for padding -- TODO confirm).
    vocab_size=len(vocab_to_int) + 1,
    device="cpu",
    n_layers=3,
    embedding_dim=64,
    hidden_size=64,
    seq_len=100,
    bidirectional=False,
)
| |
|
| |
|
class LSTMClassifier(nn.Module):
    """Embedding -> stacked LSTM -> small MLP head that emits 3 class logits.

    Args:
        rnn_conf: configuration object exposing ``vocab_size``,
            ``embedding_dim``, ``hidden_size``, ``n_layers`` and
            ``bidirectional``. When omitted (or ``None``) the module-level
            ``net_config`` is used.
    """

    def __init__(self, rnn_conf=None) -> None:
        super().__init__()

        if rnn_conf is None:
            # Looked up at call time so the class definition itself does not
            # depend on the module-level configuration being built first.
            rnn_conf = net_config

        self.embedding_dim = rnn_conf.embedding_dim
        self.hidden_size = rnn_conf.hidden_size
        self.bidirectional = rnn_conf.bidirectional
        self.n_layers = rnn_conf.n_layers

        # NOTE: attribute names and the layout of ``clf`` define the
        # state_dict keys; keep them stable so saved weights still load.
        self.embedding = nn.Embedding(rnn_conf.vocab_size, self.embedding_dim)
        self.lstm = nn.LSTM(
            input_size=self.embedding_dim,
            hidden_size=self.hidden_size,
            bidirectional=bool(self.bidirectional),
            batch_first=True,
            num_layers=self.n_layers,
        )
        # A bidirectional LSTM concatenates both directions, doubling the
        # feature size seen by the classifier head.
        self.bidirect_factor = 2 if self.bidirectional else 1
        self.clf = nn.Sequential(
            nn.Linear(self.hidden_size * self.bidirect_factor, 32),
            nn.Tanh(),
            nn.Dropout(),
            nn.Linear(32, 3),
        )

    def model_description(self):
        """Return a short tag such as ``'lstm_onedirect_3'`` for this model."""
        direction = 'bidirect' if self.bidirectional else 'onedirect'
        return f'lstm_{direction}_{self.n_layers}'

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute class logits for a batch of token-id sequences.

        Args:
            x: integer tensor of shape ``(batch, seq_len)``.

        Returns:
            Tensor of shape ``(batch, 3)`` with unnormalised class scores.
            The batch axis is always kept, including for a single sample.
        """
        embeddings = self.embedding(x)
        out, _ = self.lstm(embeddings)
        # Features of the last time step: already (batch, features), so no
        # squeeze is needed. A blanket squeeze() used to drop the batch axis
        # for single-sample batches.
        last_step = out[:, -1, :]
        return self.clf(last_step)
| |
|
| |
|
def load_lstm_model(weights_path='model_data/lstm_model.pth'):
    """Build an ``LSTMClassifier`` with default config and load saved weights.

    Args:
        weights_path: path to the saved ``state_dict``; defaults to the
            checkpoint shipped in ``model_data``.

    Returns:
        The classifier with weights loaded, switched to evaluation mode.
    """
    model = LSTMClassifier()
    # The freshly built model lives on the CPU, so the checkpoint tensors are
    # mapped to the CPU as well. Without map_location, weights saved from a
    # GPU fail to load on a host that has no GPU.
    # NOTE(review): torch.load unpickles the file; only load trusted
    # checkpoints (consider weights_only=True on torch versions that have it).
    state_dict = torch.load(weights_path, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()
    return model


# Shared model instance, created once when the module is imported.
model = load_lstm_model()
| |
|
| |
|
def predict_review(review_text, model=model, net_config=net_config, vocab_to_int=vocab_to_int):
    """Classify a review and return a canned reply for the predicted class.

    Args:
        review_text: raw review text, handed to ``preprocess_single_string``.
        model: classifier called on a batch holding the single encoded review.
        net_config: supplies ``seq_len`` for preprocessing and ``device`` as a
            fallback when the model has no parameters.
        vocab_to_int: vocabulary handed to ``preprocess_single_string``.

    Returns:
        A user-facing message (in Russian): the positive reply for class 0,
        the neutral reply for class 1, and the negative reply otherwise.
    """
    # Presumably a 1-D tensor of token ids of length seq_len -- confirm
    # against preprocess_single_string.
    sample = preprocess_single_string(review_text, net_config.seq_len, vocab_to_int)
    model.eval()

    # Run on whatever device the model's weights live on: the input has to be
    # moved there before the forward pass, not the output afterwards.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device(net_config.device)

    with torch.no_grad():
        logits = model(sample.unsqueeze(0).to(device))

    # Some model versions return a 1-D tensor for a single sample; restore
    # the batch axis so the argmax below is taken over the class axis.
    if logits.dim() == 1:
        logits = logits.unsqueeze(0)

    predicted_class = int(logits.argmax(dim=1).item())

    if predicted_class == 0:
        return "Это положительный комментарий! Хорошо, что тебе понравился этот фильм! Можешь перейти в раздел с моделью GPT2 и обсудить с ней фильм!"
    if predicted_class == 1:
        return "Скорее всего... это комментарий нейтрального характера.. какой-то ты скучный..."
    return "Ты что такой токсик? Будь сдержанее, не понравился фильм - пройди мимо и не порьт авторам настроение, они же старались!"