Commit dbc14a60 authored by Мазур Грета Евгеньевна
Browse files

improve grad and save token hsvboauyfb

parent 8cf3a5c9
No related merge requests found
Showing with 36 additions and 4 deletions
+36 -4
......@@ -11,6 +11,9 @@ import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.utils.class_weight import compute_class_weight
# Select the compute device: CUDA when torch reports it as available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Paths for saving the tokenized data
TOKENIZED_DATA_DIR = "./tokenized_data"
TRAIN_TOKENIZED_PATH = os.path.join(TOKENIZED_DATA_DIR, "train")
......@@ -74,6 +77,10 @@ classes_task2 = np.unique(train_data[train_data['safety'] == 'unsafe']['type'])
# 'balanced' class weights for task 2, computed over the 'type' column of the rows whose 'safety' is 'unsafe'
class_weights_task2 = compute_class_weight('balanced', classes=classes_task2, y=train_data[train_data['safety'] == 'unsafe']['type'])
# Map positional index -> weight, in the order the weights were returned
class_weights_dict_task2 = {i: weight for i, weight in enumerate(class_weights_task2)}
# Move the class weights onto the selected device as float32 tensors
# NOTE(review): class_weights_dict_task1 is defined outside this excerpt — presumably built the same way for task 1; confirm
class_weights_task1_tensor = torch.tensor(list(class_weights_dict_task1.values()), dtype=torch.float32).to(device)
class_weights_task2_tensor = torch.tensor(list(class_weights_dict_task2.values()), dtype=torch.float32).to(device)
# Модель с балансировкой классов
class MultiTaskBert(BertPreTrainedModel):
def __init__(self, config):
......@@ -93,6 +100,14 @@ class MultiTaskBert(BertPreTrainedModel):
return_dict=None,
**kwargs, # Поглощаем все лишние аргументы
):
# Переносим входные данные на устройство
if input_ids is not None:
input_ids = input_ids.to(device)
if attention_mask is not None:
attention_mask = attention_mask.to(device)
if labels is not None:
labels = labels.to(device)
# Если переданы inputs_embeds, используем их вместо input_ids
if inputs_embeds is not None:
outputs = self.bert(
......@@ -125,8 +140,8 @@ class MultiTaskBert(BertPreTrainedModel):
labels_safety, labels_attack = labels[:, 0], labels[:, 1]
# Вычисляем потери для обеих задач с учётом весов классов
loss_fct_safety = nn.CrossEntropyLoss(weight=torch.tensor(list(class_weights_dict_task1.values()), dtype=torch.float32))
loss_fct_attack = nn.CrossEntropyLoss(weight=torch.tensor(list(class_weights_dict_task2.values()), dtype=torch.float32))
loss_fct_safety = nn.CrossEntropyLoss(weight=class_weights_task1_tensor)
loss_fct_attack = nn.CrossEntropyLoss(weight=class_weights_task2_tensor)
loss_safety = loss_fct_safety(logits_safety, labels_safety)
loss_attack = loss_fct_attack(logits_attack, labels_attack)
......@@ -142,6 +157,7 @@ class MultiTaskBert(BertPreTrainedModel):
# Load the model from the 'bert-base-uncased' pretrained checkpoint
model = MultiTaskBert.from_pretrained('bert-base-uncased')
model = model.to(device) # Move the model onto the selected device
# Конфигурация LoRA
lora_config = LoraConfig(
......
......@@ -11,6 +11,9 @@ import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.utils.class_weight import compute_class_weight
# Select the compute device: CUDA when torch reports it as available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Paths for saving the tokenized data
TOKENIZED_DATA_DIR = "./tokenized_data"
TRAIN_TOKENIZED_PATH = os.path.join(TOKENIZED_DATA_DIR, "train")
......@@ -74,6 +77,10 @@ classes_task2 = np.unique(train_data[train_data['safety'] == 'unsafe']['type'])
# 'balanced' class weights for task 2, computed over the 'type' column of the rows whose 'safety' is 'unsafe'
class_weights_task2 = compute_class_weight('balanced', classes=classes_task2, y=train_data[train_data['safety'] == 'unsafe']['type'])
# Map positional index -> weight, in the order the weights were returned
class_weights_dict_task2 = {i: weight for i, weight in enumerate(class_weights_task2)}
# Move the class weights onto the selected device as float32 tensors
# NOTE(review): class_weights_dict_task1 is defined outside this excerpt — presumably built the same way for task 1; confirm
class_weights_task1_tensor = torch.tensor(list(class_weights_dict_task1.values()), dtype=torch.float32).to(device)
class_weights_task2_tensor = torch.tensor(list(class_weights_dict_task2.values()), dtype=torch.float32).to(device)
# Модель с балансировкой классов
class MultiTaskBert(BertPreTrainedModel):
def __init__(self, config):
......@@ -93,6 +100,14 @@ class MultiTaskBert(BertPreTrainedModel):
return_dict=None,
**kwargs, # Поглощаем все лишние аргументы
):
# Переносим входные данные на устройство
if input_ids is not None:
input_ids = input_ids.to(device)
if attention_mask is not None:
attention_mask = attention_mask.to(device)
if labels is not None:
labels = labels.to(device)
# Если переданы inputs_embeds, используем их вместо input_ids
if inputs_embeds is not None:
outputs = self.bert(
......@@ -125,8 +140,8 @@ class MultiTaskBert(BertPreTrainedModel):
labels_safety, labels_attack = labels[:, 0], labels[:, 1]
# Вычисляем потери для обеих задач с учётом весов классов
loss_fct_safety = nn.CrossEntropyLoss(weight=torch.tensor(list(class_weights_dict_task1.values()), dtype=torch.float32))
loss_fct_attack = nn.CrossEntropyLoss(weight=torch.tensor(list(class_weights_dict_task2.values()), dtype=torch.float32))
loss_fct_safety = nn.CrossEntropyLoss(weight=class_weights_task1_tensor)
loss_fct_attack = nn.CrossEntropyLoss(weight=class_weights_task2_tensor)
loss_safety = loss_fct_safety(logits_safety, labels_safety)
loss_attack = loss_fct_attack(logits_attack, labels_attack)
......@@ -142,6 +157,7 @@ class MultiTaskBert(BertPreTrainedModel):
# Load the model from the 'bert-base-uncased' pretrained checkpoint
model = MultiTaskBert.from_pretrained('bert-base-uncased')
model = model.to(device) # Move the model onto the selected device
# Конфигурация LoRA
lora_config = LoraConfig(
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment