Commit 042d22bd authored by Мазур Грета Евгеньевна's avatar Мазур Грета Евгеньевна
Browse files

training bert

parent 8e3f3f8e
No related merge requests found
Showing with 434 additions and 0 deletions
+434 -0
import pandas as pd
from sklearn.model_selection import train_test_split
# Load the full dataset of prompts with safety / attack-type annotations.
# Expected columns (used below): 'prompt', 'safety', 'type'.
data = pd.read_csv('all_dataset.csv')
# Split into train / validation / test: 20% held out for test,
# then 10% of the remaining training portion becomes validation.
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
train_data, val_data = train_test_split(train_data, test_size=0.1, random_state=42)
from datasets import Dataset
# Wrap the pandas splits as HuggingFace Datasets for use with Trainer.map().
train_dataset = Dataset.from_pandas(train_data)
val_dataset = Dataset.from_pandas(val_data)
test_dataset = Dataset.from_pandas(test_data)
from transformers import BertTokenizer
# Tokenizer must match the checkpoint the model is loaded from below.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
def preprocess_function(examples):
    """Tokenize a batch of prompts and attach paired (safety, attack-type) labels.

    Safety label: 0 for "safe", 1 for anything else.
    Attack-type label: jailbreak=0, evasion=1, generic attack=2, other=3.
    """
    encoded = tokenizer(examples['prompt'], truncation=True, padding=True, max_length=512)
    attack_ids = {"jailbreak": 0, "evasion": 1, "generic attack": 2}
    safety_labels = [int(tag != "safe") for tag in examples['safety']]
    attack_labels = [attack_ids.get(tag, 3) for tag in examples['type']]
    # Each example's target is a (safety, attack) pair; the collator stacks
    # these into a (batch, 2) tensor consumed by MultiTaskBert.forward.
    encoded['labels'] = list(zip(safety_labels, attack_labels))
    return encoded
# Tokenize and label all three splits (batched for speed).
train_dataset = train_dataset.map(preprocess_function, batched=True)
val_dataset = val_dataset.map(preprocess_function, batched=True)
test_dataset = test_dataset.map(preprocess_function, batched=True)
from transformers import BertPreTrainedModel, BertModel
from torch import nn
import torch
class MultiTaskBert(BertPreTrainedModel):
    """BERT backbone with two parallel classification heads.

    Head 1 (``classifier_safety``): binary safe (0) / unsafe (1).
    Head 2 (``classifier_attack``): attack type — jailbreak (0), evasion (1),
    generic attack (2), injection/other (3).

    ``forward`` always returns a dict with both heads' logits, the combined
    loss (when ``labels`` is given), and optional attentions / hidden states.
    """

    def __init__(self, config):
        super().__init__(config)
        self.bert = BertModel(config)
        self.classifier_safety = nn.Linear(config.hidden_size, 2)  # safe / unsafe
        self.classifier_attack = nn.Linear(config.hidden_size, 4)  # jailbreak, evasion, generic attack, injection
        # transformers convention: initialize weights and apply final
        # processing once all submodules are registered.
        self.post_init()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        **kwargs,  # absorb any extra arguments the Trainer passes through
    ):
        """Encode once and score both heads off the [CLS] token.

        labels (optional): tensor of shape (batch, 2); column 0 is the
        safety label, column 1 the attack-type label.
        """
        # BertModel accepts exactly one of input_ids / inputs_embeds being
        # non-None, so a single call replaces the two duplicated branches.
        # return_dict=True is forced internally: the attribute access below
        # (outputs.last_hidden_state) would fail on the tuple returned when
        # a caller passes return_dict=False.
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=True,
        )
        cls_repr = outputs.last_hidden_state[:, 0, :]  # [CLS] token embedding
        logits_safety = self.classifier_safety(cls_repr)
        logits_attack = self.classifier_attack(cls_repr)

        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            # Total loss is the unweighted sum of the two per-task losses.
            loss = loss_fct(logits_safety, labels[:, 0]) + loss_fct(logits_attack, labels[:, 1])

        return {
            'logits_safety': logits_safety,
            'logits_attack': logits_attack,
            'loss': loss,
            'attentions': outputs.attentions if output_attentions else None,
            'hidden_states': outputs.hidden_states if output_hidden_states else None,
        }
# Instantiate the multi-task model from the pretrained BERT checkpoint;
# the two classification heads are newly (randomly) initialized.
model = MultiTaskBert.from_pretrained('bert-base-uncased')
from peft import get_peft_model, LoraConfig, TaskType

# LoRA configuration: adapt only the attention query/value projections.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,            # rank of the low-rank update matrices
    lora_alpha=32,  # scaling factor applied to the LoRA updates
    lora_dropout=0.1,
    target_modules=["query", "value"],
    # With SEQ_CLS, PEFT only keeps modules named "classifier"/"score"
    # trainable by default. Our heads have custom names, so list them
    # explicitly or they would stay frozen and be dropped on save.
    modules_to_save=["classifier_safety", "classifier_attack"],
)
# Wrap the base model with LoRA adapters (freezes everything else).
model = get_peft_model(model, lora_config)
# Report how many parameters will actually be trained.
model.print_trainable_parameters()
from transformers import Trainer, TrainingArguments
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score
# Metric computation for both classification tasks.
def compute_metrics(p):
    """Weighted F1 / precision / recall for the safety and attack-type heads.

    ``p.predictions`` is (logits_safety, logits_attack).
    ``p.label_ids`` may arrive either as a pair of 1-D arrays (one per label
    name) or as a single stacked (batch, 2) array when both targets live in
    one "labels" column; the original ``label_ids[0]`` would pick ROW 0 of a
    stacked array, so both layouts are handled explicitly.
    """
    preds_safety = np.argmax(p.predictions[0], axis=1)
    preds_attack = np.argmax(p.predictions[1], axis=1)

    label_ids = p.label_ids
    if isinstance(label_ids, (tuple, list)):
        labels_safety, labels_attack = label_ids[0], label_ids[1]
    else:
        # Single stacked array: column 0 = safety, column 1 = attack type.
        label_ids = np.asarray(label_ids)
        labels_safety, labels_attack = label_ids[:, 0], label_ids[:, 1]

    return {
        'f1_safety': f1_score(labels_safety, preds_safety, average='weighted'),
        'precision_safety': precision_score(labels_safety, preds_safety, average='weighted'),
        'recall_safety': recall_score(labels_safety, preds_safety, average='weighted'),
        'f1_attack': f1_score(labels_attack, preds_attack, average='weighted'),
        'precision_attack': precision_score(labels_attack, preds_attack, average='weighted'),
        'recall_attack': recall_score(labels_attack, preds_attack, average='weighted'),
    }
# Training hyper-parameters.
training_args = TrainingArguments(
    output_dir='./results',
    eval_strategy="epoch",   # evaluate once per epoch
    save_strategy="epoch",   # must match eval_strategy for load_best_model_at_end
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    save_total_limit=2,      # keep only the two most recent checkpoints
    load_best_model_at_end=True,
    metric_for_best_model="f1_safety",
    greater_is_better=True,
    # preprocess_function stores both targets in a single "labels" column;
    # the previous names ("labels_safety"/"labels_attack") do not exist as
    # dataset columns, so the Trainer would have found no labels at all.
    label_names=["labels"],
    report_to="none",  # disable W&B and other experiment trackers
)
# Build the Trainer around the LoRA-wrapped model.
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=val_dataset,
compute_metrics=compute_metrics,
)
# Fine-tune.
trainer.train()
# Evaluate (the best checkpoint, per load_best_model_at_end) on the test split.
results = trainer.evaluate(test_dataset)
print("Fine-tuned Model Evaluation Results:")
print(results)
# Persist the LoRA adapters and the tokenizer side by side.
model.save_pretrained('./fine-tuned-bert-lora-multi-task')
tokenizer.save_pretrained('./fine-tuned-bert-lora-multi-task')
\ No newline at end of file
trybert.py 0 → 100644
import pandas as pd
from sklearn.model_selection import train_test_split
# Load the full dataset of prompts with safety / attack-type annotations.
# Expected columns (used below): 'prompt', 'safety', 'type'.
data = pd.read_csv('all_dataset.csv')
# Split into train / validation / test: 20% held out for test,
# then 10% of the remaining training portion becomes validation.
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
train_data, val_data = train_test_split(train_data, test_size=0.1, random_state=42)
from datasets import Dataset
# Wrap the pandas splits as HuggingFace Datasets for use with Trainer.map().
train_dataset = Dataset.from_pandas(train_data)
val_dataset = Dataset.from_pandas(val_data)
test_dataset = Dataset.from_pandas(test_data)
from transformers import BertTokenizer
# Tokenizer must match the checkpoint the model is loaded from below.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
def preprocess_function(examples):
    """Tokenize a batch of prompts and attach paired (safety, attack-type) labels.

    Safety label: 0 for "safe", 1 for anything else.
    Attack-type label: jailbreak=0, evasion=1, generic attack=2, other=3.
    """
    encoded = tokenizer(examples['prompt'], truncation=True, padding=True, max_length=512)
    attack_ids = {"jailbreak": 0, "evasion": 1, "generic attack": 2}
    safety_labels = [int(tag != "safe") for tag in examples['safety']]
    attack_labels = [attack_ids.get(tag, 3) for tag in examples['type']]
    # Each example's target is a (safety, attack) pair; the collator stacks
    # these into a (batch, 2) tensor consumed by MultiTaskBert.forward.
    encoded['labels'] = list(zip(safety_labels, attack_labels))
    return encoded
# Tokenize and label all three splits (batched for speed).
train_dataset = train_dataset.map(preprocess_function, batched=True)
val_dataset = val_dataset.map(preprocess_function, batched=True)
test_dataset = test_dataset.map(preprocess_function, batched=True)
from transformers import BertPreTrainedModel, BertModel
from torch import nn
import torch
class MultiTaskBert(BertPreTrainedModel):
    """BERT backbone with two parallel classification heads.

    Head 1 (``classifier_safety``): binary safe (0) / unsafe (1).
    Head 2 (``classifier_attack``): attack type — jailbreak (0), evasion (1),
    generic attack (2), injection/other (3).

    ``forward`` always returns a dict with both heads' logits, the combined
    loss (when ``labels`` is given), and optional attentions / hidden states.
    """

    def __init__(self, config):
        super().__init__(config)
        self.bert = BertModel(config)
        self.classifier_safety = nn.Linear(config.hidden_size, 2)  # safe / unsafe
        self.classifier_attack = nn.Linear(config.hidden_size, 4)  # jailbreak, evasion, generic attack, injection
        # transformers convention: initialize weights and apply final
        # processing once all submodules are registered.
        self.post_init()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        **kwargs,  # absorb any extra arguments the Trainer passes through
    ):
        """Encode once and score both heads off the [CLS] token.

        labels (optional): tensor of shape (batch, 2); column 0 is the
        safety label, column 1 the attack-type label.
        """
        # BertModel accepts exactly one of input_ids / inputs_embeds being
        # non-None, so a single call replaces the two duplicated branches.
        # return_dict=True is forced internally: the attribute access below
        # (outputs.last_hidden_state) would fail on the tuple returned when
        # a caller passes return_dict=False.
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=True,
        )
        cls_repr = outputs.last_hidden_state[:, 0, :]  # [CLS] token embedding
        logits_safety = self.classifier_safety(cls_repr)
        logits_attack = self.classifier_attack(cls_repr)

        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            # Total loss is the unweighted sum of the two per-task losses.
            loss = loss_fct(logits_safety, labels[:, 0]) + loss_fct(logits_attack, labels[:, 1])

        return {
            'logits_safety': logits_safety,
            'logits_attack': logits_attack,
            'loss': loss,
            'attentions': outputs.attentions if output_attentions else None,
            'hidden_states': outputs.hidden_states if output_hidden_states else None,
        }
# Instantiate the multi-task model from the pretrained BERT checkpoint;
# the two classification heads are newly (randomly) initialized.
model = MultiTaskBert.from_pretrained('bert-base-uncased')
from peft import get_peft_model, LoraConfig, TaskType

# LoRA configuration: adapt only the attention query/value projections.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,            # rank of the low-rank update matrices
    lora_alpha=32,  # scaling factor applied to the LoRA updates
    lora_dropout=0.1,
    target_modules=["query", "value"],
    # With SEQ_CLS, PEFT only keeps modules named "classifier"/"score"
    # trainable by default. Our heads have custom names, so list them
    # explicitly or they would stay frozen and be dropped on save.
    modules_to_save=["classifier_safety", "classifier_attack"],
)
# Wrap the base model with LoRA adapters (freezes everything else).
model = get_peft_model(model, lora_config)
# Report how many parameters will actually be trained.
model.print_trainable_parameters()
from transformers import Trainer, TrainingArguments
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score
# Metric computation for both classification tasks.
def compute_metrics(p):
    """Weighted F1 / precision / recall for the safety and attack-type heads.

    ``p.predictions`` is (logits_safety, logits_attack).
    ``p.label_ids`` may arrive either as a pair of 1-D arrays (one per label
    name) or as a single stacked (batch, 2) array when both targets live in
    one "labels" column; the original ``label_ids[0]`` would pick ROW 0 of a
    stacked array, so both layouts are handled explicitly.
    """
    preds_safety = np.argmax(p.predictions[0], axis=1)
    preds_attack = np.argmax(p.predictions[1], axis=1)

    label_ids = p.label_ids
    if isinstance(label_ids, (tuple, list)):
        labels_safety, labels_attack = label_ids[0], label_ids[1]
    else:
        # Single stacked array: column 0 = safety, column 1 = attack type.
        label_ids = np.asarray(label_ids)
        labels_safety, labels_attack = label_ids[:, 0], label_ids[:, 1]

    return {
        'f1_safety': f1_score(labels_safety, preds_safety, average='weighted'),
        'precision_safety': precision_score(labels_safety, preds_safety, average='weighted'),
        'recall_safety': recall_score(labels_safety, preds_safety, average='weighted'),
        'f1_attack': f1_score(labels_attack, preds_attack, average='weighted'),
        'precision_attack': precision_score(labels_attack, preds_attack, average='weighted'),
        'recall_attack': recall_score(labels_attack, preds_attack, average='weighted'),
    }
# Training hyper-parameters.
training_args = TrainingArguments(
    output_dir='./results',
    eval_strategy="epoch",   # evaluate once per epoch
    save_strategy="epoch",   # must match eval_strategy for load_best_model_at_end
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    save_total_limit=2,      # keep only the two most recent checkpoints
    load_best_model_at_end=True,
    metric_for_best_model="f1_safety",
    greater_is_better=True,
    # preprocess_function stores both targets in a single "labels" column;
    # the previous names ("labels_safety"/"labels_attack") do not exist as
    # dataset columns, so the Trainer would have found no labels at all.
    label_names=["labels"],
    report_to="none",  # disable W&B and other experiment trackers
)
# Build the Trainer around the LoRA-wrapped model.
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=val_dataset,
compute_metrics=compute_metrics,
)
# Fine-tune.
trainer.train()
# Evaluate (the best checkpoint, per load_best_model_at_end) on the test split.
results = trainer.evaluate(test_dataset)
print("Fine-tuned Model Evaluation Results:")
print(results)
# Persist the LoRA adapters and the tokenizer side by side.
model.save_pretrained('./fine-tuned-bert-lora-multi-task')
tokenizer.save_pretrained('./fine-tuned-bert-lora-multi-task')
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment