From b01f66a9ba6ff997f92ef6cbe87b08197bd32663 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D0=B7=D1=83=D1=80=20=D0=93=D1=80=D0=B5=D1=82?= =?UTF-8?q?=D0=B0=20=D0=95=D0=B2=D0=B3=D0=B5=D0=BD=D1=8C=D0=B5=D0=B2=D0=BD?= =?UTF-8?q?=D0=B0?= <gemazur_1@edu.hse.ru> Date: Mon, 24 Mar 2025 21:17:57 +0300 Subject: [PATCH] obuch with cross and graphic --- .ipynb_checkpoints/withgraphic-checkpoint.py | 167 +++++++++++++++++++ withgraphic.py | 167 +++++++++++++++++++ 2 files changed, 334 insertions(+) create mode 100644 .ipynb_checkpoints/withgraphic-checkpoint.py create mode 100644 withgraphic.py diff --git a/.ipynb_checkpoints/withgraphic-checkpoint.py b/.ipynb_checkpoints/withgraphic-checkpoint.py new file mode 100644 index 0000000..2b0c12d --- /dev/null +++ b/.ipynb_checkpoints/withgraphic-checkpoint.py @@ -0,0 +1,167 @@ +import os +import pandas as pd +import torch +import numpy as np +import matplotlib.pyplot as plt +from sklearn.model_selection import train_test_split +from sklearn.metrics import f1_score, precision_score, recall_score +from sklearn.utils.class_weight import compute_class_weight +from datasets import Dataset, load_from_disk +from transformers import BertTokenizer, BertPreTrainedModel, BertModel +from torch import nn +from transformers import Trainer, TrainingArguments +from peft import get_peft_model, LoraConfig, TaskType + +# Очистка кеша +torch.cuda.empty_cache() + +# Определяем устройство (GPU или CPU) +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# Пути для сохранения токенизированных данных +TOKENIZED_DATA_DIR = "./tokenized_data" +TRAIN_TOKENIZED_PATH = os.path.join(TOKENIZED_DATA_DIR, "train") +VAL_TOKENIZED_PATH = os.path.join(TOKENIZED_DATA_DIR, "val") +TEST_TOKENIZED_PATH = os.path.join(TOKENIZED_DATA_DIR, "test") + +# Загрузка данных +data = pd.read_csv('all_dataset.csv') + +# Разделение данных на train, validation и test +train_data, test_data = train_test_split(data, test_size=0.2, random_state=42) +train_data, val_data = train_test_split(train_data, test_size=0.1, random_state=42) + +# Преобразование данных в Dataset +train_dataset = Dataset.from_pandas(train_data) +val_dataset = Dataset.from_pandas(val_data) +test_dataset = Dataset.from_pandas(test_data) + +# Загрузка токенизатора +tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + +# Функция токенизации +def preprocess_function(examples): + tokenized = tokenizer(examples['prompt'], truncation=True, padding=True, max_length=512) + + labels_safety = [0 if label == "safe" else 1 for label in examples['safety']] + labels_attack = [0 if label == "jailbreak" else 1 if label == "evasion" else 2 if label == "generic attack" else 3 for label in examples['type']] + + tokenized['labels'] = list(zip(labels_safety, labels_attack)) + return tokenized + +# Токенизация данных (если не сохранены, то создаем) +if os.path.exists(TRAIN_TOKENIZED_PATH) and os.path.exists(VAL_TOKENIZED_PATH) and os.path.exists(TEST_TOKENIZED_PATH): + train_dataset = load_from_disk(TRAIN_TOKENIZED_PATH) + val_dataset = load_from_disk(VAL_TOKENIZED_PATH) + test_dataset = load_from_disk(TEST_TOKENIZED_PATH) +else: + train_dataset = train_dataset.map(preprocess_function, batched=True) + val_dataset = val_dataset.map(preprocess_function, batched=True) + test_dataset = test_dataset.map(preprocess_function, batched=True) + + os.makedirs(TOKENIZED_DATA_DIR, exist_ok=True) + train_dataset.save_to_disk(TRAIN_TOKENIZED_PATH) + val_dataset.save_to_disk(VAL_TOKENIZED_PATH) + test_dataset.save_to_disk(TEST_TOKENIZED_PATH) + +# Вычисление весов классов +class_weights_task1 = compute_class_weight('balanced', classes=np.unique(train_data['safety']), y=train_data['safety']) +class_weights_task2 = compute_class_weight('balanced', classes=np.unique(train_data[train_data['safety'] == 'unsafe']['type']), y=train_data[train_data['safety'] == 'unsafe']['type']) + +# Перевод весов в тензоры +class_weights_task1_tensor = torch.tensor(class_weights_task1, dtype=torch.float32).to(device) +class_weights_task2_tensor = torch.tensor(class_weights_task2, dtype=torch.float32).to(device) + +# Определение модели +class MultiTaskBert(BertPreTrainedModel): + def __init__(self, config): + super().__init__(config) + self.bert = BertModel(config) + self.classifier_safety = nn.Linear(config.hidden_size, 2) + self.classifier_attack = nn.Linear(config.hidden_size, 4) + + def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs): + input_ids, attention_mask, labels = map(lambda x: x.to(device) if x is not None else None, [input_ids, attention_mask, labels]) + outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, return_dict=True) + pooled_output = outputs.last_hidden_state[:, 0, :] + + logits_safety = self.classifier_safety(pooled_output) + logits_attack = self.classifier_attack(pooled_output) + + loss = None + if labels is not None: + labels_safety, labels_attack = labels[:, 0], labels[:, 1] + loss_safety = nn.CrossEntropyLoss(weight=class_weights_task1_tensor)(logits_safety, labels_safety) + loss_attack = nn.CrossEntropyLoss(weight=class_weights_task2_tensor)(logits_attack, labels_attack) + loss = loss_safety + loss_attack + + return {'logits_safety': logits_safety, 'logits_attack': logits_attack, 'loss': loss} + +# Создание модели +model = MultiTaskBert.from_pretrained('bert-base-uncased').to(device) + +# Настройка LoRA +lora_config = LoraConfig(task_type=TaskType.SEQ_CLS, r=8, lora_alpha=32, lora_dropout=0.1, target_modules=["query", "value"]) +model = get_peft_model(model, lora_config) + +# Функция вычисления метрик +def compute_metrics(p): + preds_safety = np.argmax(p.predictions[0], axis=1) + preds_attack = np.argmax(p.predictions[1], axis=1) + labels_safety, labels_attack = p.label_ids[:, 0], p.label_ids[:, 1] + + return { + 'f1_safety': f1_score(labels_safety, preds_safety, average='weighted'), + 'precision_safety': precision_score(labels_safety, preds_safety, average='weighted'), + 'recall_safety': recall_score(labels_safety, preds_safety, average='weighted'), + 'f1_attack': f1_score(labels_attack, preds_attack, average='weighted'), + 'precision_attack': precision_score(labels_attack, preds_attack, average='weighted'), + 'recall_attack': recall_score(labels_attack, preds_attack, average='weighted'), + } + +# Аргументы обучения +training_args = TrainingArguments( + output_dir='./results', + evaluation_strategy="epoch", + save_strategy="epoch", + learning_rate=3e-5, + per_device_train_batch_size=16, + per_device_eval_batch_size=16, + num_train_epochs=5, + weight_decay=0.01, + logging_dir='./logs', + logging_steps=10, + save_total_limit=2, + load_best_model_at_end=True, + metric_for_best_model="f1_safety", + greater_is_better=True, + fp16=True, + max_grad_norm=1.0, + warmup_steps=100, + report_to="none", +) + +# Обучение +trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset, eval_dataset=val_dataset, compute_metrics=compute_metrics) +trainer.train() + +# Оценка +val_results = trainer.evaluate(val_dataset) +test_results = trainer.evaluate(test_dataset) + +print("Validation Results:", val_results) +print("Test Results:", test_results) + +# График потерь +logs = trainer.state.log_history +train_loss = [log["loss"] for log in logs if "loss" in log] +val_loss = [log["eval_loss"] for log in logs if "eval_loss" in log] + +plt.plot(train_loss, label="Train Loss") +plt.plot(val_loss, label="Validation Loss") +plt.legend() +plt.show() + +# Сохранение модели +model.save_pretrained('./fine-tuned-bert-lora') +tokenizer.save_pretrained('./fine-tuned-bert-lora') \ No newline at end of file diff --git a/withgraphic.py b/withgraphic.py new file mode 100644 index 0000000..2b0c12d --- /dev/null +++ b/withgraphic.py @@ -0,0 +1,167 @@ +import os +import pandas as pd +import torch +import numpy as np +import matplotlib.pyplot as plt +from sklearn.model_selection import train_test_split +from sklearn.metrics import f1_score, precision_score, recall_score +from sklearn.utils.class_weight import compute_class_weight +from datasets import Dataset, load_from_disk +from transformers import BertTokenizer, BertPreTrainedModel, BertModel +from torch import nn +from transformers import Trainer, TrainingArguments +from peft import get_peft_model, LoraConfig, TaskType + +# Очистка кеша +torch.cuda.empty_cache() + +# Определяем устройство (GPU или CPU) +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# Пути для сохранения токенизированных данных +TOKENIZED_DATA_DIR = "./tokenized_data" +TRAIN_TOKENIZED_PATH = os.path.join(TOKENIZED_DATA_DIR, "train") +VAL_TOKENIZED_PATH = os.path.join(TOKENIZED_DATA_DIR, "val") +TEST_TOKENIZED_PATH = os.path.join(TOKENIZED_DATA_DIR, "test") + +# Загрузка данных +data = pd.read_csv('all_dataset.csv') + +# Разделение данных на train, validation и test +train_data, test_data = train_test_split(data, test_size=0.2, random_state=42) +train_data, val_data = train_test_split(train_data, test_size=0.1, random_state=42) + +# Преобразование данных в Dataset +train_dataset = Dataset.from_pandas(train_data) +val_dataset = Dataset.from_pandas(val_data) +test_dataset = Dataset.from_pandas(test_data) + +# Загрузка токенизатора +tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') + +# Функция токенизации +def preprocess_function(examples): + tokenized = tokenizer(examples['prompt'], truncation=True, padding=True, max_length=512) + + labels_safety = [0 if label == "safe" else 1 for label in examples['safety']] + labels_attack = [0 if label == "jailbreak" else 1 if label == "evasion" else 2 if label == "generic attack" else 3 for label in examples['type']] + + tokenized['labels'] = list(zip(labels_safety, labels_attack)) + return tokenized + +# Токенизация данных (если не сохранены, то создаем) +if os.path.exists(TRAIN_TOKENIZED_PATH) and os.path.exists(VAL_TOKENIZED_PATH) and os.path.exists(TEST_TOKENIZED_PATH): + train_dataset = load_from_disk(TRAIN_TOKENIZED_PATH) + val_dataset = load_from_disk(VAL_TOKENIZED_PATH) + test_dataset = load_from_disk(TEST_TOKENIZED_PATH) +else: + train_dataset = train_dataset.map(preprocess_function, batched=True) + val_dataset = val_dataset.map(preprocess_function, batched=True) + test_dataset = test_dataset.map(preprocess_function, batched=True) + + os.makedirs(TOKENIZED_DATA_DIR, exist_ok=True) + train_dataset.save_to_disk(TRAIN_TOKENIZED_PATH) + val_dataset.save_to_disk(VAL_TOKENIZED_PATH) + test_dataset.save_to_disk(TEST_TOKENIZED_PATH) + +# Вычисление весов классов +class_weights_task1 = compute_class_weight('balanced', classes=np.unique(train_data['safety']), y=train_data['safety']) +class_weights_task2 = compute_class_weight('balanced', classes=np.unique(train_data[train_data['safety'] == 'unsafe']['type']), y=train_data[train_data['safety'] == 'unsafe']['type']) + +# Перевод весов в тензоры +class_weights_task1_tensor = torch.tensor(class_weights_task1, dtype=torch.float32).to(device) +class_weights_task2_tensor = torch.tensor(class_weights_task2, dtype=torch.float32).to(device) + +# Определение модели +class MultiTaskBert(BertPreTrainedModel): + def __init__(self, config): + super().__init__(config) + self.bert = BertModel(config) + self.classifier_safety = nn.Linear(config.hidden_size, 2) + self.classifier_attack = nn.Linear(config.hidden_size, 4) + + def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs): + input_ids, attention_mask, labels = map(lambda x: x.to(device) if x is not None else None, [input_ids, attention_mask, labels]) + outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, return_dict=True) + pooled_output = outputs.last_hidden_state[:, 0, :] + + logits_safety = self.classifier_safety(pooled_output) + logits_attack = self.classifier_attack(pooled_output) + + loss = None + if labels is not None: + labels_safety, labels_attack = labels[:, 0], labels[:, 1] + loss_safety = nn.CrossEntropyLoss(weight=class_weights_task1_tensor)(logits_safety, labels_safety) + loss_attack = nn.CrossEntropyLoss(weight=class_weights_task2_tensor)(logits_attack, labels_attack) + loss = loss_safety + loss_attack + + return {'logits_safety': logits_safety, 'logits_attack': logits_attack, 'loss': loss} + +# Создание модели +model = MultiTaskBert.from_pretrained('bert-base-uncased').to(device) + +# Настройка LoRA +lora_config = LoraConfig(task_type=TaskType.SEQ_CLS, r=8, lora_alpha=32, lora_dropout=0.1, target_modules=["query", "value"]) +model = get_peft_model(model, lora_config) + +# Функция вычисления метрик +def compute_metrics(p): + preds_safety = np.argmax(p.predictions[0], axis=1) + preds_attack = np.argmax(p.predictions[1], axis=1) + labels_safety, labels_attack = p.label_ids[:, 0], p.label_ids[:, 1] + + return { + 'f1_safety': f1_score(labels_safety, preds_safety, average='weighted'), + 'precision_safety': precision_score(labels_safety, preds_safety, average='weighted'), + 'recall_safety': recall_score(labels_safety, preds_safety, average='weighted'), + 'f1_attack': f1_score(labels_attack, preds_attack, average='weighted'), + 'precision_attack': precision_score(labels_attack, preds_attack, average='weighted'), + 'recall_attack': recall_score(labels_attack, preds_attack, average='weighted'), + } + +# Аргументы обучения +training_args = TrainingArguments( + output_dir='./results', + evaluation_strategy="epoch", + save_strategy="epoch", + learning_rate=3e-5, + per_device_train_batch_size=16, + per_device_eval_batch_size=16, + num_train_epochs=5, + weight_decay=0.01, + logging_dir='./logs', + logging_steps=10, + save_total_limit=2, + load_best_model_at_end=True, + metric_for_best_model="f1_safety", + greater_is_better=True, + fp16=True, + max_grad_norm=1.0, + warmup_steps=100, + report_to="none", +) + +# Обучение +trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset, eval_dataset=val_dataset, compute_metrics=compute_metrics) +trainer.train() + +# Оценка +val_results = trainer.evaluate(val_dataset) +test_results = trainer.evaluate(test_dataset) + +print("Validation Results:", val_results) +print("Test Results:", test_results) + +# График потерь +logs = trainer.state.log_history +train_loss = [log["loss"] for log in logs if "loss" in log] +val_loss = [log["eval_loss"] for log in logs if "eval_loss" in log] + +plt.plot(train_loss, label="Train Loss") +plt.plot(val_loss, label="Validation Loss") +plt.legend() +plt.show() + +# Сохранение модели +model.save_pretrained('./fine-tuned-bert-lora') +tokenizer.save_pretrained('./fine-tuned-bert-lora') \ No newline at end of file -- GitLab