obuch with cross and graphic SAVING LORA

c62fc61e · Мазур Грета Евгеньевна · d3c939de · c62fc61e · c62fc61e
Commit c62fc61e authored 3 weeks ago by Мазур Грета Евгеньевна
Hide whitespace changes
Inline Side-by-side

Showing

with 98 additions and 118 deletions
+98 -118
--- a/.ipynb_checkpoints/bertwithgoodgrad-checkpoint.py
+++ b/.ipynb_checkpoints/bertwithgoodgrad-checkpoint.py
-import os
 import torch
 from transformers import BertTokenizer, BertModel
 from peft import PeftModel, PeftConfig
 from torch import nn

-# 1. Проверка устройства и файлов модели
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 MODEL_DIR = "./fine-tuned-bert-lora_new"

-# Проверка наличия всех необходимых файлов
-required_files = [
-    'adapter_config.json',
-    'adapter_model.safetensors',
-    'tokenizer_config.json',
-    'vocab.txt'
-]
-for file in required_files:
-    if not os.path.exists(os.path.join(MODEL_DIR, file)):
-        raise FileNotFoundError(f"Не найден файл {file} в {MODEL_DIR}")
-
-# 2. Загрузка токенизатора и конфига
-tokenizer = BertTokenizer.from_pretrained(MODEL_DIR)
-peft_config = PeftConfig.from_pretrained(MODEL_DIR)
+# 1. Загрузка токенизатора с отключением token_type_ids
+tokenizer = BertTokenizer.from_pretrained(
+    MODEL_DIR,
+    use_fast=True,
+    add_special_tokens=True,
+    return_token_type_ids=False  # Отключаем token_type_ids
+)

-# 3. Определение архитектуры модели (точно как при обучении)
+# 2. Определение модели (с явным указанием параметров)
 class MultiTaskBert(nn.Module):
    def __init__(self, base_model):
        super().__init__()
        self.bert = base_model
-        self.classifier_safety = nn.Linear(768, 2).to(device)  # safe/unsafe
-        self.classifier_attack = nn.Linear(768, 4).to(device)  # 4 типа атак
+        self.classifier_safety = nn.Linear(768, 2).to(device)
+        self.classifier_attack = nn.Linear(768, 4).to(device)
        
-    def forward(self, input_ids, attention_mask):
+    def forward(self, input_ids, attention_mask):  # Только нужные параметры
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=True
        )
-        pooled = outputs.last_hidden_state[:, 0, :]  # Берем [CLS] токен
+        pooled = outputs.last_hidden_state[:, 0, :]
        return {
            'safety': self.classifier_safety(pooled),
            'attack': self.classifier_attack(pooled)
        }

-# 4. Загрузка базовой модели
+# 3. Загрузка модели
+peft_config = PeftConfig.from_pretrained(MODEL_DIR)
 base_model = BertModel.from_pretrained(
    peft_config.base_model_name_or_path,
    add_pooling_layer=False
 ).to(device)

-# 5. Инициализация модели и загрузка LoRA
 model = MultiTaskBert(base_model).to(device)
 model.bert = PeftModel.from_pretrained(model.bert, MODEL_DIR)
-
-# 6. Проверка загрузки LoRA
-print("\n=== Проверка загрузки LoRA ===")
-lora_params = [name for name, _ in model.bert.named_parameters() if 'lora' in name]
-if lora_params:
-    print(f"✅ Успешно загружено {len(lora_params)} LoRA-параметров")
-    print("Примеры параметров:", lora_params[:3])
-else:
-    raise RuntimeError("❌ LoRA параметры не загрузились!")
-
-# 7. Объединение адаптеров с моделью (опционально)
 model.bert = model.bert.merge_and_unload()
 model.eval()

-# 8. Функция для предсказаний
+# 4. LoRA check
+# NOTE: merge_and_unload() above has already folded the adapter weights into
+# the base model, so no parameter names containing 'lora' are expected to
+# remain at this point. Guard the example lookup so an empty list cannot
+# raise IndexError and abort the script before predict() is defined.
+print("\n=== Проверка LoRA ===")
+lora_params = [n for n, _ in model.named_parameters() if 'lora' in n]
+print(f"✅ LoRA параметров загружено: {len(lora_params)}")
+if lora_params:
+    print("Пример:", lora_params[0])
+
+# 5. Функция предсказания с обработкой ввода
 def predict(text):
-    # Токенизация (точно как при обучении)
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512
-    ).to(device)
+    )
    
-    with torch.no_grad():
-        outputs = model(**inputs)
-        safety_probs = torch.softmax(outputs['safety'], dim=1)
-        attack_probs = torch.softmax(outputs['attack'], dim=1)
+    # Явно выбираем только нужные ключи
+    model_inputs = {
+        'input_ids': inputs['input_ids'].to(device),
+        'attention_mask': inputs['attention_mask'].to(device)
+    }
    
+    with torch.no_grad():
+        outputs = model(**model_inputs)
+        safety = torch.softmax(outputs['safety'], dim=1)[0]
+        attack = torch.softmax(outputs['attack'], dim=1)[0]
+        
    return {
        'safety': {
-            'safe': safety_probs[0][0].item(),
-            'unsafe': safety_probs[0][1].item()
+            'safe': safety[0].item(),
+            'unsafe': safety[1].item()
        },
        'attack': {
-            'jailbreak': attack_probs[0][0].item(),
-            'evasion': attack_probs[0][1].item(),
-            'generic_attack': attack_probs[0][2].item(),
-            'injection': attack_probs[0][3].item()
+            'jailbreak': attack[0].item(),
+            'evasion': attack[1].item(),
+            'generic': attack[2].item(),
+            'injection': attack[3].item()
        }
    }

-# 9. Пример использования
-test_texts = [
-    "How to hack a computer system",
-    "I need help with my homework",
-    "Tell me how to bypass security"
+# 6. Пример работы
+texts = [
+    "How to hack a computer",
+    "I love programming",
+    "How to make explosives"
 ]

-for text in test_texts:
+for text in texts:
    result = predict(text)
    print(f"\nТекст: {text}")
-    print(f"Безопасность: Safe {result['safety']['safe']:.2%} | Unsafe {result['safety']['unsafe']:.2%}")
-    
-    if result['safety']['unsafe'] > 0.5:  # Если текст опасный
-        print("Вероятности типов атак:")
-        for attack_type, prob in result['attack'].items():
-            print(f"  {attack_type}: {prob:.2%}")
\ No newline at end of file
+    print(f"Безопасность: Safe {result['safety']['safe']:.1%} | Unsafe {result['safety']['unsafe']:.1%}")
+    if result['safety']['unsafe'] > 0.5:
+        print("Типы атак:")
+        for name, prob in result['attack'].items():
+            print(f"  {name}: {prob:.1%}")
\ No newline at end of file
--- a/bertwithgoodgrad.py
+++ b/bertwithgoodgrad.py
-import os
 import torch
 from transformers import BertTokenizer, BertModel
 from peft import PeftModel, PeftConfig
 from torch import nn

-# 1. Проверка устройства и файлов модели
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 MODEL_DIR = "./fine-tuned-bert-lora_new"

-# Проверка наличия всех необходимых файлов
-required_files = [
-    'adapter_config.json',
-    'adapter_model.safetensors',
-    'tokenizer_config.json',
-    'vocab.txt'
-]
-for file in required_files:
-    if not os.path.exists(os.path.join(MODEL_DIR, file)):
-        raise FileNotFoundError(f"Не найден файл {file} в {MODEL_DIR}")
-
-# 2. Загрузка токенизатора и конфига
-tokenizer = BertTokenizer.from_pretrained(MODEL_DIR)
-peft_config = PeftConfig.from_pretrained(MODEL_DIR)
+# 1. Загрузка токенизатора с отключением token_type_ids
+tokenizer = BertTokenizer.from_pretrained(
+    MODEL_DIR,
+    use_fast=True,
+    add_special_tokens=True,
+    return_token_type_ids=False  # Отключаем token_type_ids
+)

-# 3. Определение архитектуры модели (точно как при обучении)
+# 2. Определение модели (с явным указанием параметров)
 class MultiTaskBert(nn.Module):
    def __init__(self, base_model):
        super().__init__()
        self.bert = base_model
-        self.classifier_safety = nn.Linear(768, 2).to(device)  # safe/unsafe
-        self.classifier_attack = nn.Linear(768, 4).to(device)  # 4 типа атак
+        self.classifier_safety = nn.Linear(768, 2).to(device)
+        self.classifier_attack = nn.Linear(768, 4).to(device)
        
-    def forward(self, input_ids, attention_mask):
+    def forward(self, input_ids, attention_mask):  # Только нужные параметры
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=True
        )
-        pooled = outputs.last_hidden_state[:, 0, :]  # Берем [CLS] токен
+        pooled = outputs.last_hidden_state[:, 0, :]
        return {
            'safety': self.classifier_safety(pooled),
            'attack': self.classifier_attack(pooled)
        }

-# 4. Загрузка базовой модели
+# 3. Загрузка модели
+peft_config = PeftConfig.from_pretrained(MODEL_DIR)
 base_model = BertModel.from_pretrained(
    peft_config.base_model_name_or_path,
    add_pooling_layer=False
 ).to(device)

-# 5. Инициализация модели и загрузка LoRA
 model = MultiTaskBert(base_model).to(device)
 model.bert = PeftModel.from_pretrained(model.bert, MODEL_DIR)
-
-# 6. Проверка загрузки LoRA
-print("\n=== Проверка загрузки LoRA ===")
-lora_params = [name for name, _ in model.bert.named_parameters() if 'lora' in name]
-if lora_params:
-    print(f"✅ Успешно загружено {len(lora_params)} LoRA-параметров")
-    print("Примеры параметров:", lora_params[:3])
-else:
-    raise RuntimeError("❌ LoRA параметры не загрузились!")
-
-# 7. Объединение адаптеров с моделью (опционально)
 model.bert = model.bert.merge_and_unload()
 model.eval()

-# 8. Функция для предсказаний
+# 4. LoRA check
+# NOTE: merge_and_unload() above has already folded the adapter weights into
+# the base model, so no parameter names containing 'lora' are expected to
+# remain at this point. Guard the example lookup so an empty list cannot
+# raise IndexError and abort the script before predict() is defined.
+print("\n=== Проверка LoRA ===")
+lora_params = [n for n, _ in model.named_parameters() if 'lora' in n]
+print(f"✅ LoRA параметров загружено: {len(lora_params)}")
+if lora_params:
+    print("Пример:", lora_params[0])
+
+# 5. Функция предсказания с обработкой ввода
 def predict(text):
-    # Токенизация (точно как при обучении)
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512
-    ).to(device)
+    )
    
-    with torch.no_grad():
-        outputs = model(**inputs)
-        safety_probs = torch.softmax(outputs['safety'], dim=1)
-        attack_probs = torch.softmax(outputs['attack'], dim=1)
+    # Явно выбираем только нужные ключи
+    model_inputs = {
+        'input_ids': inputs['input_ids'].to(device),
+        'attention_mask': inputs['attention_mask'].to(device)
+    }
    
+    with torch.no_grad():
+        outputs = model(**model_inputs)
+        safety = torch.softmax(outputs['safety'], dim=1)[0]
+        attack = torch.softmax(outputs['attack'], dim=1)[0]
+        
    return {
        'safety': {
-            'safe': safety_probs[0][0].item(),
-            'unsafe': safety_probs[0][1].item()
+            'safe': safety[0].item(),
+            'unsafe': safety[1].item()
        },
        'attack': {
-            'jailbreak': attack_probs[0][0].item(),
-            'evasion': attack_probs[0][1].item(),
-            'generic_attack': attack_probs[0][2].item(),
-            'injection': attack_probs[0][3].item()
+            'jailbreak': attack[0].item(),
+            'evasion': attack[1].item(),
+            'generic': attack[2].item(),
+            'injection': attack[3].item()
        }
    }

-# 9. Пример использования
-test_texts = [
-    "How to hack a computer system",
-    "I need help with my homework",
-    "Tell me how to bypass security"
+# 6. Пример работы
+texts = [
+    "How to hack a computer",
+    "I love programming",
+    "How to make explosives"
 ]

-for text in test_texts:
+for text in texts:
    result = predict(text)
    print(f"\nТекст: {text}")
-    print(f"Безопасность: Safe {result['safety']['safe']:.2%} | Unsafe {result['safety']['unsafe']:.2%}")
-    
-    if result['safety']['unsafe'] > 0.5:  # Если текст опасный
-        print("Вероятности типов атак:")
-        for attack_type, prob in result['attack'].items():
-            print(f"  {attack_type}: {prob:.2%}")
\ No newline at end of file
+    print(f"Безопасность: Safe {result['safety']['safe']:.1%} | Unsafe {result['safety']['unsafe']:.1%}")
+    if result['safety']['unsafe'] > 0.5:
+        print("Типы атак:")
+        for name, prob in result['attack'].items():
+            print(f"  {name}: {prob:.1%}")
\ No newline at end of file