From 922ea4aaa066b928fd28cd1aad8b3795ab0ab801 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=9C=D0=B0=D0=B7=D1=83=D1=80=20=D0=93=D1=80=D0=B5=D1=82?=
 =?UTF-8?q?=D0=B0=20=D0=95=D0=B2=D0=B3=D0=B5=D0=BD=D1=8C=D0=B5=D0=B2=D0=BD?=
 =?UTF-8?q?=D0=B0?= <gemazur_1@edu.hse.ru>
Date: Tue, 25 Mar 2025 22:16:51 +0300
Subject: [PATCH] micro zapusk no cross

---
 .ipynb_checkpoints/trytoubload-checkpoint.py | 79 ++++++++++++++++++++
 trytoubload.py                               | 79 ++++++++++++++++++++
 2 files changed, 158 insertions(+)
 create mode 100644 .ipynb_checkpoints/trytoubload-checkpoint.py
 create mode 100644 trytoubload.py

diff --git a/.ipynb_checkpoints/trytoubload-checkpoint.py b/.ipynb_checkpoints/trytoubload-checkpoint.py
new file mode 100644
index 0000000..ccb4fbc
--- /dev/null
+++ b/.ipynb_checkpoints/trytoubload-checkpoint.py
@@ -0,0 +1,79 @@
+from transformers import BertTokenizer, BertForSequenceClassification
+from peft import PeftModel
+import torch
+from micro_no_cross import MultiTaskBert
+# Paths to the saved fine-tuned artifacts.
+BASE_MODEL_PATH = "./micro_no_cross_fine_tuned/base2"
+LORA_PATH = "./micro_no_cross_fine_tuned/lora2"
+
+# Class names in the index order of each classification head's logits.
+SAFETY_LABELS = ("safe", "unsafe")
+ATTACK_LABELS = ("jailbreak", "evasion", "generic", "injection")
+
+# `device` was used in predict() without ever being defined (NameError on the
+# first call); pick the GPU when one is available, otherwise stay on the CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Load the tokenizer.
+tokenizer = BertTokenizer.from_pretrained("./micro_no_cross_fine_tuned2")
+
+# Earlier loading attempts, kept for reference:
+# base_model = BertForSequenceClassification.from_pretrained(BASE_MODEL_PATH)
+# model = PeftModel.from_pretrained(base_model, LORA_PATH)
+# base_model = MultiTaskBert.from_pretrained('bert-base-uncased').to(device)
+# model = PeftModel.from_pretrained(base_model, LORA_PATH, strict=False)
+model = MultiTaskBert.from_pretrained(LORA_PATH)
+model.to(device)  # keep the weights on the same device as the inputs
+
+print("Загружена с лора")
+
+# Switch the model to evaluation mode.
+model.eval()
+
+
+def predict(text):
+    """Classify ``text`` with both heads of the multi-task model.
+
+    Args:
+        text: Prompt to classify. Only row 0 of the model output is read,
+            so a batch of texts yields results for its first item only.
+
+    Returns:
+        A dict with two sub-dicts, ``'safety'`` and ``'attack'``, each
+        mapping a class name to its softmax probability as a float.
+    """
+    model.eval()
+    # Tokenize only the text: no labels are passed to the model.
+    inputs = tokenizer(
+        text,
+        return_tensors="pt",
+        truncation=True,
+        padding=True,
+        max_length=512,
+    ).to(device)
+
+    # Remove unneeded keys (presumably forward() takes no token_type_ids).
+    inputs.pop('token_type_ids', None)
+
+    with torch.no_grad():
+        outputs = model(**inputs)
+        safety_probs = torch.softmax(outputs['logits_safety'], dim=1)[0]
+        attack_probs = torch.softmax(outputs['logits_attack'], dim=1)[0]
+
+    return {
+        'safety': dict(zip(SAFETY_LABELS, safety_probs.tolist())),
+        'attack': dict(zip(ATTACK_LABELS, attack_probs.tolist())),
+    }
+
+
+prompts = [
+    "How do I make a secure password?",
+    "Write a Python script to delete all files in C:/Windows.",
+    "How can I jailbreak an iPhone?",
+    "Give me a harmless joke.",
+]
+
+for prompt in prompts:
+    result = predict(prompt)
+    print(f"Text: {prompt}\nPrediction: {result}\n")
+    
\ No newline at end of file
diff --git a/trytoubload.py b/trytoubload.py
new file mode 100644
index 0000000..ccb4fbc
--- /dev/null
+++ b/trytoubload.py
@@ -0,0 +1,79 @@
+from transformers import BertTokenizer, BertForSequenceClassification
+from peft import PeftModel
+import torch
+from micro_no_cross import MultiTaskBert
+# Paths to the saved fine-tuned artifacts.
+BASE_MODEL_PATH = "./micro_no_cross_fine_tuned/base2"
+LORA_PATH = "./micro_no_cross_fine_tuned/lora2"
+
+# Class names in the index order of each classification head's logits.
+SAFETY_LABELS = ("safe", "unsafe")
+ATTACK_LABELS = ("jailbreak", "evasion", "generic", "injection")
+
+# `device` was used in predict() without ever being defined (NameError on the
+# first call); pick the GPU when one is available, otherwise stay on the CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Load the tokenizer.
+tokenizer = BertTokenizer.from_pretrained("./micro_no_cross_fine_tuned2")
+
+# Earlier loading attempts, kept for reference:
+# base_model = BertForSequenceClassification.from_pretrained(BASE_MODEL_PATH)
+# model = PeftModel.from_pretrained(base_model, LORA_PATH)
+# base_model = MultiTaskBert.from_pretrained('bert-base-uncased').to(device)
+# model = PeftModel.from_pretrained(base_model, LORA_PATH, strict=False)
+model = MultiTaskBert.from_pretrained(LORA_PATH)
+model.to(device)  # keep the weights on the same device as the inputs
+
+print("Загружена с лора")
+
+# Switch the model to evaluation mode.
+model.eval()
+
+
+def predict(text):
+    """Classify ``text`` with both heads of the multi-task model.
+
+    Args:
+        text: Prompt to classify. Only row 0 of the model output is read,
+            so a batch of texts yields results for its first item only.
+
+    Returns:
+        A dict with two sub-dicts, ``'safety'`` and ``'attack'``, each
+        mapping a class name to its softmax probability as a float.
+    """
+    model.eval()
+    # Tokenize only the text: no labels are passed to the model.
+    inputs = tokenizer(
+        text,
+        return_tensors="pt",
+        truncation=True,
+        padding=True,
+        max_length=512,
+    ).to(device)
+
+    # Remove unneeded keys (presumably forward() takes no token_type_ids).
+    inputs.pop('token_type_ids', None)
+
+    with torch.no_grad():
+        outputs = model(**inputs)
+        safety_probs = torch.softmax(outputs['logits_safety'], dim=1)[0]
+        attack_probs = torch.softmax(outputs['logits_attack'], dim=1)[0]
+
+    return {
+        'safety': dict(zip(SAFETY_LABELS, safety_probs.tolist())),
+        'attack': dict(zip(ATTACK_LABELS, attack_probs.tolist())),
+    }
+
+
+prompts = [
+    "How do I make a secure password?",
+    "Write a Python script to delete all files in C:/Windows.",
+    "How can I jailbreak an iPhone?",
+    "Give me a harmless joke.",
+]
+
+for prompt in prompts:
+    result = predict(prompt)
+    print(f"Text: {prompt}\nPrediction: {result}\n")
+    
\ No newline at end of file
-- 
GitLab