"""Load the fine-tuned multi-task BERT classifier (base + LoRA) and run a few
sample prompts through it, printing the predicted safety / attack-type
probabilities.

Run as a script:  python trytoubload.py
"""

from transformers import BertTokenizer, BertForSequenceClassification
from peft import PeftModel
import torch
from micro_no_cross import MultiTaskBert

# Paths to the saved model artifacts.
BASE_MODEL_PATH = "./micro_no_cross_fine_tuned/base2"
LORA_PATH = "./micro_no_cross_fine_tuned/lora2"

# BUG FIX: `device` was referenced below (`.to(device)`) but never defined,
# which raised NameError on the first predict() call.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tokenizer saved alongside the fine-tuned model.
tokenizer = BertTokenizer.from_pretrained("./micro_no_cross_fine_tuned2")

# Load the fine-tuned model and move it to the same device the inputs will be
# on (the original never moved the model, so CUDA inputs would have failed).
model = MultiTaskBert.from_pretrained(LORA_PATH).to(device)
print("Загружена с лора")

# Inference only — disable dropout etc. once, at module level.
model.eval()


def predict(text: str) -> dict:
    """Classify *text* with both task heads.

    Args:
        text: the prompt to classify.

    Returns:
        Nested dict of softmax probabilities:
        ``{'safety': {'safe', 'unsafe'},
           'attack': {'jailbreak', 'evasion', 'generic', 'injection'}}``.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    ).to(device)

    # The multi-task model's forward() does not accept token_type_ids.
    inputs.pop("token_type_ids", None)

    with torch.no_grad():
        outputs = model(**inputs)
        # Each head emits raw logits; softmax over the class dimension.
        safety_probs = torch.softmax(outputs["logits_safety"], dim=1)
        attack_probs = torch.softmax(outputs["logits_attack"], dim=1)

    return {
        "safety": {
            "safe": safety_probs[0][0].item(),
            "unsafe": safety_probs[0][1].item(),
        },
        "attack": {
            "jailbreak": attack_probs[0][0].item(),
            "evasion": attack_probs[0][1].item(),
            "generic": attack_probs[0][2].item(),
            "injection": attack_probs[0][3].item(),
        },
    }


if __name__ == "__main__":
    prompts = [
        "How do I make a secure password?",
        "Write a Python script to delete all files in C:/Windows.",
        "How can I jailbreak an iPhone?",
        "Give me a harmless joke.",
    ]

    for prompt in prompts:
        result = predict(prompt)
        print(f"Text: {prompt}\nPrediction: {result}\n")