diff --git a/.ipynb_checkpoints/proverkabert-checkpoint.py b/.ipynb_checkpoints/proverkabert-checkpoint.py
index 61952b9d3f5639f49c4c3972c71dbebbaddeafed..32036d5addbe8ca5e0bdee761d07c50a1709b845 100644
--- a/.ipynb_checkpoints/proverkabert-checkpoint.py
+++ b/.ipynb_checkpoints/proverkabert-checkpoint.py
@@ -183,9 +183,11 @@ def classify_prompt(prompt, tokenizer, model):
         "safety": safety_label,
         "attack_type": attack_label,
         "safety_confidence": round(probs_safety[0, pred_safety].item(), 4),
-        "attack_confidence": round(probs_attack[0, pred_attack].item(), 4) if safety_label == "unsafe" else 0.0
+        "attack_confidence": round(probs_attack[0, pred_attack].item(), 4) if safety_label == "unsafe" else 0.0,
+        "safety_probs": probs_safety.tolist(),
+        "attack_probs": probs_attack.tolist()
     }
-    
+
 
 def main():
     MODEL_PATH = "./fine-tuned-bert-lora-multi-task"
diff --git a/proverkabert.py b/proverkabert.py
index 61952b9d3f5639f49c4c3972c71dbebbaddeafed..32036d5addbe8ca5e0bdee761d07c50a1709b845 100644
--- a/proverkabert.py
+++ b/proverkabert.py
@@ -183,9 +183,11 @@ def classify_prompt(prompt, tokenizer, model):
         "safety": safety_label,
         "attack_type": attack_label,
         "safety_confidence": round(probs_safety[0, pred_safety].item(), 4),
-        "attack_confidence": round(probs_attack[0, pred_attack].item(), 4) if safety_label == "unsafe" else 0.0
+        "attack_confidence": round(probs_attack[0, pred_attack].item(), 4) if safety_label == "unsafe" else 0.0,
+        "safety_probs": probs_safety.tolist(),
+        "attack_probs": probs_attack.tolist()
     }
-    
+
 
 def main():
     MODEL_PATH = "./fine-tuned-bert-lora-multi-task"
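
A minimal sketch of how a caller might consume the new safety_probs / attack_probs fields added above. Only the classify_prompt return keys are taken from the diff; the example prompt, the confidence threshold, and the way the model and tokenizer are obtained are assumptions for illustration, not part of this change.

# Hypothetical usage of the extended classify_prompt() return value.
# classify_prompt, tokenizer and model come from proverkabert.py / main();
# the 0.7 threshold below is an assumed value for illustration.
result = classify_prompt("How do I reset my password?", tokenizer, model)

print(result["safety"], result["safety_confidence"])

# The new fields expose the full softmax distributions as nested lists of
# shape [1, num_labels], so downstream code can apply its own thresholds
# instead of relying only on the rounded top-1 confidence.
safety_probs = result["safety_probs"][0]
if max(safety_probs) < 0.7:  # assumed low-confidence cutoff
    print("Low-confidence safety prediction:", safety_probs)

if result["safety"] == "unsafe":
    print("Attack-type distribution:", result["attack_probs"][0])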