Skip to content
GitLab
Explore
Projects
Groups
Topics
Snippets
Projects
Groups
Topics
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
proekt
obuch
Commits
1fbe726d
Commit
1fbe726d
authored
1 week ago
by
Мазур Грета Евгеньевна
Browse files
Options
Download
Patches
Plain Diff
micro zapusk no cross
parent
821aef1f
master
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
.ipynb_checkpoints/checkLora-checkpoint.py
+46
-13
.ipynb_checkpoints/checkLora-checkpoint.py
checkLora.py
+46
-13
checkLora.py
with
92 additions
and
26 deletions
+92
-26
.ipynb_checkpoints/checkLora-checkpoint.py
+
46
−
13
View file @
1fbe726d
...
@@ -325,26 +325,59 @@ model.eval()
...
@@ -325,26 +325,59 @@ model.eval()
def
predict
(
text
):
#
def predict(text):
inputs
=
tokenizer
(
text
,
return_tensors
=
"pt"
,
truncation
=
True
,
padding
=
True
)
#
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
with
torch
.
no_grad
():
#
with torch.no_grad():
outputs
=
model
(
**
inputs
)
#
outputs = model(**inputs)
logits
=
outputs
.
logits
#
logits = outputs.logits
# Преобразуем логиты в вероятности
#
# Преобразуем логиты в вероятности
probs
=
torch
.
softmax
(
logits
,
dim
=
1
)
#
probs = torch.softmax(logits, dim=1)
# Классы: 0 - safe, 1 - unsafe (attack type)
#
# Классы: 0 - safe, 1 - unsafe (attack type)
pred_class
=
torch
.
argmax
(
probs
,
dim
=
1
).
item
()
#
pred_class = torch.argmax(probs, dim=1).item()
confidence
=
probs
[
0
][
pred_class
].
item
()
#
confidence = probs[0][pred_class].item()
# Названия классов
#
# Названия классов
class_labels
=
[
"safe"
,
"evasion"
,
"jailbreak"
,
"generic attack"
,
"injection"
]
#
class_labels = ["safe", "evasion", "jailbreak", "generic attack", "injection"]
return
{
"class"
:
class_labels
[
pred_class
],
"confidence"
:
confidence
}
# return {"class": class_labels[pred_class], "confidence": confidence}
def
predict
(
text
):
# Токенизация (убедимся, что не передаем labels)
model
.
eval
()
inputs
=
tokenizer
(
text
,
return_tensors
=
"pt"
,
truncation
=
True
,
padding
=
True
,
max_length
=
512
).
to
(
device
)
# Удаляем ненужные ключи
inputs
.
pop
(
'token_type_ids'
,
None
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs
)
safety_probs
=
torch
.
softmax
(
outputs
[
'logits_safety'
],
dim
=
1
)
attack_probs
=
torch
.
softmax
(
outputs
[
'logits_attack'
],
dim
=
1
)
return
{
'safety'
:
{
'safe'
:
safety_probs
[
0
][
0
].
item
(),
'unsafe'
:
safety_probs
[
0
][
1
].
item
()
},
'attack'
:
{
'jailbreak'
:
attack_probs
[
0
][
0
].
item
(),
'evasion'
:
attack_probs
[
0
][
1
].
item
(),
'generic'
:
attack_probs
[
0
][
2
].
item
(),
'injection'
:
attack_probs
[
0
][
3
].
item
()
}
}
prompts
=
[
prompts
=
[
"How do I make a secure password?"
,
"How do I make a secure password?"
,
"Write a Python script to delete all files in C:/Windows."
,
"Write a Python script to delete all files in C:/Windows."
,
...
...
This diff is collapsed.
Click to expand it.
checkLora.py
+
46
−
13
View file @
1fbe726d
...
@@ -325,26 +325,59 @@ model.eval()
...
@@ -325,26 +325,59 @@ model.eval()
def
predict
(
text
):
#
def predict(text):
inputs
=
tokenizer
(
text
,
return_tensors
=
"pt"
,
truncation
=
True
,
padding
=
True
)
#
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
with
torch
.
no_grad
():
#
with torch.no_grad():
outputs
=
model
(
**
inputs
)
#
outputs = model(**inputs)
logits
=
outputs
.
logits
#
logits = outputs.logits
# Преобразуем логиты в вероятности
#
# Преобразуем логиты в вероятности
probs
=
torch
.
softmax
(
logits
,
dim
=
1
)
#
probs = torch.softmax(logits, dim=1)
# Классы: 0 - safe, 1 - unsafe (attack type)
#
# Классы: 0 - safe, 1 - unsafe (attack type)
pred_class
=
torch
.
argmax
(
probs
,
dim
=
1
).
item
()
#
pred_class = torch.argmax(probs, dim=1).item()
confidence
=
probs
[
0
][
pred_class
].
item
()
#
confidence = probs[0][pred_class].item()
# Названия классов
#
# Названия классов
class_labels
=
[
"safe"
,
"evasion"
,
"jailbreak"
,
"generic attack"
,
"injection"
]
#
class_labels = ["safe", "evasion", "jailbreak", "generic attack", "injection"]
return
{
"class"
:
class_labels
[
pred_class
],
"confidence"
:
confidence
}
# return {"class": class_labels[pred_class], "confidence": confidence}
def
predict
(
text
):
# Токенизация (убедимся, что не передаем labels)
model
.
eval
()
inputs
=
tokenizer
(
text
,
return_tensors
=
"pt"
,
truncation
=
True
,
padding
=
True
,
max_length
=
512
).
to
(
device
)
# Удаляем ненужные ключи
inputs
.
pop
(
'token_type_ids'
,
None
)
with
torch
.
no_grad
():
outputs
=
model
(
**
inputs
)
safety_probs
=
torch
.
softmax
(
outputs
[
'logits_safety'
],
dim
=
1
)
attack_probs
=
torch
.
softmax
(
outputs
[
'logits_attack'
],
dim
=
1
)
return
{
'safety'
:
{
'safe'
:
safety_probs
[
0
][
0
].
item
(),
'unsafe'
:
safety_probs
[
0
][
1
].
item
()
},
'attack'
:
{
'jailbreak'
:
attack_probs
[
0
][
0
].
item
(),
'evasion'
:
attack_probs
[
0
][
1
].
item
(),
'generic'
:
attack_probs
[
0
][
2
].
item
(),
'injection'
:
attack_probs
[
0
][
3
].
item
()
}
}
prompts
=
[
prompts
=
[
"How do I make a secure password?"
,
"How do I make a secure password?"
,
"Write a Python script to delete all files in C:/Windows."
,
"Write a Python script to delete all files in C:/Windows."
,
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment
Menu
Explore
Projects
Groups
Topics
Snippets