Skip to content
GitLab
Explore
Projects
Groups
Topics
Snippets
Projects
Groups
Topics
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
proekt
obuch
Commits
d20b3b88
Commit
d20b3b88
authored
3 weeks ago
by
Мазур Грета Евгеньевна
Browse files
Options
Download
Patches
Plain Diff
obuch with cross val
parent
81e0d092
master
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
.ipynb_checkpoints/obuchwithcross-checkpoint.py
+56
-113
.ipynb_checkpoints/obuchwithcross-checkpoint.py
obuchwithcross.py
+56
-113
obuchwithcross.py
with
112 additions
and
226 deletions
+112
-226
.ipynb_checkpoints/obuchwithcross-checkpoint.py
+
56
−
113
View file @
d20b3b88
import
os
import
pandas
as
pd
from
sklearn.model_selection
import
train_test_split
,
StratifiedKFold
from
sklearn.model_selection
import
train_test_split
from
datasets
import
Dataset
,
load_from_disk
from
transformers
import
BertTokenizer
,
BertPreTrainedModel
,
BertModel
from
torch
import
nn
...
...
@@ -10,6 +10,7 @@ from transformers import Trainer, TrainingArguments
import
numpy
as
np
from
sklearn.metrics
import
f1_score
,
precision_score
,
recall_score
from
sklearn.utils.class_weight
import
compute_class_weight
torch
.
cuda
.
empty_cache
()
# Определяем устройство (GPU или CPU)
...
...
@@ -21,19 +22,12 @@ TRAIN_TOKENIZED_PATH = os.path.join(TOKENIZED_DATA_DIR, "train")
VAL_TOKENIZED_PATH
=
os
.
path
.
join
(
TOKENIZED_DATA_DIR
,
"val"
)
TEST_TOKENIZED_PATH
=
os
.
path
.
join
(
TOKENIZED_DATA_DIR
,
"test"
)
print
(
'123456'
)
# Загрузка данных
data
=
pd
.
read_csv
(
'all_dataset.csv'
)
# Стратифицированное разделение данных
X
=
data
[
'prompt'
]
# тексты
y_safety
=
data
[
'safety'
]
# метки для безопасности
y_attack
=
data
[
'type'
]
# метки для типа атак
# Стратифицированное разделение
train_data
,
test_data
=
train_test_split
(
data
,
test_size
=
0.2
,
stratify
=
y_safety
,
random_state
=
42
)
train_data
,
val_data
=
train_test_split
(
train_data
,
test_size
=
0.1
,
stratify
=
train_data
[
'safety'
],
random_state
=
42
)
# Разделение данных
train_data
,
test_data
=
train_test_split
(
data
,
test_size
=
0.2
,
random_state
=
42
)
train_data
,
val_data
=
train_test_split
(
train_data
,
test_size
=
0.1
,
random_state
=
42
)
# Преобразование данных в формат Dataset
train_dataset
=
Dataset
.
from_pandas
(
train_data
)
...
...
@@ -45,18 +39,19 @@ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Функция для токенизации
def
preprocess_function
(
examples
):
# Токенизация текста
tokenized
=
tokenizer
(
examples
[
'prompt'
],
truncation
=
True
,
padding
=
True
,
max_length
=
512
)
# Подготовка меток
labels_safety
=
[
0
if
label
==
"safe"
else
1
for
label
in
examples
[
'safety'
]]
labels_attack
=
[
0
if
label
==
"jailbreak"
else
1
if
label
==
"evasion"
else
2
if
label
==
"generic attack"
else
3
for
label
in
examples
[
'type'
]]
# Объединяем метки в один тензор
tokenized
[
'labels'
]
=
list
(
zip
(
labels_safety
,
labels_attack
))
# Проверка корректности меток
print
(
f
"Sample labels:
{
tokenized
[
'labels'
][
:
5
]
}
"
)
return
tokenized
# Провер
ка
, существуют ли
уже
токенизированные данные
# Провер
яем
, существуют ли токенизированные данные
if
os
.
path
.
exists
(
TRAIN_TOKENIZED_PATH
)
and
os
.
path
.
exists
(
VAL_TOKENIZED_PATH
)
and
os
.
path
.
exists
(
TEST_TOKENIZED_PATH
):
print
(
"Загрузка токенизированных данных с диска..."
)
train_dataset
=
load_from_disk
(
TRAIN_TOKENIZED_PATH
)
...
...
@@ -64,82 +59,40 @@ if os.path.exists(TRAIN_TOKENIZED_PATH) and os.path.exists(VAL_TOKENIZED_PATH) a
test_dataset
=
load_from_disk
(
TEST_TOKENIZED_PATH
)
else
:
print
(
"Токенизация данных..."
)
# Токенизация данных
train_dataset
=
train_dataset
.
map
(
preprocess_function
,
batched
=
True
)
val_dataset
=
val_dataset
.
map
(
preprocess_function
,
batched
=
True
)
test_dataset
=
test_dataset
.
map
(
preprocess_function
,
batched
=
True
)
# Сохранение токенизированных данных на диск
os
.
makedirs
(
TOKENIZED_DATA_DIR
,
exist_ok
=
True
)
train_dataset
.
save_to_disk
(
TRAIN_TOKENIZED_PATH
)
val_dataset
.
save_to_disk
(
VAL_TOKENIZED_PATH
)
test_dataset
.
save_to_disk
(
TEST_TOKENIZED_PATH
)
print
(
"Токенизированные данные сохранены на диск."
)
# Вычисление весов классов для безопасных/небезопасных
classes_task1
=
np
.
unique
(
train_data
[
'safety'
])
class_weights_task1
=
compute_class_weight
(
'balanced'
,
classes
=
classes_task1
,
y
=
train_data
[
'safety'
])
class_weights_dict_task1
=
{
i
:
weight
for
i
,
weight
in
enumerate
(
class_weights_task1
)}
# Вычисление весов классов для атак
classes_task2
=
np
.
unique
(
train_data
[
train_data
[
'safety'
]
==
'unsafe'
][
'type'
])
class_weights_task2
=
compute_class_weight
(
'balanced'
,
classes
=
classes_task2
,
y
=
train_data
[
train_data
[
'safety'
]
==
'unsafe'
][
'type'
])
class_weights_dict_task2
=
{
i
:
weight
for
i
,
weight
in
enumerate
(
class_weights_task2
)}
# Вычисление весов классов
class_weights_task1
=
compute_class_weight
(
'balanced'
,
classes
=
np
.
unique
(
train_data
[
'safety'
]),
y
=
train_data
[
'safety'
])
class_weights_task2
=
compute_class_weight
(
'balanced'
,
classes
=
np
.
unique
(
train_data
[
train_data
[
'safety'
]
==
'unsafe'
][
'type'
]),
y
=
train_data
[
train_data
[
'safety'
]
==
'unsafe'
][
'type'
])
# Перенос весов на устройство
class_weights_task1_tensor
=
torch
.
tensor
(
list
(
class_weights_dict_task1
.
values
()),
dtype
=
torch
.
float32
).
to
(
device
)
class_weights_task2_tensor
=
torch
.
tensor
(
list
(
class_weights_dict_task2
.
values
()),
dtype
=
torch
.
float32
).
to
(
device
)
class_weights_task1_tensor
=
torch
.
tensor
(
class_weights_task1
,
dtype
=
torch
.
float32
).
to
(
device
)
class_weights_task2_tensor
=
torch
.
tensor
(
class_weights_task2
,
dtype
=
torch
.
float32
).
to
(
device
)
#
Модель с балансировкой классов
#
Определение модели
class
MultiTaskBert
(
BertPreTrainedModel
):
def
__init__
(
self
,
config
):
super
().
__init__
(
config
)
self
.
bert
=
BertModel
(
config
)
self
.
classifier_safety
=
nn
.
Linear
(
config
.
hidden_size
,
2
)
# safe/unsafe
self
.
classifier_attack
=
nn
.
Linear
(
config
.
hidden_size
,
4
)
# jailbreak, evasion, generic attack, injection
def
forward
(
self
,
input_ids
=
None
,
attention_mask
=
None
,
inputs_embeds
=
None
,
labels
=
None
,
output_attentions
=
None
,
output_hidden_states
=
None
,
return_dict
=
None
,
**
kwargs
,
):
if
input_ids
is
not
None
:
input_ids
=
input_ids
.
to
(
device
)
if
attention_mask
is
not
None
:
attention_mask
=
attention_mask
.
to
(
device
)
if
labels
is
not
None
:
labels
=
labels
.
to
(
device
)
self
.
classifier_safety
=
nn
.
Linear
(
config
.
hidden_size
,
2
)
self
.
classifier_attack
=
nn
.
Linear
(
config
.
hidden_size
,
4
)
if
inputs_embeds
is
not
None
:
outputs
=
self
.
bert
(
inputs_embeds
=
inputs_embeds
,
attention_mask
=
attention_mask
,
output_attentions
=
output_attentions
,
output_hidden_states
=
output_hidden_states
,
return_dict
=
return_dict
,
)
else
:
outputs
=
self
.
bert
(
input_ids
=
input_ids
,
attention_mask
=
attention_mask
,
output_attentions
=
output_attentions
,
output_hidden_states
=
output_hidden_states
,
return_dict
=
return_dict
,
)
pooled_output
=
outputs
.
last_hidden_state
[:,
0
,
:]
# Используем [CLS] токен
def
forward
(
self
,
input_ids
=
None
,
attention_mask
=
None
,
labels
=
None
,
**
kwargs
):
outputs
=
self
.
bert
(
input_ids
=
input_ids
.
to
(
device
),
attention_mask
=
attention_mask
.
to
(
device
),
return_dict
=
True
)
pooled_output
=
outputs
.
last_hidden_state
[:,
0
,
:]
logits_safety
=
self
.
classifier_safety
(
pooled_output
)
logits_attack
=
self
.
classifier_attack
(
pooled_output
)
loss
=
None
if
labels
is
not
None
:
labels
=
labels
.
to
(
device
)
labels_safety
,
labels_attack
=
labels
[:,
0
],
labels
[:,
1
]
loss_fct_safety
=
nn
.
CrossEntropyLoss
(
weight
=
class_weights_task1_tensor
)
...
...
@@ -147,19 +100,12 @@ class MultiTaskBert(BertPreTrainedModel):
loss_safety
=
loss_fct_safety
(
logits_safety
,
labels_safety
)
loss_attack
=
loss_fct_attack
(
logits_attack
,
labels_attack
)
loss
=
loss_safety
+
loss_attack
# Общий loss
loss
=
loss_safety
+
loss_attack
return
{
'logits_safety'
:
logits_safety
,
'logits_attack'
:
logits_attack
,
'loss'
:
loss
,
'attentions'
:
outputs
.
attentions
if
output_attentions
else
None
,
'hidden_states'
:
outputs
.
hidden_states
if
output_hidden_states
else
None
,
}
return
(
loss
,
(
logits_safety
,
logits_attack
))
if
loss
is
not
None
else
(
logits_safety
,
logits_attack
)
# Загрузка модели
model
=
MultiTaskBert
.
from_pretrained
(
'bert-base-uncased'
)
model
=
model
.
to
(
device
)
model
=
MultiTaskBert
.
from_pretrained
(
'bert-base-uncased'
).
to
(
device
)
# Конфигурация LoRA
lora_config
=
LoraConfig
(
...
...
@@ -170,66 +116,63 @@ lora_config = LoraConfig(
target_modules
=
[
"query"
,
"value"
],
)
model
=
get_peft_model
(
model
,
lora_config
)
model
.
print_trainable_parameters
()
# Функция
для
вычисления метрик
# Функция вычисления метрик
def
compute_metrics
(
p
):
preds_safety
=
np
.
argmax
(
p
.
predictions
[
0
],
axis
=
1
)
preds_attack
=
np
.
argmax
(
p
.
predictions
[
1
],
axis
=
1
)
labels_safety
=
p
.
label_ids
[
0
]
labels_attack
=
p
.
label_ids
[
1
]
logits_safety
,
logits_attack
=
p
.
predictions
labels_safety
,
labels_attack
=
p
.
label_ids
.
T
f1_safety
=
f1_score
(
labels_safety
,
preds_safety
,
average
=
'weighted'
)
precision_safety
=
precision_score
(
labels_safety
,
preds_safety
,
average
=
'weighted'
)
recall_safety
=
recall_score
(
labels_safety
,
preds_safety
,
average
=
'weighted'
)
f1_attack
=
f1_score
(
labels_attack
,
preds_attack
,
average
=
'weighted'
)
precision_attack
=
precision_score
(
labels_attack
,
preds_attack
,
average
=
'weighted'
)
recall_attack
=
recall_score
(
labels_attack
,
preds_attack
,
average
=
'weighted'
)
preds_safety
=
np
.
argmax
(
logits_safety
,
axis
=
1
)
preds_attack
=
np
.
argmax
(
logits_attack
,
axis
=
1
)
return
{
'f1_safety'
:
f1_s
afety
,
'precision_safety'
:
precision_s
afety
,
'recall_safety'
:
recall_s
afety
,
'f1_attack'
:
f1_
attack
,
'precision_attack'
:
precision_
attack
,
'recall_attack'
:
recall_
attack
,
'f1_safety'
:
f1_s
core
(
labels_safety
,
preds_safety
,
average
=
'weighted'
)
,
'precision_safety'
:
precision_s
core
(
labels_safety
,
preds_safety
,
average
=
'weighted'
)
,
'recall_safety'
:
recall_s
core
(
labels_safety
,
preds_safety
,
average
=
'weighted'
)
,
'f1_attack'
:
f1_
score
(
labels_attack
,
preds_attack
,
average
=
'weighted'
)
,
'precision_attack'
:
precision_
score
(
labels_attack
,
preds_attack
,
average
=
'weighted'
)
,
'recall_attack'
:
recall_
score
(
labels_attack
,
preds_attack
,
average
=
'weighted'
)
,
}
# Аргументы
для
обучения
# Аргументы обучения
training_args
=
TrainingArguments
(
output_dir
=
'./results'
,
eval_strategy
=
"epoch"
,
save_strategy
=
"epoch"
,
learning_rate
=
5
e-
6
,
per_device_train_batch_size
=
16
,
per_device_eval_batch_size
=
16
,
num_train_epochs
=
3
,
learning_rate
=
2
e-
5
,
# Уменьшение learning_rate
per_device_train_batch_size
=
8
,
# Оптимальный баланс
per_device_eval_batch_size
=
8
,
num_train_epochs
=
3
,
# Уменьшение количества эпох
weight_decay
=
0.01
,
logging_dir
=
'./logs'
,
logging_steps
=
100
,
logging_steps
=
10
,
save_total_limit
=
2
,
load_best_model_at_end
=
True
,
evaluation_strategy
=
"epoch"
,
metric_for_best_model
=
"f1_safety"
,
# или метрика для выбора лучшей модели
metric_for_best_model
=
"f1_safety"
,
greater_is_better
=
True
,
fp16
=
True
,
max_grad_norm
=
1.0
,
warmup_steps
=
100
,
report_to
=
"none"
,
)
#
Настройка тренера
#
Создание Trainer
trainer
=
Trainer
(
model
=
model
,
args
=
training_args
,
train_dataset
=
train_dataset
,
eval_dataset
=
val_dataset
,
tokenizer
=
tokenizer
,
compute_metrics
=
compute_metrics
,
)
# Обучение модели
trainer
.
train
()
# Оценка
модели на тестовых данных
# Оценка
на тесте
results
=
trainer
.
evaluate
(
test_dataset
)
print
(
"Evaluation Results:"
,
results
)
# Вывод результатов
print
(
f
"Evaluation Results:
{
results
}
"
)
# Сохранение модели
model
.
save_pretrained
(
'./fine-tuned-bert-lora-multi-task2'
)
tokenizer
.
save_pretrained
(
'./fine-tuned-bert-lora-multi-task2'
)
tokenizer
.
save_pretrained
(
'./fine-tuned-bert-lora-multi-task2'
)
\ No newline at end of file
This diff is collapsed.
Click to expand it.
obuchwithcross.py
+
56
−
113
View file @
d20b3b88
import
os
import
pandas
as
pd
from
sklearn.model_selection
import
train_test_split
,
StratifiedKFold
from
sklearn.model_selection
import
train_test_split
from
datasets
import
Dataset
,
load_from_disk
from
transformers
import
BertTokenizer
,
BertPreTrainedModel
,
BertModel
from
torch
import
nn
...
...
@@ -10,6 +10,7 @@ from transformers import Trainer, TrainingArguments
import
numpy
as
np
from
sklearn.metrics
import
f1_score
,
precision_score
,
recall_score
from
sklearn.utils.class_weight
import
compute_class_weight
torch
.
cuda
.
empty_cache
()
# Определяем устройство (GPU или CPU)
...
...
@@ -21,19 +22,12 @@ TRAIN_TOKENIZED_PATH = os.path.join(TOKENIZED_DATA_DIR, "train")
VAL_TOKENIZED_PATH
=
os
.
path
.
join
(
TOKENIZED_DATA_DIR
,
"val"
)
TEST_TOKENIZED_PATH
=
os
.
path
.
join
(
TOKENIZED_DATA_DIR
,
"test"
)
print
(
'123456'
)
# Загрузка данных
data
=
pd
.
read_csv
(
'all_dataset.csv'
)
# Стратифицированное разделение данных
X
=
data
[
'prompt'
]
# тексты
y_safety
=
data
[
'safety'
]
# метки для безопасности
y_attack
=
data
[
'type'
]
# метки для типа атак
# Стратифицированное разделение
train_data
,
test_data
=
train_test_split
(
data
,
test_size
=
0.2
,
stratify
=
y_safety
,
random_state
=
42
)
train_data
,
val_data
=
train_test_split
(
train_data
,
test_size
=
0.1
,
stratify
=
train_data
[
'safety'
],
random_state
=
42
)
# Разделение данных
train_data
,
test_data
=
train_test_split
(
data
,
test_size
=
0.2
,
random_state
=
42
)
train_data
,
val_data
=
train_test_split
(
train_data
,
test_size
=
0.1
,
random_state
=
42
)
# Преобразование данных в формат Dataset
train_dataset
=
Dataset
.
from_pandas
(
train_data
)
...
...
@@ -45,18 +39,19 @@ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Функция для токенизации
def
preprocess_function
(
examples
):
# Токенизация текста
tokenized
=
tokenizer
(
examples
[
'prompt'
],
truncation
=
True
,
padding
=
True
,
max_length
=
512
)
# Подготовка меток
labels_safety
=
[
0
if
label
==
"safe"
else
1
for
label
in
examples
[
'safety'
]]
labels_attack
=
[
0
if
label
==
"jailbreak"
else
1
if
label
==
"evasion"
else
2
if
label
==
"generic attack"
else
3
for
label
in
examples
[
'type'
]]
# Объединяем метки в один тензор
tokenized
[
'labels'
]
=
list
(
zip
(
labels_safety
,
labels_attack
))
# Проверка корректности меток
print
(
f
"Sample labels:
{
tokenized
[
'labels'
][
:
5
]
}
"
)
return
tokenized
# Провер
ка
, существуют ли
уже
токенизированные данные
# Провер
яем
, существуют ли токенизированные данные
if
os
.
path
.
exists
(
TRAIN_TOKENIZED_PATH
)
and
os
.
path
.
exists
(
VAL_TOKENIZED_PATH
)
and
os
.
path
.
exists
(
TEST_TOKENIZED_PATH
):
print
(
"Загрузка токенизированных данных с диска..."
)
train_dataset
=
load_from_disk
(
TRAIN_TOKENIZED_PATH
)
...
...
@@ -64,82 +59,40 @@ if os.path.exists(TRAIN_TOKENIZED_PATH) and os.path.exists(VAL_TOKENIZED_PATH) a
test_dataset
=
load_from_disk
(
TEST_TOKENIZED_PATH
)
else
:
print
(
"Токенизация данных..."
)
# Токенизация данных
train_dataset
=
train_dataset
.
map
(
preprocess_function
,
batched
=
True
)
val_dataset
=
val_dataset
.
map
(
preprocess_function
,
batched
=
True
)
test_dataset
=
test_dataset
.
map
(
preprocess_function
,
batched
=
True
)
# Сохранение токенизированных данных на диск
os
.
makedirs
(
TOKENIZED_DATA_DIR
,
exist_ok
=
True
)
train_dataset
.
save_to_disk
(
TRAIN_TOKENIZED_PATH
)
val_dataset
.
save_to_disk
(
VAL_TOKENIZED_PATH
)
test_dataset
.
save_to_disk
(
TEST_TOKENIZED_PATH
)
print
(
"Токенизированные данные сохранены на диск."
)
# Вычисление весов классов для безопасных/небезопасных
classes_task1
=
np
.
unique
(
train_data
[
'safety'
])
class_weights_task1
=
compute_class_weight
(
'balanced'
,
classes
=
classes_task1
,
y
=
train_data
[
'safety'
])
class_weights_dict_task1
=
{
i
:
weight
for
i
,
weight
in
enumerate
(
class_weights_task1
)}
# Вычисление весов классов для атак
classes_task2
=
np
.
unique
(
train_data
[
train_data
[
'safety'
]
==
'unsafe'
][
'type'
])
class_weights_task2
=
compute_class_weight
(
'balanced'
,
classes
=
classes_task2
,
y
=
train_data
[
train_data
[
'safety'
]
==
'unsafe'
][
'type'
])
class_weights_dict_task2
=
{
i
:
weight
for
i
,
weight
in
enumerate
(
class_weights_task2
)}
# Вычисление весов классов
class_weights_task1
=
compute_class_weight
(
'balanced'
,
classes
=
np
.
unique
(
train_data
[
'safety'
]),
y
=
train_data
[
'safety'
])
class_weights_task2
=
compute_class_weight
(
'balanced'
,
classes
=
np
.
unique
(
train_data
[
train_data
[
'safety'
]
==
'unsafe'
][
'type'
]),
y
=
train_data
[
train_data
[
'safety'
]
==
'unsafe'
][
'type'
])
# Перенос весов на устройство
class_weights_task1_tensor
=
torch
.
tensor
(
list
(
class_weights_dict_task1
.
values
()),
dtype
=
torch
.
float32
).
to
(
device
)
class_weights_task2_tensor
=
torch
.
tensor
(
list
(
class_weights_dict_task2
.
values
()),
dtype
=
torch
.
float32
).
to
(
device
)
class_weights_task1_tensor
=
torch
.
tensor
(
class_weights_task1
,
dtype
=
torch
.
float32
).
to
(
device
)
class_weights_task2_tensor
=
torch
.
tensor
(
class_weights_task2
,
dtype
=
torch
.
float32
).
to
(
device
)
#
Модель с балансировкой классов
#
Определение модели
class
MultiTaskBert
(
BertPreTrainedModel
):
def
__init__
(
self
,
config
):
super
().
__init__
(
config
)
self
.
bert
=
BertModel
(
config
)
self
.
classifier_safety
=
nn
.
Linear
(
config
.
hidden_size
,
2
)
# safe/unsafe
self
.
classifier_attack
=
nn
.
Linear
(
config
.
hidden_size
,
4
)
# jailbreak, evasion, generic attack, injection
def
forward
(
self
,
input_ids
=
None
,
attention_mask
=
None
,
inputs_embeds
=
None
,
labels
=
None
,
output_attentions
=
None
,
output_hidden_states
=
None
,
return_dict
=
None
,
**
kwargs
,
):
if
input_ids
is
not
None
:
input_ids
=
input_ids
.
to
(
device
)
if
attention_mask
is
not
None
:
attention_mask
=
attention_mask
.
to
(
device
)
if
labels
is
not
None
:
labels
=
labels
.
to
(
device
)
self
.
classifier_safety
=
nn
.
Linear
(
config
.
hidden_size
,
2
)
self
.
classifier_attack
=
nn
.
Linear
(
config
.
hidden_size
,
4
)
if
inputs_embeds
is
not
None
:
outputs
=
self
.
bert
(
inputs_embeds
=
inputs_embeds
,
attention_mask
=
attention_mask
,
output_attentions
=
output_attentions
,
output_hidden_states
=
output_hidden_states
,
return_dict
=
return_dict
,
)
else
:
outputs
=
self
.
bert
(
input_ids
=
input_ids
,
attention_mask
=
attention_mask
,
output_attentions
=
output_attentions
,
output_hidden_states
=
output_hidden_states
,
return_dict
=
return_dict
,
)
pooled_output
=
outputs
.
last_hidden_state
[:,
0
,
:]
# Используем [CLS] токен
def
forward
(
self
,
input_ids
=
None
,
attention_mask
=
None
,
labels
=
None
,
**
kwargs
):
outputs
=
self
.
bert
(
input_ids
=
input_ids
.
to
(
device
),
attention_mask
=
attention_mask
.
to
(
device
),
return_dict
=
True
)
pooled_output
=
outputs
.
last_hidden_state
[:,
0
,
:]
logits_safety
=
self
.
classifier_safety
(
pooled_output
)
logits_attack
=
self
.
classifier_attack
(
pooled_output
)
loss
=
None
if
labels
is
not
None
:
labels
=
labels
.
to
(
device
)
labels_safety
,
labels_attack
=
labels
[:,
0
],
labels
[:,
1
]
loss_fct_safety
=
nn
.
CrossEntropyLoss
(
weight
=
class_weights_task1_tensor
)
...
...
@@ -147,19 +100,12 @@ class MultiTaskBert(BertPreTrainedModel):
loss_safety
=
loss_fct_safety
(
logits_safety
,
labels_safety
)
loss_attack
=
loss_fct_attack
(
logits_attack
,
labels_attack
)
loss
=
loss_safety
+
loss_attack
# Общий loss
loss
=
loss_safety
+
loss_attack
return
{
'logits_safety'
:
logits_safety
,
'logits_attack'
:
logits_attack
,
'loss'
:
loss
,
'attentions'
:
outputs
.
attentions
if
output_attentions
else
None
,
'hidden_states'
:
outputs
.
hidden_states
if
output_hidden_states
else
None
,
}
return
(
loss
,
(
logits_safety
,
logits_attack
))
if
loss
is
not
None
else
(
logits_safety
,
logits_attack
)
# Загрузка модели
model
=
MultiTaskBert
.
from_pretrained
(
'bert-base-uncased'
)
model
=
model
.
to
(
device
)
model
=
MultiTaskBert
.
from_pretrained
(
'bert-base-uncased'
).
to
(
device
)
# Конфигурация LoRA
lora_config
=
LoraConfig
(
...
...
@@ -170,66 +116,63 @@ lora_config = LoraConfig(
target_modules
=
[
"query"
,
"value"
],
)
model
=
get_peft_model
(
model
,
lora_config
)
model
.
print_trainable_parameters
()
# Функция
для
вычисления метрик
# Функция вычисления метрик
def
compute_metrics
(
p
):
preds_safety
=
np
.
argmax
(
p
.
predictions
[
0
],
axis
=
1
)
preds_attack
=
np
.
argmax
(
p
.
predictions
[
1
],
axis
=
1
)
labels_safety
=
p
.
label_ids
[
0
]
labels_attack
=
p
.
label_ids
[
1
]
logits_safety
,
logits_attack
=
p
.
predictions
labels_safety
,
labels_attack
=
p
.
label_ids
.
T
f1_safety
=
f1_score
(
labels_safety
,
preds_safety
,
average
=
'weighted'
)
precision_safety
=
precision_score
(
labels_safety
,
preds_safety
,
average
=
'weighted'
)
recall_safety
=
recall_score
(
labels_safety
,
preds_safety
,
average
=
'weighted'
)
f1_attack
=
f1_score
(
labels_attack
,
preds_attack
,
average
=
'weighted'
)
precision_attack
=
precision_score
(
labels_attack
,
preds_attack
,
average
=
'weighted'
)
recall_attack
=
recall_score
(
labels_attack
,
preds_attack
,
average
=
'weighted'
)
preds_safety
=
np
.
argmax
(
logits_safety
,
axis
=
1
)
preds_attack
=
np
.
argmax
(
logits_attack
,
axis
=
1
)
return
{
'f1_safety'
:
f1_s
afety
,
'precision_safety'
:
precision_s
afety
,
'recall_safety'
:
recall_s
afety
,
'f1_attack'
:
f1_
attack
,
'precision_attack'
:
precision_
attack
,
'recall_attack'
:
recall_
attack
,
'f1_safety'
:
f1_s
core
(
labels_safety
,
preds_safety
,
average
=
'weighted'
)
,
'precision_safety'
:
precision_s
core
(
labels_safety
,
preds_safety
,
average
=
'weighted'
)
,
'recall_safety'
:
recall_s
core
(
labels_safety
,
preds_safety
,
average
=
'weighted'
)
,
'f1_attack'
:
f1_
score
(
labels_attack
,
preds_attack
,
average
=
'weighted'
)
,
'precision_attack'
:
precision_
score
(
labels_attack
,
preds_attack
,
average
=
'weighted'
)
,
'recall_attack'
:
recall_
score
(
labels_attack
,
preds_attack
,
average
=
'weighted'
)
,
}
# Аргументы
для
обучения
# Аргументы обучения
training_args
=
TrainingArguments
(
output_dir
=
'./results'
,
eval_strategy
=
"epoch"
,
save_strategy
=
"epoch"
,
learning_rate
=
5
e-
6
,
per_device_train_batch_size
=
16
,
per_device_eval_batch_size
=
16
,
num_train_epochs
=
3
,
learning_rate
=
2
e-
5
,
# Уменьшение learning_rate
per_device_train_batch_size
=
8
,
# Оптимальный баланс
per_device_eval_batch_size
=
8
,
num_train_epochs
=
3
,
# Уменьшение количества эпох
weight_decay
=
0.01
,
logging_dir
=
'./logs'
,
logging_steps
=
100
,
logging_steps
=
10
,
save_total_limit
=
2
,
load_best_model_at_end
=
True
,
evaluation_strategy
=
"epoch"
,
metric_for_best_model
=
"f1_safety"
,
# или метрика для выбора лучшей модели
metric_for_best_model
=
"f1_safety"
,
greater_is_better
=
True
,
fp16
=
True
,
max_grad_norm
=
1.0
,
warmup_steps
=
100
,
report_to
=
"none"
,
)
#
Настройка тренера
#
Создание Trainer
trainer
=
Trainer
(
model
=
model
,
args
=
training_args
,
train_dataset
=
train_dataset
,
eval_dataset
=
val_dataset
,
tokenizer
=
tokenizer
,
compute_metrics
=
compute_metrics
,
)
# Обучение модели
trainer
.
train
()
# Оценка
модели на тестовых данных
# Оценка
на тесте
results
=
trainer
.
evaluate
(
test_dataset
)
print
(
"Evaluation Results:"
,
results
)
# Вывод результатов
print
(
f
"Evaluation Results:
{
results
}
"
)
# Сохранение модели
model
.
save_pretrained
(
'./fine-tuned-bert-lora-multi-task2'
)
tokenizer
.
save_pretrained
(
'./fine-tuned-bert-lora-multi-task2'
)
tokenizer
.
save_pretrained
(
'./fine-tuned-bert-lora-multi-task2'
)
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment
Menu
Explore
Projects
Groups
Topics
Snippets