Ziyuan Nan / codecritic · Commits

Commit 6d68e66f — "refine test"
Authored Dec 27, 2024 by nzy
Parent: ddc93279

Showing 3 changed files with 34 additions and 44 deletions:

- codecritic/cli/test_genrm.py (+27 −21)
- codecritic/data/genrm_prompt.py (+4 −1)
- codecritic/evaluation/inference.py (+3 −22)
codecritic/cli/test_genrm.py (+27 −21)
```diff
@@ -3,18 +3,14 @@ from functools import partial
 import os
 
 from transformers import AutoTokenizer
 from vllm import SamplingParams
 
-from codecritic.data.genrm_prompt import JUDGE_PROMPT
+from codecritic.data.genrm_prompt import THINK_PROMPT, JUDGE_PROMPT, JUDGE_TOEKNS
 from codecritic.evaluation.inference import generate_worker, score_worker
 from codecritic.utils.parallel import model_map
 from codecritic.utils.json import load_jsonl, save_jsonl
 
-def append_prompt(item, content):
-    item["messages"].append({"role": "user", "content": content})
-    return item
-
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--model", type=str, help="path/to/model")
@@ -22,43 +18,53 @@ if __name__ == "__main__":
     parser.add_argument("--output", type=str, help="path/to/score")
     parser.add_argument("--reasoning", action="store_true", help="enable reasoning")
+    parser.add_argument(
+        "--reason_max_tokens",
+        type=int,
+        default=4096,
+        help="maximum number of tokens allowed for the reasoning process.",
+    )
     parser.add_argument("--gpu", type=int, default=1, help="gpu number required by model")
     args = parser.parse_args()
 
     os.environ["TOKENIZERS_PARALLELISM"] = "false"
+    tokenizer = AutoTokenizer.from_pretrained(args.model)
     dataset = load_jsonl(args.sample)
 
     if args.reasoning:
-        dataset = [append_prompt(x, COV_PROMPT) for x in dataset]
-        worker = partial(generate_worker, model_path=args.model,
+        for item in dataset:
+            item["messages"].append({"role": "user", "content": THINK_PROMPT})
         sampling_params = SamplingParams(
             n=1,
             temperature=0,
-            max_tokens=4096,
             top_p=0.95,
+            max_tokens=args.reason_max_tokens,
        )
-        dataset = model_map(worker, dataset, args.gpu_per_model)
+        worker = partial(generate_worker, model_path=args.model, sampling_params=sampling_params)
+        dataset = model_map(worker, dataset, args.gpu_per_model)
 
-    tokenizer = AutoTokenizer.from_pretrained(args.model)
 
     def get_token_id(token):
         score_tokens = tokenizer.encode(token, add_special_tokens=False)
         assert len(score_tokens) == 1
         return score_tokens[0]
 
-    positive_token = get_token_id("Yes")
-    negative_token = get_token_id("No")
-    dataset = [append_prompt(x, JUDGE_PROMPT) for x in dataset]
+    positive_token = get_token_id(JUDGE_TOEKNS["positive"])
+    negative_token = get_token_id(JUDGE_TOEKNS["negative"])
 
+    for item in dataset:
+        item["messages"].append({"role": "user", "content": JUDGE_PROMPT})
 
     worker = partial(
         score_worker,
         model_path=args.model,
         positive_token=positive_token,
-        negative_token=negative_token
+        negative_token=negative_token,
     )
     dataset = model_map(worker, dataset, args.gpu_per_model)
...
```
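The `assert len(score_tokens) == 1` in `get_token_id` is load-bearing: the judge's verdict is read from the logprobs of the first generated token, so each judge word must encode to exactly one token in the model's vocabulary. A quick standalone sanity check (the model name below is only an example; in the script it would be `args.model`):

```python
# Verify that the judge words are single tokens for a given tokenizer.
# "Qwen/Qwen2.5-Coder-7B-Instruct" is an illustrative choice, not the
# model used by the repository.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-7B-Instruct")
for word in ("Yes", "No"):
    ids = tokenizer.encode(word, add_special_tokens=False)
    assert len(ids) == 1, f"{word!r} splits into {len(ids)} tokens: {ids}"
    print(word, "->", ids[0])
```

If a word split into several tokens, the probability mass for the verdict would be spread across positions and a first-token readout would silently undercount it, which is exactly what the assertion guards against.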
codecritic/data/genrm_prompt.py (+4 −1)
```diff
 JUDGE_PROMPT = "Is the code correct (Yes/No)?"
+JUDGE_TOEKNS = {"positive": "Yes", "negative": "No"}
 
 
 def mk_critic_verify(answer=None):
     # answer: bool or none
...
```
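The diff truncates the body of `mk_critic_verify`. Going only by its signature and the `# answer: bool or none` comment, one plausible shape is sketched below; this is a guess for orientation, not the repository's implementation:

```python
# Hypothetical reconstruction: build the judge turn, and optionally the
# gold reply (e.g. for assembling training conversations).
JUDGE_PROMPT = "Is the code correct (Yes/No)?"
JUDGE_TOEKNS = {"positive": "Yes", "negative": "No"}

def mk_critic_verify(answer=None):
    # answer: bool or None. None -> just the question turn;
    # True/False -> also append the expected "Yes"/"No" reply.
    messages = [{"role": "user", "content": JUDGE_PROMPT}]
    if answer is not None:
        reply = JUDGE_TOEKNS["positive"] if answer else JUDGE_TOEKNS["negative"]
        messages.append({"role": "assistant", "content": reply})
    return messages
```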
codecritic/evaluation/inference.py (+3 −22)
```diff
@@ -18,18 +18,6 @@ def generate_worker(cuda_device, prompts, model_path, sampling_params):
     )
     tokenizer = llm.get_tokenizer()
-    stop_tokens = [tokenizer.eos_token_id]
     print(f"SUCCESS: load llm {model_path} on cuda {cuda_device}")
 
-    vllm_sampling_params = SamplingParams(
-        n=sampling_params["n"],
-        temperature=sampling_params["temperature"],
-        top_p=0.95,
-        max_tokens=sampling_params["max_tokens"],
-        stop_token_ids=stop_tokens,
-    )
-    print("Sampling params:", vllm_sampling_params)
 
     def messages_to_text(messages):
         text = tokenizer.apply_chat_template(
...
@@ -41,9 +29,7 @@ def generate_worker(cuda_device, prompts, model_path, sampling_params):
     text_prompts = [messages_to_text(item["messages"]) for item in prompts]
 
-    outputs = llm.generate(text_prompts, sampling_params=vllm_sampling_params, use_tqdm=True)
+    outputs = llm.generate(text_prompts, sampling_params=sampling_params, use_tqdm=True)
 
     results = []
     for item, output in zip(prompts, outputs):
...
```
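After this change, `generate_worker` no longer re-packs a plain dict into `SamplingParams`; the caller builds the object once and the worker forwards it to `llm.generate` unchanged. A minimal sketch of the new calling convention (the behavior of `model_map` — sharding items across GPUs and supplying `cuda_device` to the worker — is an assumption based on its usage in `test_genrm.py`):

```python
# Sketch of the caller side after this commit; paths and values illustrative.
from functools import partial

from vllm import SamplingParams

from codecritic.evaluation.inference import generate_worker
from codecritic.utils.parallel import model_map

sampling_params = SamplingParams(n=1, temperature=0, top_p=0.95, max_tokens=4096)
worker = partial(generate_worker, model_path="path/to/model",
                 sampling_params=sampling_params)
# model_map is assumed to shard `dataset` across GPUs and pass cuda_device:
# dataset = model_map(worker, dataset, gpus)
```

This removes the duplicated sampling defaults inside the worker and lets each caller choose its own decoding settings.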
```diff
@@ -96,10 +82,6 @@ def score_worker(cuda_device, prompts, model_path, positive_token, negative_token):
     )
     tokenizer = llm.get_tokenizer()
     print(f"SUCCESS: load llm {model_path} on cuda {cuda_device}")
 
-    vllm_sampling_params = SamplingParams(n=1, temperature=0, max_tokens=5, logprobs=20)
     text_prompts = [
         tokenizer.apply_chat_template(
             item["messages"],
             tokenize=False,
             add_generation_prompt=True
...
@@ -107,9 +89,8 @@ def score_worker(cuda_device, prompts, model_path, positive_token, negative_token):
         for item in prompts
     ]
-    outputs = llm.generate(text_prompts, sampling_params=vllm_sampling_params, use_tqdm=False)
+    sampling_params = SamplingParams(n=1, temperature=0, max_tokens=5, logprobs=20)
+    outputs = llm.generate(text_prompts, sampling_params=sampling_params, use_tqdm=True)
 
     results = []
     for item, output in zip(prompts, outputs):
...
```
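`score_worker` requests `logprobs=20` with at most five generated tokens, but the diff does not show how those logprobs become a score. A common recipe, sketched under the assumption of vLLM's current output layout (`CompletionOutput.logprobs` is a list of per-position dicts mapping token id to a `Logprob` object with a `.logprob` float; older versions returned bare floats), reads P(Yes) versus P(No) off the first generated position:

```python
# Hedged sketch, not the repository's code: turn the first-token logprobs
# of a vLLM RequestOutput into a correctness probability.
import math

def verdict_probability(output, positive_token, negative_token):
    first_pos = output.outputs[0].logprobs[0]  # top-k logprobs at position 0

    def lp(token_id):
        entry = first_pos.get(token_id)
        return entry.logprob if entry is not None else float("-inf")

    p_yes = math.exp(lp(positive_token))
    p_no = math.exp(lp(negative_token))
    # Renormalize over just the two judge tokens so the score stays in [0, 1]
    # even when other tokens absorb probability mass.
    total = p_yes + p_no
    return p_yes / total if total > 0 else 0.5  # fall back when neither is in top-k
```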