Ziyuan Nan / codecritic

Commit 2b493df5 authored Sep 26, 2024 by nanziyuan
step1: fix minor bugs and test code on a small scale

parent 6af73e89
Showing 3 changed files with 26 additions and 16 deletions (+26 -16)

step1_evaluate_code.py   +2  -3
utils.py                 +8  -3
utils_vllm.py            +16 -10
step1_evaluate_code.py  (view file @ 2b493df5)

```diff
@@ -68,7 +68,7 @@ def test_generation(args, debug=False):
     finally:
         assert isinstance(curr_res, list)
         problem_results = np.asarray(curr_res)
-        code_sample["eval_result"] = np.all(problem_results > 0)
+        code_sample["eval_result"] = bool(np.all(problem_results > 0))
     return code_sample
@@ -101,4 +101,4 @@ if __name__ == "__main__":
         cfg["sample"]["sample_result_path"],
         cfg["apps"],
         cfg["evaluate"]["evaluate_result_path"],
-    )
\ No newline at end of file
+    )
```
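The only behavioral change in this file is wrapping np.all(...) in bool(). The commit message does not say why, but a likely motivation is serialization: np.all returns a numpy.bool_, and the standard json module refuses to encode numpy scalar types, so writing the evaluation record out as JSONL would fail. A minimal sketch of that failure mode (the per-test results below are made up):

```python
import json
import numpy as np

problem_results = np.asarray([1, 1, 1])   # made-up per-test outcomes

raw = np.all(problem_results > 0)         # numpy.bool_, not a Python bool
try:
    json.dumps({"eval_result": raw})
except TypeError as err:
    print(err)                            # Object of type bool_ is not JSON serializable

# Casting to a built-in bool, as the new line does, keeps the record serializable.
print(json.dumps({"eval_result": bool(raw)}))  # {"eval_result": true}
```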
utils.py  (view file @ 2b493df5)

```diff
@@ -3,7 +3,13 @@ import json
 import os
 import re
 from codebleu import calc_codebleu
-import tomllib
+import sys
+
+if sys.version_info < (3, 11):
+    import tomli as tomllib
+else:
+    import tomllib
 
 def load_jsonl(file_path):
@@ -46,4 +52,4 @@ def read_config():
     args = argparser.parse_args()
     with open(args.config, "rb") as f:
-        return tomllib.load(f)
\ No newline at end of file
+        return tomllib.load(f)
```
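The new import block follows the common tomli/tomllib compatibility pattern: tomllib only entered the standard library in Python 3.11, and on older interpreters the third-party tomli package exposes the same load()/loads() API, so read_config keeps working either way. A small sketch of how the shim behaves; the TOML layout below is a guess based on the cfg keys referenced in step1_evaluate_code.py and is not part of this commit:

```python
import sys

if sys.version_info < (3, 11):
    import tomli as tomllib  # pip install tomli on Python <= 3.10
else:
    import tomllib

# Hypothetical config matching the keys used elsewhere in the repo.
example_config = """
apps = "data/APPS"

[sample]
sample_result_path = "results/samples.jsonl"

[evaluate]
evaluate_result_path = "results/evaluated.jsonl"
"""

cfg = tomllib.loads(example_config)
print(cfg["sample"]["sample_result_path"])      # results/samples.jsonl
print(cfg["evaluate"]["evaluate_result_path"])  # results/evaluated.jsonl
```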
utils_vllm.py  (view file @ 2b493df5)

```diff
@@ -2,19 +2,20 @@ from vllm import LLM, SamplingParams
 import os
 import multiprocessing
+from itertools import chain
 from functools import partial
 from utils import load_jsonl, save_jsonl
 
-def worker(cuda_device, prompts, model, sampling_params):
+def worker(cuda_device, prompts, model_path, sampling_params):
     os.environ["CUDA_VISIBLE_DEVICES"] = cuda_device
-    llm = LLM(model=model, seed=42, max_model_len=8*1024)
+    llm = LLM(model=model_path, seed=42, max_model_len=8*1024, swap_space=16)
     tokenizer = llm.get_tokenizer()
     stop_tokens = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]
-    print(f"SUCCESS: load llm {model} on cuda {cuda_device}")
+    print(f"SUCCESS: load llm {model_path} on cuda {cuda_device}")
     vllm_sampling_params = SamplingParams(
         n=sampling_params['n'],
@@ -25,14 +26,19 @@ def worker(cuda_device, prompts, model, sampling_params):
     )
     text_prompts = [tokenizer.apply_chat_template(item["messages"], tokenize=False, add_generation_prompt=True) for item in prompts]
-    outputs = llm.generate(text_prompts, sampling_params=vllm_sampling_params, use_tqdm=False)
+    outputs = llm.generate(text_prompts, sampling_params=vllm_sampling_params, use_tqdm=True)
+    result = []
     for item, output in zip(prompts, outputs):
         for response in output.outputs:
             generated_text = response.text
-            item["messages"].append({"role": "assistant", "content": generated_text})
-    return True
+            messages, newitem = item["messages"].copy(), item.copy()
+            messages.append({"role": "assistant", "content": generated_text})
+            newitem["messages"] = messages
+            result.append(newitem)
+    return result
 
 def vllm_inference(model_path, prompt_path, output_path, sampling_params):
@@ -51,7 +57,8 @@ def vllm_inference(model_path, prompt_path, output_path, sampling_params):
     worker_llm = partial(worker, model_path=model_path, sampling_params=sampling_params)
     with multiprocessing.Pool(gpu_num) as pool:
-        status = pool.starmap(worker_llm, args)
-    print(f"Execution Status: {all(status)}")
-    save_jsonl(prompts, output_path)
\ No newline at end of file
+        nested_results = pool.starmap(worker_llm, args)
+    results = list(chain(*nested_results))
+    print(f"size of dataset: {len(results)}")
+    save_jsonl(results, output_path)
```
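The substantive fix in utils_vllm.py is how results leave the worker processes. Previously each worker appended completions to its prompts in place and returned True, but a multiprocessing.Pool worker only sees a pickled copy of its arguments, so those mutations never reach the parent and save_jsonl(prompts, ...) wrote prompts without any completions. The new code has each worker build and return fresh records, and the parent flattens the per-GPU lists with itertools.chain before saving. (The LLM(...) call also gains swap_space=16, which, as far as I can tell from the vLLM docs, sets the CPU swap space per GPU in GiB, and the tqdm progress bar is switched back on.) Below is a self-contained sketch of that gather-and-flatten pattern with the vLLM call stubbed out; the worker body, shard sizes, and model name are illustrative only:

```python
import multiprocessing
from functools import partial
from itertools import chain

# Stand-in for the real worker: it fabricates a reply instead of calling vLLM,
# but keeps the important property of the fix -- it returns new records rather
# than mutating `prompts`, since mutations inside a Pool worker happen on a
# pickled copy and are invisible to the parent process.
def worker(cuda_device, prompts, model_path, sampling_params):
    result = []
    for item in prompts:
        messages, newitem = item["messages"].copy(), item.copy()
        messages.append({"role": "assistant", "content": f"reply from GPU {cuda_device}"})
        newitem["messages"] = messages
        result.append(newitem)
    return result

if __name__ == "__main__":
    prompts = [{"messages": [{"role": "user", "content": f"question {i}"}]} for i in range(4)]
    shards = [prompts[:2], prompts[2:]]                # one shard per (pretend) GPU
    args = [("0", shards[0]), ("1", shards[1])]

    worker_llm = partial(worker, model_path="dummy-model", sampling_params={"n": 1})
    with multiprocessing.Pool(2) as pool:
        nested_results = pool.starmap(worker_llm, args)  # list of per-GPU lists

    results = list(chain(*nested_results))               # flatten, as the commit does
    print(f"size of dataset: {len(results)}")            # 4
```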