Ziyuan Nan / codecritic · Commits

Commit 944b6831, authored Jan 07, 2025 by nanziyuan
fix bugs of algolr
parent 999362c5

Showing 8 changed files with 184 additions and 28 deletions (+184 / -28)
codecritic/cli/algolr.py                   +21  -8
codecritic/cli/reformat.py                  +8  -2
codecritic/cli/select_preference_pairs.py   +1  -1
codecritic/dataset/algolr_prompt.py        +22 -16
codecritic/dataset/genrm_prompt.py          +1  -1
scripts/algolr.sh                          +52  -0
scripts/gen_dataset.sh                      +9  -0
scripts/train_orm.sh                       +70  -0
codecritic/cli/algolr.py

@@ -10,6 +10,7 @@ from codecritic.utils.inference import generate_worker
 from codecritic.utils.json import load_jsonl, save_jsonl
 import codecritic.evaluation.apps_eval as evaluation
 import codecritic.dataset.algolr_prompt as promptlib
+from codecritic.dataset.genrm_prompt import JUDGE_MESSAGE, mk_judge_response
 
 if __name__ == "__main__":
@@ -19,7 +20,7 @@ if __name__ == "__main__":
     parser.add_argument("--pairinfo", type=str, help="path/to/pairinfo")
     parser.add_argument("--apps", type=str, help="path/to/apps")
     parser.add_argument("--output", type=str, help="path/to/score")
-    parser.add_argument("--hint_level", type=str, choices=["beginner"])
+    parser.add_argument("--level", type=str, choices=["beginner"])
     parser.add_argument(
         "--tp", type=int, default=1, help="tensor parallel"
     )
@@ -37,7 +38,7 @@ if __name__ == "__main__":
     hint_prompts = []
     for pair in pairinfo:
         task_id, chosen_id, rejected_id = pair["task_id"], pair["chosen"], pair["rejected"]
-        chosen, rejected = ds[task_id][chosen_id], ds[task_id][chosen_id]
+        chosen, rejected = ds[task_id][chosen_id], ds[task_id][rejected_id]
         prompt = promptlib.process_to_hint_prompt(chosen, rejected, args.level)
         hint_prompts.append(prompt)
@@ -64,7 +65,7 @@ if __name__ == "__main__":
     reason_prompts = []
     for pair in pairinfo:
         task_id, chosen_id, rejected_id = pair["task_id"], pair["chosen"], pair["rejected"]
-        chosen, rejected = ds[task_id][chosen_id], ds[task_id][chosen_id]
+        chosen, rejected = ds[task_id][chosen_id], ds[task_id][rejected_id]
         CORRECT_HINT = "The code is correct."
 
         # chosen
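The one-character fix in the two hunks above is the heart of this commit: `ds` maps a task_id to its solutions by solution_id, and indexing with `chosen_id` twice made every "rejected" sample an alias of the chosen one, degenerating all preference pairs. A minimal sketch of the corrected lookup (data shapes assumed from the diff):

    # Sketch of the pair lookup, assuming ds: {task_id: {solution_id: sample}}.
    ds = {
        "apps/0001": {
            "s0": {"pass": True,  "code": "def f(): ..."},
            "s1": {"pass": False, "code": "def f(): ..."},
        }
    }
    pair = {"task_id": "apps/0001", "chosen": "s0", "rejected": "s1"}

    task_id, chosen_id, rejected_id = pair["task_id"], pair["chosen"], pair["rejected"]
    # Before the fix: rejected = ds[task_id][chosen_id], so both sides were identical.
    chosen, rejected = ds[task_id][chosen_id], ds[task_id][rejected_id]
    assert chosen is not rejected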
@@ -124,34 +125,46 @@ if __name__ == "__main__":
         item["code"] = code
         verify_passed.append(item)
 
-    print("verify passed (judgement consistent) size: {}".format(len(verify_passed)))
-
     incorrects, corrects = [], []
     for item in verify_passed:
+        item["meta_old_pass"] = item["pass"]
         if not item["pass"]:
             incorrects.append(item)
         else:
             corrects.append(item)
 
+    print("verify passed (judgement consistent) size: {}".format(len(verify_passed)))
+    print("Corrects (judgement consistent) size: {}".format(len(corrects)))
+    print("Incorrects (judgement consistent) size: {}".format(len(incorrects)))
+
     # need a list of dict {"task_id": str, "solution_id": str(unique index), "code": ...}
     apps = load_dataset(args.apps)
     fixed_incorrects = evaluation.evaluate(incorrects, apps)
-    verify_passed = [x for x in fixed_incorrects if x["pass"]] + corrects
-    print("verify passed (judgement consistent) size: {}".format(len(verify_passed)))
+    # print(fixed_incorrects[0])
+    incorrects = [x for x in fixed_incorrects if x["pass"]]  # filter that code is not correct.
+    verify_passed = incorrects + corrects
+    print("verify passed (execution consistent) size: {}".format(len(verify_passed)))
+    print("Corrects (execution consistent) size: {}".format(len(corrects)))
+    print("Incorrects (execution consistent) size: {}".format(len(incorrects)))
 
     # Step4 Remove hints and Reformat to a SFT dataset
     # extract reasoning sets
     sft = []
    for item in verify_passed:
+        if item["meta_old_pass"]:
+            judge_response = mk_judge_response("positive")
+        else:
+            judge_response = mk_judge_response("negative")
+        response = [item["messages"][1], JUDGE_MESSAGE, judge_response]
         line = {
             "dataset": item["dataset"],
             "task_id": item["task_id"],
             "solution_id": item["solution_id"],
             "question": item["messages"][:1],
-            "response": item["messages"][1:2],
+            "response": response,
         }
         sft.append(line)
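The new SFT records replace the raw second message with a three-turn judge exchange. A sketch of that assembly; `JUDGE_MESSAGE` and `mk_judge_response` are the real imports from codecritic.dataset.genrm_prompt, but their bodies below are assumptions reconstructed from the genrm_prompt.py hunk later in this commit:

    # Assumed shapes: JUDGE_MESSAGE is a user turn, mk_judge_response wraps the
    # Yes/No token in an assistant turn (exact definitions not shown in this diff).
    JUDGE_MESSAGE = {"role": "user", "content": "Is the original code correct (Yes/No)?"}

    def mk_judge_response(label):
        token = {"positive": "Yes", "negative": "No"}[label]
        return {"role": "assistant", "content": token}

    item = {"meta_old_pass": True,
            "messages": [{"role": "user", "content": "problem statement"},
                         {"role": "assistant", "content": "reasoning about the code"}]}

    judge_response = mk_judge_response("positive" if item["meta_old_pass"] else "negative")
    # response = [model reasoning turn, judge question, judge answer]
    response = [item["messages"][1], JUDGE_MESSAGE, judge_response]

Keying the judgement on `meta_old_pass` rather than `pass` preserves the model's original verdict even after execution re-labels the fixed incorrects.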
codecritic/cli/reformat.py

 import argparse
-from itertools import product, chain
+from collections import defaultdict
+from itertools import chain
 
 from codecritic.utils.json import load_jsonl, save_jsonl

@@ -43,10 +44,15 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--dataset", type=str, help="path/to/dataset")
     parser.add_argument("--pairs", type=str, help="path/to/selected_pairs")
+    parser.add_argument("--format", type=str, choices=["reward"])
     parser.add_argument("--output", type=str, help="path/to/output")
     args = parser.parse_args()
 
-    dataset = load_jsonl(args.dataset)
+    dataset_jsonl = load_jsonl(args.dataset)
+    dataset = defaultdict(dict)
+    for item in dataset_jsonl:
+        dataset[item["task_id"]][item["solution_id"]] = item
+
     selected_pairs = load_jsonl(args.pairs)
 
     if args.format == "sft":
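reformat.py now mirrors algolr.py's indexing: the flat jsonl list becomes a two-level defaultdict, so each (task_id, solution_id) lookup is a dict access rather than a scan. A self-contained sketch:

    from collections import defaultdict

    # Index jsonl records by task_id, then solution_id, as reformat.py now does.
    dataset_jsonl = [
        {"task_id": "t1", "solution_id": 0, "pass": True},
        {"task_id": "t1", "solution_id": 1, "pass": False},
    ]
    dataset = defaultdict(dict)
    for item in dataset_jsonl:
        dataset[item["task_id"]][item["solution_id"]] = item

    assert dataset["t1"][1]["pass"] is False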
codecritic/cli/select_preference_pairs.py

@@ -84,7 +84,7 @@ if __name__ == "__main__":
     selected_pairs = []
     for task, items in task_groups.items():
-        sorted_items = sorted(items, key=lambda x: x["similarity"], reverse=True)[:4]
+        sorted_items = sorted(items, key=lambda x: x["similarity"], reverse=True)[:2]
         selected_pairs.extend(sorted_items)
 
     save_jsonl(selected_pairs, args.output)
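The only change here tightens the per-task cap from the four most similar pairs to two. A sketch of the selection step with toy data:

    # Keep the top-2 most similar pairs per task (was top-4 before this commit).
    items = [{"similarity": 0.9}, {"similarity": 0.4}, {"similarity": 0.7}]
    sorted_items = sorted(items, key=lambda x: x["similarity"], reverse=True)[:2]
    assert [x["similarity"] for x in sorted_items] == [0.9, 0.7]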
codecritic/dataset/algolr_prompt.py

@@ -34,10 +34,11 @@ Ensure the hint is clear, actionable, and appropriate for a **{level}-level** le
 Return your response in the following format:
 
 ### Hint
-[Your hint here. Include both algorithmic reasoning and actionable guidance. Entirely in natural language.]
+[Your hint here. Include both algorithmic reasoning and actionable guidance. Natural Language Only.]
 """
     return prompt.strip()
 
 
 def process_to_hint_prompt(chosen, rejected, level):
     question = chosen["messages"][0]["content"]
     # question = "\n".join(question.strip().splitlines()[1:-1])
@@ -55,7 +56,8 @@ def process_to_hint_prompt(chosen, rejected, level):
         "messages": messages
     }
 
-hint_pattern = re.compile(r"### hint\n(.*?)(?=\n###|$)", re.DOTALL)
+hint_pattern = re.compile(r"### Hint\n(.*?)(?=\n###|$)", re.DOTALL)
 
 def postprocess_to_hint(llm_response):
     messages = llm_response.pop("messages")
     response = messages[-1]["content"]
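The bug here was case sensitivity: the prompt template asks the model for a "### Hint" header, but the old pattern matched lowercase "### hint", so extraction failed on well-formed responses. re.DOTALL only lets "." cross newlines; it does not relax the literal header. A quick check of the fixed pattern:

    import re

    hint_pattern = re.compile(r"### Hint\n(.*?)(?=\n###|$)", re.DOTALL)

    response = "### Hint\nUse a prefix-sum array.\nHandle the empty case.\n### Other"
    match = hint_pattern.search(response)
    assert match.group(1) == "Use a prefix-sum array.\nHandle the empty case."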
@@ -113,7 +115,8 @@ def process_to_reason_prompt(item, hint):
         "dataset": item["dataset"],
         "task_id": item["task_id"],
         "solution_id": item["solution_id"],
-        "messages": messages
+        "messages": messages,
+        "pass": item["pass"]
     }
@@ -121,10 +124,10 @@ def get_debug_prompt():
     return """
 Based on the analysis provided, please:
 
 1. **Draw a conclusion**: State whether the original code is correct or not by answering "Yes" or "No".
    - Format: `Conclusion: <Yes/No>`
 
 2. **If the code is not correct**, provide the corrected code.
 
 ---
@@ -149,20 +152,24 @@ def remove_hint(item):
 def extract_conclusion_and_code(response):
     # Extract conclusion
-    conclusion_line = [line for line in response.split('\n') if line.startswith('Conclusion:')][0]
-    conclusion_str = conclusion_line.split(': ')[1].strip().lower()
-    if "yes" in conclusion_str:
-        conclusion = True
-    elif "no" in conclusion_str:
-        conclusion = False
-    else:
-        print("llm doesn't draw to a conclusion")
-        conclusion = None
+    if 'Conclusion:' not in response:
+        conclusion = None
+        print("not found conclusion\n{}".format(response))
+    else:
+        conclusion_line = [line for line in response.split('\n') if line.startswith('Conclusion:')][0]
+        conclusion_str = conclusion_line.split(': ')[1].strip().lower()
+        if "yes" in conclusion_str:
+            conclusion = True
+        elif "no" in conclusion_str:
+            conclusion = False
+        else:
+            print("llm doesn't draw to a conclusion\n{}".format(response))
+            conclusion = None
 
     # Extract corrected code if conclusion is 'No'
     corrected_code = ""
     if not conclusion:
         corrected_code = codelib.extract_code(response)
 
     return conclusion, corrected_code
\ No newline at end of file
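The rewrite guards against responses that never emit a "Conclusion:" line; previously the list comprehension was indexed with [0] unconditionally, which raised IndexError on such responses. A sketch of the failure mode and the new fallback:

    # The old code built this list and indexed [0] without checking it was non-empty.
    response = "The analysis ran out of tokens before a verdict."
    lines = [line for line in response.split('\n') if line.startswith('Conclusion:')]
    assert lines == []  # old code: lines[0] -> IndexError
    # New code: detect the missing marker first and fall back to None.
    if 'Conclusion:' not in response:
        conclusion = None
    assert conclusion is None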
codecritic/dataset/genrm_prompt.py

-JUDGE_PROMPT = "Is the code correct (Yes/No)?"
+JUDGE_PROMPT = "Is the original code correct (Yes/No)?"
 JUDGE_MESSAGE = {"role": "user", "content": JUDGE_PROMPT}
 
 JUDGE_TOEKNS = {
     "positive": "Yes",
 ...
scripts/algolr.sh (new file, 0 → 100644)

set -xe

model="/lustre/S/huangdi/open_for_out/models/Qwen2.5-Coder-7B-Instruct/"
project="/lustre/S/nanziyuan/projects/ccc"
modelname="qwen25_coder_inst"

trainset="${project}/data/train/${modelname}-apps-train.jsonl"
testset="${project}/data/test/${modelname}-apps-test.jsonl"
train_selected_pairs="${project}/data/train/${modelname}-apps-train-selected_pairs.jsonl"
apps="/lustre/S/nanziyuan/datasets/apps/"
sft="${project}/data/train/${modelname}-sft.jsonl"
ftmodel="${project}/model/algolr"

export CUDA_VISIBLE_DEVICES=0,1,2,3

# python -m codecritic.cli.algolr \
#     --model ${model} \
#     --dataset ${trainset} \
#     --pairinfo ${train_selected_pairs} \
#     --apps ${apps} \
#     --output ${sft} \
#     --level beginner \
#     --tp 1

deepspeed --module \
    openrlhf.cli.train_sft \
    --max_len 4096 \
    --dataset ${sft} \
    --input_key question \
    --output_key response \
    --apply_chat_template \
    --train_batch_size 32 \
    --micro_train_batch_size 2 \
    --max_samples 500000 \
    --pretrain ${model} \
    --save_path ${ftmodel} \
    --save_steps -1 \
    --logging_steps 1 \
    --eval_steps -1 \
    --zero_stage 2 \
    --max_epochs 1 \
    --bf16 \
    --flash_attn \
    --learning_rate 5e-6 \
    --load_checkpoint \
    --gradient_checkpointing \
    --use_tensorboard ${ftmodel}/runs
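train_sft consumes the jsonl written by codecritic.cli.algolr above via --input_key question and --output_key response. Judging from the algolr.py hunk, each record carries message lists rather than plain strings; an assumed example record (field contents illustrative only):

    # Approximate shape of one ${sft} record consumed by train_sft;
    # the concrete values here are hypothetical.
    record = {
        "dataset": "apps",
        "task_id": "apps/0001",
        "solution_id": "s0",
        "question": [{"role": "user", "content": "problem statement"}],
        "response": [
            {"role": "assistant", "content": "reasoning about the code"},
            {"role": "user", "content": "Is the original code correct (Yes/No)?"},
            {"role": "assistant", "content": "Yes"},
        ],
    }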
scripts/gen_dataset.sh

@@ -9,6 +9,9 @@ testset="${project}/data/test/${modelname}-apps-test.jsonl"
 train_selected_pairs="${project}/data/train/${modelname}-apps-train-selected_pairs.jsonl"
+reward_ds="${project}/data/train/${modelname}-apps-train-reward_dataset.jsonl"
 
 export CUDA_VISIBLE_DEVICES=0,1,2,3
 
 ## Sampling

@@ -44,3 +47,9 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3
 python -m codecritic.cli.select_preference_pairs \
     --dataset ${trainset} \
     --output ${train_selected_pairs}
+
+python -m codecritic.cli.reformat \
+    --dataset ${trainset} \
+    --pairs ${train_selected_pairs} \
+    --format reward \
+    --output ${reward_ds}
scripts/train_orm.sh (new file, 0 → 100644)

set -xe

dataset="/lustre/S/nanziyuan/projects/ccc/data/train/qwen25_coder_inst-apps-train-reward_dataset.jsonl"
model="/lustre/S/huangdi/open_for_out/models/Qwen2.5-Coder-7B-Instruct/"
ftmodel="${project}/model/qwen25_coder_inst_7b-orm"

deepspeed --module \
    openrlhf.cli.train_rm \
    --save_path ${ftmodel} \
    --save_steps -1 \
    --logging_steps 1 \
    --eval_steps -1 \
    --train_batch_size 256 \
    --micro_train_batch_size 1 \
    --pretrain ${model} \
    --bf16 \
    --max_epochs 1 \
    --max_len 8192 \
    --zero_stage 3 \
    --learning_rate 9e-6 \
    --dataset ${dataset} \
    --apply_chat_template \
    --prompt_key messages \
    --chosen_key chosen \
    --rejected_key rejected \
    --flash_attn \
    --load_checkpoint \
    --gradient_checkpointing \
    --use_tensorboard "${ftmodel}_log"

# start_server() {
#     echo "Starting server..."
#     CUDA_VISIBLE_DEVICES=0 \
#     python -m openrlhf.cli.serve_rm \
#         --reward_pretrain ${model} \
#         --normalize_reward \
#         --port 5000 \
#         --bf16 \
#         --max_len 8192 &
#     SERVER_PID=$!
#     echo "Server started with PID: $SERVER_PID"
# }
#
# # Function to start the client
# start_client() {
#     echo "Starting client..."
#     python -m codecritic.cli.run_rm_test \
#         --model ${model} \
#         --test "${datasets}/sample/min_test.jsonl" \
#         --apps /lustre/S/nanziyuan/datasets/apps/
#     CLIENT_EXIT_CODE=$?
#     echo "Client finished with exit code: $CLIENT_EXIT_CODE"
# }
#
# # Function to stop the server
# stop_server() {
#     echo "Stopping server..."
#     kill -SIGINT $SERVER_PID
#     wait $SERVER_PID 2>/dev/null
#     echo "Server stopped."
# }
#
# start_server
# # Give the server some time to initialize (optional)
# sleep 60
# start_client
# stop_server
# echo "Execution complete."
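train_rm reads preference pairs via --prompt_key messages, --chosen_key chosen, and --rejected_key rejected, so each line of the reward dataset presumably has this shape (contents illustrative, not taken from the repo):

    # Assumed shape of one reward-dataset line for openrlhf.cli.train_rm.
    record = {
        "messages": [{"role": "user", "content": "problem statement"}],
        "chosen": "a solution judged correct",
        "rejected": "a solution judged incorrect",
    }

Note that ftmodel interpolates ${project}, which this script never sets; it is presumably expected from the calling environment.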