Commit 944b6831 by nanziyuan

Fix bugs in algolr

parent 999362c5
@@ -10,6 +10,7 @@ from codecritic.utils.inference import generate_worker
 from codecritic.utils.json import load_jsonl, save_jsonl
 import codecritic.evaluation.apps_eval as evaluation
 import codecritic.dataset.algolr_prompt as promptlib
+from codecritic.dataset.genrm_prompt import JUDGE_MESSAGE, mk_judge_response

 if __name__ == "__main__":
@@ -19,7 +20,7 @@ if __name__ == "__main__":
     parser.add_argument("--pairinfo", type=str, help="path/to/pairinfo")
     parser.add_argument("--apps", type=str, help="path/to/apps")
     parser.add_argument("--output", type=str, help="path/to/score")
-    parser.add_argument("--hint_level", type=str, choices=["beginner"])
+    parser.add_argument("--level", type=str, choices=["beginner"])
     parser.add_argument(
         "--tp", type=int, default=1, help="tensor parallel"
     )
@@ -37,7 +38,7 @@ if __name__ == "__main__":
     hint_prompts = []
     for pair in pairinfo:
         task_id, chosen_id, rejected_id = pair["task_id"], pair["chosen"], pair["rejected"]
-        chosen, rejected = ds[task_id][chosen_id], ds[task_id][chosen_id]
+        chosen, rejected = ds[task_id][chosen_id], ds[task_id][rejected_id]
         prompt = promptlib.process_to_hint_prompt(chosen, rejected, args.level)
         hint_prompts.append(prompt)
@@ -64,7 +65,7 @@ if __name__ == "__main__":
     reason_prompts = []
     for pair in pairinfo:
         task_id, chosen_id, rejected_id = pair["task_id"], pair["chosen"], pair["rejected"]
-        chosen, rejected = ds[task_id][chosen_id], ds[task_id][chosen_id]
+        chosen, rejected = ds[task_id][chosen_id], ds[task_id][rejected_id]
         CORRECT_HINT = "The code is correct."
         # chosen
@@ -124,34 +125,46 @@ if __name__ == "__main__":
             item["code"] = code
             verify_passed.append(item)

-    print("verify passed (judgement consistent) size: {}".format(len(verify_passed)))
     incorrects, corrects = [], []
     for item in verify_passed:
+        item["meta_old_pass"] = item["pass"]
         if not item["pass"]:
             incorrects.append(item)
         else:
             corrects.append(item)

+    print("verify passed (judgement consistent) size: {}".format(len(verify_passed)))
+    print("Corrects (judgement consistent) size: {}".format(len(corrects)))
+    print("Incorrects (judgement consistent) size: {}".format(len(incorrects)))

     # need a list of dict {"task_id": str, "solution_id": str(unique index), "code": ...}
     apps = load_dataset(args.apps)
     fixed_incorrects = evaluation.evaluate(incorrects, apps)
-    # print(fixed_incorrects[0])
+    incorrects = [x for x in fixed_incorrects if x["pass"]]

-    # filter that code is not correct.
-    verify_passed = [x for x in fixed_incorrects if x["pass"]] + corrects
-    print("verify passed (judgement consistent) size: {}".format(len(verify_passed)))
+    verify_passed = incorrects + corrects
+    print("verify passed (execution consistent) size: {}".format(len(verify_passed)))
+    print("Corrects (execution consistent) size: {}".format(len(corrects)))
+    print("Incorrects (execution consistent) size: {}".format(len(incorrects)))

     # Step4 Remove hints and Reformat to a SFT dataset
     # extract reasoning sets
     sft = []
     for item in verify_passed:
+        if item["meta_old_pass"]:
+            judge_response = mk_judge_response("positive")
+        else:
+            judge_response = mk_judge_response("negative")
+        response = [item["messages"][1], JUDGE_MESSAGE, judge_response]
+
         line = {
             "dataset": item["dataset"],
             "task_id": item["task_id"],
             "solution_id": item["solution_id"],
             "question": item["messages"][:1],
-            "response": item["messages"][1:2],
+            "response": response,
         }
         sft.append(line)
...
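For reference, a minimal sketch of the record shape the rewritten SFT loop emits after this change. All field values below are illustrative, and the judge turn is built inline; the exact return shape of `mk_judge_response` is an assumption here.

```python
# Illustrative SFT record in the new layout (values are made up, not from the dataset).
# The response now holds three turns: hint-free reasoning, the judge question, and a
# Yes/No answer derived from the pre-execution label stored in meta_old_pass.
JUDGE_MESSAGE = {"role": "user", "content": "Is the original code correct (Yes/No)?"}

item = {
    "dataset": "apps",
    "task_id": "apps/0001",      # hypothetical ids
    "solution_id": "3",
    "meta_old_pass": False,      # the candidate solution failed its tests
    "messages": [
        {"role": "user", "content": "<question + candidate code>"},
        {"role": "assistant", "content": "<reasoning about the code>"},
    ],
}

# Assumption: mk_judge_response(label) wraps the Yes/No token in an assistant turn.
judge_response = {"role": "assistant", "content": "Yes" if item["meta_old_pass"] else "No"}

line = {
    "dataset": item["dataset"],
    "task_id": item["task_id"],
    "solution_id": item["solution_id"],
    "question": item["messages"][:1],
    "response": [item["messages"][1], JUDGE_MESSAGE, judge_response],
}
print([m["role"] for m in line["response"]])  # ['assistant', 'user', 'assistant']
```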
 import argparse
-from itertools import product, chain
+from collections import defaultdict
+from itertools import chain

 from codecritic.utils.json import load_jsonl, save_jsonl
@@ -43,10 +44,15 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--dataset", type=str, help="path/to/dataset")
     parser.add_argument("--pairs", type=str, help="path/to/selected_pairs")
+    parser.add_argument("--format", type=str, choices=["reward"])
     parser.add_argument("--output", type=str, help="path/to/output")
     args = parser.parse_args()

-    dataset = load_jsonl(args.dataset)
+    dataset_jsonl = load_jsonl(args.dataset)
+    dataset = defaultdict(dict)
+    for item in dataset_jsonl:
+        dataset[item["task_id"]][item["solution_id"]] = item
     selected_pairs = load_jsonl(args.pairs)

     if args.format == "sft":
...
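A small sketch of the nested index this hunk builds, with made-up rows; it shows the `dataset[task_id][solution_id]` lookup used when resolving selected pairs.

```python
from collections import defaultdict

# Illustrative dataset rows (field values are made up).
dataset_jsonl = [
    {"task_id": "apps/0001", "solution_id": "0", "pass": True},
    {"task_id": "apps/0001", "solution_id": "1", "pass": False},
]

# Index solutions by task_id, then by solution_id, as in the hunk above.
dataset = defaultdict(dict)
for item in dataset_jsonl:
    dataset[item["task_id"]][item["solution_id"]] = item

# Pair info references solutions by id; the nested dict makes each lookup direct.
pair = {"task_id": "apps/0001", "chosen": "0", "rejected": "1"}
chosen = dataset[pair["task_id"]][pair["chosen"]]
rejected = dataset[pair["task_id"]][pair["rejected"]]
print(chosen["pass"], rejected["pass"])  # True False
```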
@@ -84,7 +84,7 @@ if __name__ == "__main__":
     selected_pairs = []
     for task, items in task_groups.items():
-        sorted_items = sorted(items, key=lambda x: x["similarity"], reverse=True)[:4]
+        sorted_items = sorted(items, key=lambda x: x["similarity"], reverse=True)[:2]
         selected_pairs.extend(sorted_items)

     save_jsonl(selected_pairs, args.output)
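For context on the `[:4]` to `[:2]` change, a short sketch of the per-task selection with made-up candidates; `task_groups` is assumed to map each task to its candidate pairs scored by similarity.

```python
from collections import defaultdict

# Made-up candidate pairs with similarity scores.
candidates = [
    {"task_id": "apps/0001", "chosen": "0", "rejected": "1", "similarity": 0.91},
    {"task_id": "apps/0001", "chosen": "2", "rejected": "1", "similarity": 0.84},
    {"task_id": "apps/0001", "chosen": "0", "rejected": "3", "similarity": 0.67},
]

task_groups = defaultdict(list)
for c in candidates:
    task_groups[c["task_id"]].append(c)

selected_pairs = []
for task, items in task_groups.items():
    # Keep only the two most similar pairs per task (previously four).
    sorted_items = sorted(items, key=lambda x: x["similarity"], reverse=True)[:2]
    selected_pairs.extend(sorted_items)

print([p["similarity"] for p in selected_pairs])  # [0.91, 0.84]
```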
@@ -34,10 +34,11 @@ Ensure the hint is clear, actionable, and appropriate for a **{level}-level** le
 Return your response in the following format:

 ### Hint
-[Your hint here. Include both algorithmic reasoning and actionable guidance. Entirely in natural language.]
+[Your hint here. Include both algorithmic reasoning and actionable guidance. Natural Language Only.]
 """
     return prompt.strip()
+
 def process_to_hint_prompt(chosen, rejected, level):
     question = chosen["messages"][0]["content"]
     # question = "\n".join(question.strip().splitlines()[1:-1])
@@ -55,7 +56,8 @@ def process_to_hint_prompt(chosen, rejected, level):
         "messages": messages
     }

-hint_pattern = re.compile(r"### hint\n(.*?)(?=\n###|$)", re.DOTALL)
+
+hint_pattern = re.compile(r"### Hint\n(.*?)(?=\n###|$)", re.DOTALL)
 def postprocess_to_hint(llm_response):
     messages = llm_response.pop("messages")
     response = messages[-1]["content"]
@@ -113,7 +115,8 @@ def process_to_reason_prompt(item, hint):
         "dataset": item["dataset"],
         "task_id": item["task_id"],
         "solution_id": item["solution_id"],
-        "messages": messages
+        "messages": messages,
+        "pass": item["pass"]
     }
@@ -121,10 +124,10 @@ def get_debug_prompt():
     return """
 Based on the analysis provided, please:
 1. **Draw a conclusion**: State whether the original code is correct or not by answering "Yes" or "No".
    - Format: `Conclusion: <Yes/No>`
 2. **If the code is not correct**, provide the corrected code.

 ---
@@ -149,20 +152,24 @@ def remove_hint(item):
 def extract_conclusion_and_code(response):
     # Extract conclusion
-    conclusion_line = [line for line in response.split('\n') if line.startswith('Conclusion:')][0]
-    conclusion_str = conclusion_line.split(': ')[1].strip().lower()
-    if "yes" in conclusion_str:
-        conclusion = True
-    elif "no" in conclusion_str:
-        conclusion = False
-    else:
-        print("llm doesn't draw to a conclusion")
-        conclusion = None
+    if 'Conclusion:' not in response:
+        conclusion = None
+        print("not found conclusion\n{}".format(response))
+    else:
+        conclusion_line = [line for line in response.split('\n') if line.startswith('Conclusion:')][0]
+        conclusion_str = conclusion_line.split(': ')[1].strip().lower()
+        if "yes" in conclusion_str:
+            conclusion = True
+        elif "no" in conclusion_str:
+            conclusion = False
+        else:
+            print("llm doesn't draw to a conclusion\n{}".format(response))
+            conclusion = None

     # Extract corrected code if conclusion is 'No'
     corrected_code = ""
     if not conclusion:
         corrected_code = codelib.extract_code(response)

     return conclusion, corrected_code
\ No newline at end of file
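A quick check of the new guard in `extract_conclusion_and_code`, using a stand-in for `codelib.extract_code` (the real helper lives elsewhere in the repo, so this sketch only mirrors the conclusion parsing).

```python
def extract_code(response):
    # Hypothetical stand-in for codelib.extract_code (the real helper parses code blocks).
    marker = "CODE:"
    return response.split(marker, 1)[1].strip() if marker in response else ""

def extract_conclusion_and_code(response):
    # Guard first: without it, a response missing "Conclusion:" raised an IndexError.
    if "Conclusion:" not in response:
        conclusion = None
    else:
        line = [l for l in response.split("\n") if l.startswith("Conclusion:")][0]
        verdict = line.split(": ")[1].strip().lower()
        conclusion = True if "yes" in verdict else False if "no" in verdict else None
    corrected_code = "" if conclusion else extract_code(response)
    return conclusion, corrected_code

print(extract_conclusion_and_code("Conclusion: Yes"))                 # (True, '')
print(extract_conclusion_and_code("the model rambled instead"))       # (None, '')
print(extract_conclusion_and_code("Conclusion: No\nCODE: print(1)"))  # (False, 'print(1)')
```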
-JUDGE_PROMPT = "Is the code correct (Yes/No)?"
+JUDGE_PROMPT = "Is the original code correct (Yes/No)?"
 JUDGE_MESSAGE = {"role": "user", "content": JUDGE_PROMPT}
 JUDGE_TOEKNS = {
     "positive": "Yes",
...
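The algolr script above imports `mk_judge_response` from this module, but the helper itself sits outside the shown hunks. A plausible sketch consistent with `JUDGE_TOEKNS`, offered as an assumption rather than the repository's actual implementation:

```python
# Assumed sketch only: the real mk_judge_response in codecritic.dataset.genrm_prompt
# may differ. The "negative" entry of JUDGE_TOEKNS is also assumed (the diff truncates it).
JUDGE_TOEKNS = {
    "positive": "Yes",
    "negative": "No",
}

def mk_judge_response(label):
    """Build the assistant turn that answers the judge question for a given label."""
    return {"role": "assistant", "content": JUDGE_TOEKNS[label]}

print(mk_judge_response("negative"))  # {'role': 'assistant', 'content': 'No'}
```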
set -xe
model="/lustre/S/huangdi/open_for_out/models/Qwen2.5-Coder-7B-Instruct/"
project="/lustre/S/nanziyuan/projects/ccc"
modelname="qwen25_coder_inst"
trainset="${project}/data/train/${modelname}-apps-train.jsonl"
testset="${project}/data/test/${modelname}-apps-test.jsonl"
train_selected_pairs="${project}/data/train/${modelname}-apps-train-selected_pairs.jsonl"
apps="/lustre/S/nanziyuan/datasets/apps/"
sft="${project}/data/train/${modelname}-sft.jsonl"
ftmodel="${project}/model/algolr"
export CUDA_VISIBLE_DEVICES=0,1,2,3
# python -m codecritic.cli.algolr \
# --model ${model} \
# --dataset ${trainset} \
# --pairinfo ${train_selected_pairs} \
# --apps ${apps} \
# --output ${sft} \
# --level beginner \
# --tp 1
deepspeed --module \
openrlhf.cli.train_sft \
--max_len 4096 \
--dataset ${sft} \
--input_key question \
--output_key response \
--apply_chat_template \
--train_batch_size 32 \
--micro_train_batch_size 2 \
--max_samples 500000 \
--pretrain ${model} \
--save_path ${ftmodel} \
--save_steps -1 \
--logging_steps 1 \
--eval_steps -1 \
--zero_stage 2 \
--max_epochs 1 \
--bf16 \
--flash_attn \
--learning_rate 5e-6 \
--load_checkpoint \
--gradient_checkpointing \
--use_tensorboard ${ftmodel}/runs
@@ -9,6 +9,9 @@ testset="${project}/data/test/${modelname}-apps-test.jsonl"
 train_selected_pairs="${project}/data/train/${modelname}-apps-train-selected_pairs.jsonl"
+reward_ds="${project}/data/train/${modelname}-apps-train-reward_dataset.jsonl"

 export CUDA_VISIBLE_DEVICES=0,1,2,3

 ## Sampling
@@ -44,3 +47,9 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3
 python -m codecritic.cli.select_preference_pairs \
     --dataset ${trainset} \
     --output ${train_selected_pairs}
+
+python -m codecritic.cli.reformat \
+    --dataset ${trainset} \
+    --pairs ${train_selected_pairs} \
+    --format reward \
+    --output ${reward_ds}
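The `openrlhf.cli.train_rm` invocation in the next script reads this file with `--prompt_key messages --chosen_key chosen --rejected_key rejected`. A hedged sketch of what one line of the reward dataset produced by `codecritic.cli.reformat --format reward` might look like; the field contents are assumptions inferred from those flags, not the script's verified output.

```python
import json

# Assumed shape of one reward-dataset line, inferred from the train_rm flags;
# the actual output of codecritic.cli.reformat may differ in detail.
line = {
    "messages": [
        {"role": "user", "content": "<APPS problem statement>"},
    ],
    "chosen": [
        {"role": "assistant", "content": "<solution that passed the tests>"},
    ],
    "rejected": [
        {"role": "assistant", "content": "<solution that failed the tests>"},
    ],
}

print(json.dumps(line)[:80] + "...")  # one JSONL row per preference pair
```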
set -xe

dataset="/lustre/S/nanziyuan/projects/ccc/data/train/qwen25_coder_inst-apps-train-reward_dataset.jsonl"
model="/lustre/S/huangdi/open_for_out/models/Qwen2.5-Coder-7B-Instruct/"
project="/lustre/S/nanziyuan/projects/ccc"  # required by ftmodel below; was previously undefined
ftmodel="${project}/model/qwen25_coder_inst_7b-orm"
deepspeed --module \
openrlhf.cli.train_rm \
--save_path ${ftmodel} \
--save_steps -1 \
--logging_steps 1 \
--eval_steps -1 \
--train_batch_size 256 \
--micro_train_batch_size 1 \
--pretrain ${model} \
--bf16 \
--max_epochs 1 \
--max_len 8192 \
--zero_stage 3 \
--learning_rate 9e-6 \
--dataset ${dataset} \
--apply_chat_template \
--prompt_key messages \
--chosen_key chosen \
--rejected_key rejected \
--flash_attn \
--load_checkpoint \
--gradient_checkpointing \
--use_tensorboard "${ftmodel}_log"
# start_server() {
# echo "Starting server..."
# CUDA_VISIBLE_DEVICES=0 \
# python -m openrlhf.cli.serve_rm \
# --reward_pretrain ${model} \
# --normalize_reward \
# --port 5000 \
# --bf16 \
# --max_len 8192 &
# SERVER_PID=$!
# echo "Server started with PID: $SERVER_PID"
# }
# # Function to start the client
# start_client() {
# echo "Starting client..."
# python -m codecritic.cli.run_rm_test \
# --model ${model} \
# --test "${datasets}/sample/min_test.jsonl" \
# --apps /lustre/S/nanziyuan/datasets/apps/
# CLIENT_EXIT_CODE=$?
# echo "Client finished with exit code: $CLIENT_EXIT_CODE"
# }
# # Function to stop the server
# stop_server() {
# echo "Stopping server..."
# kill -SIGINT $SERVER_PID
# wait $SERVER_PID 2>/dev/null
# echo "Server stopped."
# }
# start_server
# # Give the server some time to initialize (optional)
# sleep 60
# start_client
# stop_server
# echo "Execution complete."