Commit 944b6831 by nanziyuan

Fix bugs in algolr

parent 999362c5
@@ -10,6 +10,7 @@ from codecritic.utils.inference import generate_worker
 from codecritic.utils.json import load_jsonl, save_jsonl
 import codecritic.evaluation.apps_eval as evaluation
 import codecritic.dataset.algolr_prompt as promptlib
+from codecritic.dataset.genrm_prompt import JUDGE_MESSAGE, mk_judge_response

 if __name__ == "__main__":
@@ -19,7 +20,7 @@ if __name__ == "__main__":
     parser.add_argument("--pairinfo", type=str, help="path/to/pairinfo")
     parser.add_argument("--apps", type=str, help="path/to/apps")
     parser.add_argument("--output", type=str, help="path/to/score")
-    parser.add_argument("--hint_level", type=str, choices=["beginner"])
+    parser.add_argument("--level", type=str, choices=["beginner"])
     parser.add_argument(
         "--tp", type=int, default=1, help="tensor parallel"
     )
@@ -37,7 +38,7 @@ if __name__ == "__main__":
     hint_prompts = []
     for pair in pairinfo:
         task_id, chosen_id, rejected_id = pair["task_id"], pair["chosen"], pair["rejected"]
-        chosen, rejected = ds[task_id][chosen_id], ds[task_id][chosen_id]
+        chosen, rejected = ds[task_id][chosen_id], ds[task_id][rejected_id]
         prompt = promptlib.process_to_hint_prompt(chosen, rejected, args.level)
         hint_prompts.append(prompt)
@@ -64,7 +65,7 @@ if __name__ == "__main__":
     reason_prompts = []
     for pair in pairinfo:
         task_id, chosen_id, rejected_id = pair["task_id"], pair["chosen"], pair["rejected"]
-        chosen, rejected = ds[task_id][chosen_id], ds[task_id][chosen_id]
+        chosen, rejected = ds[task_id][chosen_id], ds[task_id][rejected_id]
         CORRECT_HINT = "The code is correct."
         # chosen
@@ -124,34 +125,46 @@ if __name__ == "__main__":
             item["code"] = code
             verify_passed.append(item)

-    print("verify passed (judgement consistent) size: {}".format(len(verify_passed)))
     incorrects, corrects = [], []
     for item in verify_passed:
+        item["meta_old_pass"] = item["pass"]
         if not item["pass"]:
             incorrects.append(item)
         else:
             corrects.append(item)

+    print("verify passed (judgement consistent) size: {}".format(len(verify_passed)))
+    print("Corrects (judgement consistent) size: {}".format(len(corrects)))
+    print("Incorrects (judgement consistent) size: {}".format(len(incorrects)))

     # need a list of dict {"task_id": str, "solution_id": str(unique index), "code": ...}
     apps = load_dataset(args.apps)
     fixed_incorrects = evaluation.evaluate(incorrects, apps)
-    # print(fixed_incorrects[0])
+    incorrects = [x for x in fixed_incorrects if x["pass"]]

-    # filter that code is not correct.
-    verify_passed = [x for x in fixed_incorrects if x["pass"]] + corrects
-    print("verify passed (judgement consistent) size: {}".format(len(verify_passed)))
+    verify_passed = incorrects + corrects
+    print("verify passed (execution consistent) size: {}".format(len(verify_passed)))
+    print("Corrects (execution consistent) size: {}".format(len(corrects)))
+    print("Incorrects (execution consistent) size: {}".format(len(incorrects)))

     # Step4 Remove hints and Reformat to a SFT dataset
     # extract reasoning sets
     sft = []
     for item in verify_passed:
+        if item["meta_old_pass"]:
+            judge_response = mk_judge_response("positive")
+        else:
+            judge_response = mk_judge_response("negative")
+        response = [item["messages"][1], JUDGE_MESSAGE, judge_response]
+
         line = {
             "dataset": item["dataset"],
             "task_id": item["task_id"],
             "solution_id": item["solution_id"],
             "question": item["messages"][:1],
-            "response": item["messages"][1:2],
+            "response": response,
         }
         sft.append(line)
...
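For reference, a minimal sketch of the record shape the rewritten SFT loop emits after this change. All field values below are illustrative, and the judge turn is built inline; the exact return shape of `mk_judge_response` is an assumption here.

```python
# Illustrative SFT record in the new layout (values are made up, not from the dataset).
# The response now holds three turns: hint-free reasoning, the judge question, and a
# Yes/No answer derived from the pre-execution label stored in meta_old_pass.
JUDGE_MESSAGE = {"role": "user", "content": "Is the original code correct (Yes/No)?"}

item = {
    "dataset": "apps",
    "task_id": "apps/0001",      # hypothetical ids
    "solution_id": "3",
    "meta_old_pass": False,      # the candidate solution failed its tests
    "messages": [
        {"role": "user", "content": "<question + candidate code>"},
        {"role": "assistant", "content": "<reasoning about the code>"},
    ],
}

# Assumption: mk_judge_response(label) wraps the Yes/No token in an assistant turn.
judge_response = {"role": "assistant", "content": "Yes" if item["meta_old_pass"] else "No"}

line = {
    "dataset": item["dataset"],
    "task_id": item["task_id"],
    "solution_id": item["solution_id"],
    "question": item["messages"][:1],
    "response": [item["messages"][1], JUDGE_MESSAGE, judge_response],
}
print([m["role"] for m in line["response"]])  # ['assistant', 'user', 'assistant']
```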
 import argparse
-from itertools import product, chain
+from collections import defaultdict
+from itertools import chain

 from codecritic.utils.json import load_jsonl, save_jsonl
@@ -43,10 +44,15 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--dataset", type=str, help="path/to/dataset")
     parser.add_argument("--pairs", type=str, help="path/to/selected_pairs")
+    parser.add_argument("--format", type=str, choices=["reward"])
     parser.add_argument("--output", type=str, help="path/to/output")
     args = parser.parse_args()

-    dataset = load_jsonl(args.dataset)
+    dataset_jsonl = load_jsonl(args.dataset)
+    dataset = defaultdict(dict)
+    for item in dataset_jsonl:
+        dataset[item["task_id"]][item["solution_id"]] = item
     selected_pairs = load_jsonl(args.pairs)

     if args.format == "sft":
...
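A small sketch of the nested index this hunk builds, with made-up rows; it shows the `dataset[task_id][solution_id]` lookup used when resolving selected pairs.

```python
from collections import defaultdict

# Illustrative dataset rows (field values are made up).
dataset_jsonl = [
    {"task_id": "apps/0001", "solution_id": "0", "pass": True},
    {"task_id": "apps/0001", "solution_id": "1", "pass": False},
]

# Index solutions by task_id, then by solution_id, as in the hunk above.
dataset = defaultdict(dict)
for item in dataset_jsonl:
    dataset[item["task_id"]][item["solution_id"]] = item

# Pair info references solutions by id; the nested dict makes each lookup direct.
pair = {"task_id": "apps/0001", "chosen": "0", "rejected": "1"}
chosen = dataset[pair["task_id"]][pair["chosen"]]
rejected = dataset[pair["task_id"]][pair["rejected"]]
print(chosen["pass"], rejected["pass"])  # True False
```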
@@ -84,7 +84,7 @@ if __name__ == "__main__":
     selected_pairs = []
     for task, items in task_groups.items():
-        sorted_items = sorted(items, key=lambda x: x["similarity"], reverse=True)[:4]
+        sorted_items = sorted(items, key=lambda x: x["similarity"], reverse=True)[:2]
         selected_pairs.extend(sorted_items)

     save_jsonl(selected_pairs, args.output)
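For context on the `[:4]` to `[:2]` change, a short sketch of the per-task selection with made-up candidates; `task_groups` is assumed to map each task to its candidate pairs scored by similarity.

```python
from collections import defaultdict

# Made-up candidate pairs with similarity scores.
candidates = [
    {"task_id": "apps/0001", "chosen": "0", "rejected": "1", "similarity": 0.91},
    {"task_id": "apps/0001", "chosen": "2", "rejected": "1", "similarity": 0.84},
    {"task_id": "apps/0001", "chosen": "0", "rejected": "3", "similarity": 0.67},
]

task_groups = defaultdict(list)
for c in candidates:
    task_groups[c["task_id"]].append(c)

selected_pairs = []
for task, items in task_groups.items():
    # Keep only the two most similar pairs per task (previously four).
    sorted_items = sorted(items, key=lambda x: x["similarity"], reverse=True)[:2]
    selected_pairs.extend(sorted_items)

print([p["similarity"] for p in selected_pairs])  # [0.91, 0.84]
```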
@@ -34,10 +34,11 @@ Ensure the hint is clear, actionable, and appropriate for a **{level}-level** le
 Return your response in the following format:

 ### Hint
-[Your hint here. Include both algorithmic reasoning and actionable guidance. Entirely in natural language.]
+[Your hint here. Include both algorithmic reasoning and actionable guidance. Natural Language Only.]
 """
     return prompt.strip()
+
 def process_to_hint_prompt(chosen, rejected, level):
     question = chosen["messages"][0]["content"]
     # question = "\n".join(question.strip().splitlines()[1:-1])
@@ -55,7 +56,8 @@ def process_to_hint_prompt(chosen, rejected, level):
         "messages": messages
     }

-hint_pattern = re.compile(r"### hint\n(.*?)(?=\n###|$)", re.DOTALL)
+
+hint_pattern = re.compile(r"### Hint\n(.*?)(?=\n###|$)", re.DOTALL)
 def postprocess_to_hint(llm_response):
     messages = llm_response.pop("messages")
     response = messages[-1]["content"]
@@ -113,7 +115,8 @@ def process_to_reason_prompt(item, hint):
         "dataset": item["dataset"],
         "task_id": item["task_id"],
         "solution_id": item["solution_id"],
-        "messages": messages
+        "messages": messages,
+        "pass": item["pass"]
     }
@@ -121,10 +124,10 @@ def get_debug_prompt():
     return """
 Based on the analysis provided, please:
 1. **Draw a conclusion**: State whether the original code is correct or not by answering "Yes" or "No".
    - Format: `Conclusion: <Yes/No>`
 2. **If the code is not correct**, provide the corrected code.

 ---
@@ -149,20 +152,24 @@ def remove_hint(item):
 def extract_conclusion_and_code(response):
     # Extract conclusion
-    conclusion_line = [line for line in response.split('\n') if line.startswith('Conclusion:')][0]
-    conclusion_str = conclusion_line.split(': ')[1].strip().lower()
-    if "yes" in conclusion_str:
-        conclusion = True
-    elif "no" in conclusion_str:
-        conclusion = False
-    else:
-        print("llm doesn't draw to a conclusion")
-        conclusion = None
+    if 'Conclusion:' not in response:
+        conclusion = None
+        print("not found conclusion\n{}".format(response))
+    else:
+        conclusion_line = [line for line in response.split('\n') if line.startswith('Conclusion:')][0]
+        conclusion_str = conclusion_line.split(': ')[1].strip().lower()
+        if "yes" in conclusion_str:
+            conclusion = True
+        elif "no" in conclusion_str:
+            conclusion = False
+        else:
+            print("llm doesn't draw to a conclusion\n{}".format(response))
+            conclusion = None

     # Extract corrected code if conclusion is 'No'
     corrected_code = ""
     if not conclusion:
         corrected_code = codelib.extract_code(response)

     return conclusion, corrected_code
\ No newline at end of file
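A quick check of the new guard in `extract_conclusion_and_code`, using a stand-in for `codelib.extract_code` (the real helper lives elsewhere in the repo, so this sketch only mirrors the conclusion parsing).

```python
def extract_code(response):
    # Hypothetical stand-in for codelib.extract_code (the real helper parses code blocks).
    marker = "CODE:"
    return response.split(marker, 1)[1].strip() if marker in response else ""

def extract_conclusion_and_code(response):
    # Guard first: without it, a response missing "Conclusion:" raised an IndexError.
    if "Conclusion:" not in response:
        conclusion = None
    else:
        line = [l for l in response.split("\n") if l.startswith("Conclusion:")][0]
        verdict = line.split(": ")[1].strip().lower()
        conclusion = True if "yes" in verdict else False if "no" in verdict else None
    corrected_code = "" if conclusion else extract_code(response)
    return conclusion, corrected_code

print(extract_conclusion_and_code("Conclusion: Yes"))                 # (True, '')
print(extract_conclusion_and_code("the model rambled instead"))       # (None, '')
print(extract_conclusion_and_code("Conclusion: No\nCODE: print(1)"))  # (False, 'print(1)')
```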
-JUDGE_PROMPT = "Is the code correct (Yes/No)?"
+JUDGE_PROMPT = "Is the original code correct (Yes/No)?"
 JUDGE_MESSAGE = {"role": "user", "content": JUDGE_PROMPT}
 JUDGE_TOEKNS = {
     "positive": "Yes",
...
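The algolr script above imports `mk_judge_response` from this module, but the helper itself sits outside the shown hunks. A plausible sketch consistent with `JUDGE_TOEKNS`, offered as an assumption rather than the repository's actual implementation:

```python
# Assumed sketch only: the real mk_judge_response in codecritic.dataset.genrm_prompt
# may differ. The "negative" entry of JUDGE_TOEKNS is also assumed (the diff truncates it).
JUDGE_TOEKNS = {
    "positive": "Yes",
    "negative": "No",
}

def mk_judge_response(label):
    """Build the assistant turn that answers the judge question for a given label."""
    return {"role": "assistant", "content": JUDGE_TOEKNS[label]}

print(mk_judge_response("negative"))  # {'role': 'assistant', 'content': 'No'}
```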
set -xe
model="/lustre/S/huangdi/open_for_out/models/Qwen2.5-Coder-7B-Instruct/"
project="/lustre/S/nanziyuan/projects/ccc"
modelname="qwen25_coder_inst"
trainset="${project}/data/train/${modelname}-apps-train.jsonl"
testset="${project}/data/test/${modelname}-apps-test.jsonl"
train_selected_pairs="${project}/data/train/${modelname}-apps-train-selected_pairs.jsonl"
apps="/lustre/S/nanziyuan/datasets/apps/"
sft="${project}/data/train/${modelname}-sft.jsonl"
ftmodel="${project}/model/algolr"
export CUDA_VISIBLE_DEVICES=0,1,2,3
# python -m codecritic.cli.algolr \
# --model ${model} \
# --dataset ${trainset} \
# --pairinfo ${train_selected_pairs} \
# --apps ${apps} \
# --output ${sft} \
# --level beginner \
# --tp 1
deepspeed --module \
openrlhf.cli.train_sft \
--max_len 4096 \
--dataset ${sft} \
--input_key question \
--output_key response \
--apply_chat_template \
--train_batch_size 32 \
--micro_train_batch_size 2 \
--max_samples 500000 \
--pretrain ${model} \
--save_path ${ftmodel} \
--save_steps -1 \
--logging_steps 1 \
--eval_steps -1 \
--zero_stage 2 \
--max_epochs 1 \
--bf16 \
--flash_attn \
--learning_rate 5e-6 \
--load_checkpoint \
--gradient_checkpointing \
--use_tensorboard ${ftmodel}/runs
@@ -9,6 +9,9 @@ testset="${project}/data/test/${modelname}-apps-test.jsonl"
 train_selected_pairs="${project}/data/train/${modelname}-apps-train-selected_pairs.jsonl"
+reward_ds="${project}/data/train/${modelname}-apps-train-reward_dataset.jsonl"

 export CUDA_VISIBLE_DEVICES=0,1,2,3

 ## Sampling
@@ -44,3 +47,9 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3
 python -m codecritic.cli.select_preference_pairs \
     --dataset ${trainset} \
     --output ${train_selected_pairs}
+
+python -m codecritic.cli.reformat \
+    --dataset ${trainset} \
+    --pairs ${train_selected_pairs} \
+    --format reward \
+    --output ${reward_ds}
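The `openrlhf.cli.train_rm` invocation in the next script reads this file with `--prompt_key messages --chosen_key chosen --rejected_key rejected`. A hedged sketch of what one line of the reward dataset produced by `codecritic.cli.reformat --format reward` might look like; the field contents are assumptions inferred from those flags, not the script's verified output.

```python
import json

# Assumed shape of one reward-dataset line, inferred from the train_rm flags;
# the actual output of codecritic.cli.reformat may differ in detail.
line = {
    "messages": [
        {"role": "user", "content": "<APPS problem statement>"},
    ],
    "chosen": [
        {"role": "assistant", "content": "<solution that passed the tests>"},
    ],
    "rejected": [
        {"role": "assistant", "content": "<solution that failed the tests>"},
    ],
}

print(json.dumps(line)[:80] + "...")  # one JSONL row per preference pair
```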
set -xe

dataset="/lustre/S/nanziyuan/projects/ccc/data/train/qwen25_coder_inst-apps-train-reward_dataset.jsonl"
model="/lustre/S/huangdi/open_for_out/models/Qwen2.5-Coder-7B-Instruct/"
project="/lustre/S/nanziyuan/projects/ccc"  # required by ftmodel below; was previously undefined
ftmodel="${project}/model/qwen25_coder_inst_7b-orm"
deepspeed --module \
openrlhf.cli.train_rm \
--save_path ${ftmodel} \
--save_steps -1 \
--logging_steps 1 \
--eval_steps -1 \
--train_batch_size 256 \
--micro_train_batch_size 1 \
--pretrain ${model} \
--bf16 \
--max_epochs 1 \
--max_len 8192 \
--zero_stage 3 \
--learning_rate 9e-6 \
--dataset ${dataset} \
--apply_chat_template \
--prompt_key messages \
--chosen_key chosen \
--rejected_key rejected \
--flash_attn \
--load_checkpoint \
--gradient_checkpointing \
--use_tensorboard "${ftmodel}_log"
# start_server() {
# echo "Starting server..."
# CUDA_VISIBLE_DEVICES=0 \
# python -m openrlhf.cli.serve_rm \
# --reward_pretrain ${model} \
# --normalize_reward \
# --port 5000 \
# --bf16 \
# --max_len 8192 &
# SERVER_PID=$!
# echo "Server started with PID: $SERVER_PID"
# }
# # Function to start the client
# start_client() {
# echo "Starting client..."
# python -m codecritic.cli.run_rm_test \
# --model ${model} \
# --test "${datasets}/sample/min_test.jsonl" \
# --apps /lustre/S/nanziyuan/datasets/apps/
# CLIENT_EXIT_CODE=$?
# echo "Client finished with exit code: $CLIENT_EXIT_CODE"
# }
# # Function to stop the server
# stop_server() {
# echo "Stopping server..."
# kill -SIGINT $SERVER_PID
# wait $SERVER_PID 2>/dev/null
# echo "Server stopped."
# }
# start_server
# # Give the server some time to initialize (optional)
# sleep 60
# start_client
# stop_server
# echo "Execution complete."