Commit b65ddb76 by nanziyuan

add star

parent d11a2acf
import argparse
import random
import os

from transformers import AutoTokenizer

import codecritic.data.cov as cov
import codecritic.data.verify as verify
import codecritic.utils.json as ijson
from codecritic.utils.vllm import vllm_chatcomplete, vllm_score
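
# STaR-style bootstrapping pass: sample rationales with vLLM using cov.COV_PROMPT,
# score each rationale on the verification token from verify.get_score_token_id,
# keep only samples whose score agrees with the known label, and write balanced
# train/test JSONL splits.
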
def preprocess_sft(item, idx):
    messages = item["question"][:2]
    messages.append({"role": "user", "content": cov.COV_PROMPT})

    result = item["response"][0]["content"]
    if result == "Yes":
        eval_result = True
    elif result == "No":
        eval_result = False
    else:
        raise ValueError(f"Unknown critic token: {result}")

    return {"messages": messages, "eval_result": eval_result, "index": idx}


def is_reward_right(item):
    score_bool = item["score"] > 0.5
    eval_result = item["eval_result"]
    return score_bool == eval_result


def transform_to_ifdata(item):
    question = item["messages"][:3]
    response = [item["messages"][3]] + verify.mk_critic_verify(item["eval_result"])
    return {
        "question": question,
        "response": response,
        "eval_result": item["eval_result"],
    }
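
# Data-shape sketch (illustrative only; the concrete field values below are hypothetical):
#
#   item = {
#       "question": [system_msg, user_msg, ...],                # chat turns for one problem
#       "response": [{"role": "assistant", "content": "Yes"}],  # ground-truth critic label
#   }
#
#   preprocess_sft(item, 0)
#   # -> {"messages": [system_msg, user_msg, {"role": "user", "content": cov.COV_PROMPT}],
#   #     "eval_result": True,
#   #     "index": 0}
#
# After generation and scoring, transform_to_ifdata reads messages[3], presumably the
# generated rationale, and emits a {"question", "response", "eval_result"} record.
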
def run_STaR(model_path, dataset, outdir, model_gpu):
    # step0: preprocess the SFT dataset and append the CoV prompt
    dataset_size = len(dataset)
    dataset = [preprocess_sft(x, idx) for idx, x in enumerate(dataset)]

    # step1: generate rationales
    sampling_params = dict(n=10, temperature=0.6, max_tokens=2048)
    dataset = vllm_chatcomplete(model_path, dataset, sampling_params, model_gpu)
    print("reasoning finished")

    # step2: score each rationale with the judge prompt
    for item in dataset:
        item["messages"].append({"role": "user", "content": verify.JUDGE_PROMPT})
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    score_token = verify.get_score_token_id(tokenizer)
    dataset = vllm_score(model_path, dataset, score_token, model_gpu)

    # step3: keep only samples whose score is consistent with the known label
    dataset = list(filter(is_reward_right, dataset))
    print("number of consistent results:", len(dataset))
    ijson.save_jsonl(dataset, outdir + "raw.jsonl")

    # step4: select two samples per problem
    # (the highest-scored correct rationale and the lowest-scored incorrect one)
    groups = {}
    for item in dataset:
        idx = item["index"]
        if idx not in groups:
            groups[idx] = []
        groups[idx].append(item)

    max_corrects, remain_corrects = [], []
    min_incorrects, remain_incorrects = [], []
    for idx, lst in groups.items():
        correct_items = [item for item in lst if item["eval_result"]]
        incorrect_items = [item for item in lst if not item["eval_result"]]

        if correct_items:
            max_score = float("-inf")  # initialize with negative infinity
            max_index = -1
            for i, item in enumerate(correct_items):
                if item["score"] > max_score:
                    max_score = item["score"]
                    max_index = i
            max_corrects.append(correct_items.pop(max_index))
            remain_corrects.extend(correct_items)

        if incorrect_items:
            min_score = float("inf")
            min_index = -1
            for i, item in enumerate(incorrect_items):
                if item["score"] < min_score:
                    min_score = item["score"]
                    min_index = i
            min_incorrects.append(incorrect_items.pop(min_index))
            remain_incorrects.extend(incorrect_items)

    # backfill from the remaining samples if a class falls short of the target size
    target = dataset_size // 2
    print("target number of samples per class:", target)
    sorted_remain_correct = sorted(remain_corrects, key=lambda x: x["score"], reverse=True)
    sorted_remain_incorrect = sorted(remain_incorrects, key=lambda x: x["score"])
    lack_correct = target - len(max_corrects)
    lack_incorrect = target - len(min_incorrects)
    print("lack of correct:", lack_correct, "lack of incorrect:", lack_incorrect)
    max_corrects += sorted_remain_correct[:lack_correct]
    min_incorrects += sorted_remain_incorrect[:lack_incorrect]

    train_number = int(target * 0.98)
    train_dataset = max_corrects[:train_number] + min_incorrects[:train_number]
    test_dataset = max_corrects[train_number:] + min_incorrects[train_number:]

    train_dataset = [transform_to_ifdata(x) for x in train_dataset]
    test_dataset = [transform_to_ifdata(x) for x in test_dataset]
    random.shuffle(train_dataset)
    random.shuffle(test_dataset)

    print("train dataset size:", len(train_dataset))
    print("test dataset size:", len(test_dataset))
    ijson.save_jsonl(train_dataset, outdir + "train.jsonl")
    ijson.save_jsonl(test_dataset, outdir + "test.jsonl")

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str)
    parser.add_argument("--sft", type=str)
    parser.add_argument("--outdir", type=str)
    parser.add_argument("--gpu", type=int, default=1, help="number of GPUs required by one model")

    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    args = parser.parse_args()
    sft_dataset = ijson.load_jsonl(args.sft)
    run_STaR(args.model, sft_dataset, args.outdir, args.gpu)
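
# Example invocation (the script filename and paths below are assumed, not given in
# this commit). Note that outdir is concatenated directly with the output filenames,
# so a trailing slash is expected:
#   python run_star.py --model /path/to/model --sft sft.jsonl --outdir out/ --gpu 4
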
#!/bin/bash
#- Job parameters
# Please modify job name
#SBATCH -J cc # The job name
#SBATCH -o cc-%j.out # Write the standard output to a file named 'cc-<job_number>.out'
#SBATCH -e cc-%j.err # Write the standard error to a file named 'cc-<job_number>.err'
#- Resources
# Please modify your requirements
#SBATCH -p r8nv-gpu-hw-80g # Submit to the 'r8nv-gpu-hw-80g' partition
#SBATCH -t 0-12:00:00 # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
#SBATCH --nodes=1 # Request N nodes
#SBATCH --gres=gpu:4 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
###
### The system will allocate 8 or 16 cores per GPU by default.
### If you need more or fewer, use the following:
### #SBATCH --cpus-per-task=K # Request K cores
###
###
### If no constraint is specified, any available node that meets the requirements will be allocated.
### You can specify the characteristics of the compute nodes, or even request specific nodes by name.
###
### #SBATCH --nodelist=r8a30-a0 # Request a specific list of hosts
### #SBATCH --constraint="A30|A100" # Request GPU Type: A30 or A100_40GB
###
#- Log information
echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
echo "Job run at:"
echo "$(hostnamectl)"
echo "$(df -h | grep -v tmpfs)"
#- Important settings!!!
## Otherwise the job may fail with an 'insufficient RDMA resources' error:
ulimit -l unlimited
## Otherwise the job may fail with an insufficient virtual memory error, especially when loading an LLM:
ulimit -v unlimited
#- Load environments
source /tools/module_env.sh
module list # list modules loaded
##- Tools
module load cluster-tools/v1.0
module load slurm-tools/v1.0
module load gcc/9.3.0
module load cuda-cudnn/12.1-8.9.3
##- virtualenv
source "/workspace/S/nanziyuan/miniconda3/etc/profile.d/conda.sh"
conda activate openrlhf
echo $(module list) # list modules loaded
echo $(which gcc)
echo $(which python)
echo $(which python3)
#- Other
cluster-quota # nas quota
nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
#- WARNING! DO NOT MODIFY your CUDA_VISIBLE_DEVICES
#- in `.bashrc`, `env.sh`, or your job script
echo "Using GPU(s) ${CUDA_VISIBLE_DEVICES}" # which GPUs
#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
echo "This job is assigned the following resources by SLURM:"
scontrol show jobid $SLURM_JOB_ID -dd | awk '/IDX/ {print $2, $4}'
##- Monitor
# The script continues with other tasks; the command below runs in the background after a delay
module load slurm-tools/v1.0
(sleep 3h && slurm-gpu-atop-log-stats $SLURM_JOB_ID $CUDA_VISIBLE_DEVICES) &
echo "Main program continues to run. Monitoring information will be exported after three hours."
#- Main program execution
##- Job step
bash run.sh
#- End
slurm-gpu-atop-log-stats $SLURM_JOB_ID $CUDA_VISIBLE_DEVICES
echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
# This will overwrite any existing atop logs from previous runs.
# WARNING: If your program times out or is terminated by scancel,
# the above script part might not execute correctly.
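# Submission note (the job-script filename below is assumed, not given in this commit):
#   sbatch cc_star.slurm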