Commit 3f1e287d by root

add example for verilog tool call, might need tuning the prompt

parent c765c731
......@@ -11,13 +11,13 @@
### 配置
TIR的主要配置在`examples/tir/run_sandbox_fusion.sh`和`examples/tir/sandbox_fusion_tool_config.yaml`里面。
其中,由于是本地起的沙盒服务,沙盒IP取决于被分配到了哪个计算节点,因此我套了一层`examples/tir/sandbox_fusion_tool_config.yaml.template`,在执行脚本里面用
TIR的主要配置在`examples/tir/sandbox_fusion_python_config.yaml`里面。
其中,由于是本地起的沙盒服务,沙盒IP取决于被分配到了哪个计算节点,因此我套了一层`examples/tir/sandbox_fusion_python_config.yaml.template`,在执行脚本里面用
```bash
TOOL_CONFIG_PATH=$CURR_DIR/examples/tir/sandbox_fusion_tool_config.yaml
TOOL_CONFIG_PATH=$CURR_DIR/examples/tir/sandbox_fusion_python_config.yaml
envsubst < "$TOOL_CONFIG_PATH.template" > $TOOL_CONFIG_PATH
```
生成实际的`examples/tir/sandbox_fusion_tool_config.yaml`这个配置文件的`tool_schema`部分会给加到system prompt里面去。
生成实际的`$TOOL_CONFIG_PATH`。在parquet数据预处理的时候每条加上`{"code_interpreter": {"create_kwargs": {'dummy': None}}}`(具体见`examples/data_preprocess/convert_eurus_tir.py`)后,这个配置文件的`tool_schema`部分会给加到system prompt里面去。
具体执行tool的代码应该在`verl/tools/sandbox_fusion_tools.py``async def execute`那边。
......
# Tool registry consumed by verl's multi-turn rollout (sandbox-fusion backend).
tools:
  - class_name: "verl.tools.sandbox_fusion_tools.SandboxFusionTool"
    config:
      # NOTE(review): hard-coded sandbox endpoint — the sibling template file
      # uses $SANDBOX_URL instead; confirm which file is authoritative.
      sandbox_fusion_url: "http://10.21.0.12:8181/run_code"
      num_workers: 32
      enable_global_rate_limit: true
      rate_limit: 32
      # Per-execution timeout, presumably seconds — TODO confirm.
      default_timeout: 30
      default_language: "verilog"
    # OpenAI-style function schema; per the docs above, this section is
    # injected into the system prompt.
    tool_schema:
      type: "function"
      function:
        name: "code_interpreter"
        description: "A code execution tool."
        parameters:
          type: "object"
          properties:
            code:
              type: "string"
              description: "The verilog code to execute."
          required: ["code"]
\ No newline at end of file
# Tool registry template: $SANDBOX_URL is substituted via envsubst by the
# launch script before the file is handed to verl's multi-turn rollout.
tools:
  - class_name: "verl.tools.sandbox_fusion_tools.SandboxFusionTool"
    config:
      # Filled in at launch time because the sandbox IP depends on which
      # compute node the local sandbox service landed on.
      sandbox_fusion_url: "$SANDBOX_URL"
      num_workers: 32
      enable_global_rate_limit: true
      rate_limit: 32
      # Per-execution timeout, presumably seconds — TODO confirm.
      default_timeout: 30
      default_language: "verilog"
    # OpenAI-style function schema; per the docs above, this section is
    # injected into the system prompt.
    tool_schema:
      type: "function"
      function:
        name: "code_interpreter"
        description: "A code execution tool."
        parameters:
          type: "object"
          properties:
            code:
              type: "string"
              description: "The verilog code to execute."
          required: ["code"]
\ No newline at end of file
# GRPO training of Qwen2.5-Coder-7B-Instruct on the codev Verilog dataset,
# with an sglang multi-turn rollout that can invoke the sandbox-fusion tool
# ($TOOL_CFG_PATH) and a prime reward manager scoring via $SANDBOX_URL.
# Extra CLI overrides may be appended when invoking this script ($@).
set -x
export VERL_LOGGING_LEVEL=INFO
python3 -X faulthandler -u -m verl.trainer.main_ppo \
    reward_model.sandbox_fusion.url=$SANDBOX_URL \
    reward_model.sandbox_fusion.max_concurrent=128 \
    reward_model.reward_manager=prime \
    algorithm.adv_estimator=grpo \
    data.train_files=$CURR_DIR/data/codev/v1/16k_r1_filtered/train.parquet \
    data.val_files=$CURR_DIR/data/codev/v1/16k_r1_filtered/test.parquet \
    data.train_batch_size=64 \
    data.max_prompt_length=2048 \
    data.max_response_length=4096 \
    data.return_raw_chat=True \
    data.filter_overlong_prompts=True \
    data.truncation='error' \
    actor_rollout_ref.model.path=/nfs_global/models/Qwen2.5-Coder-7B-Instruct \
    actor_rollout_ref.actor.optim.lr=1e-6 \
    actor_rollout_ref.model.use_remove_padding=True \
    actor_rollout_ref.model.use_liger=True \
    actor_rollout_ref.actor.use_dynamic_bsz=True \
    actor_rollout_ref.actor.ppo_mini_batch_size=64 \
    actor_rollout_ref.actor.ppo_max_token_len_per_gpu=12000 \
    actor_rollout_ref.actor.fsdp_config.param_offload=False \
    actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
    actor_rollout_ref.actor.use_kl_loss=False \
    actor_rollout_ref.model.enable_gradient_checkpointing=True \
    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
    actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
    actor_rollout_ref.rollout.name=sglang \
    actor_rollout_ref.rollout.multi_turn.enable=True \
    actor_rollout_ref.rollout.multi_turn.max_turns=3 \
    actor_rollout_ref.rollout.multi_turn.tool_config_path=$TOOL_CFG_PATH \
    actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
    actor_rollout_ref.rollout.n=16 \
    algorithm.use_kl_in_reward=False \
    trainer.critic_warmup=0 \
    trainer.logger=['console'] \
    trainer.project_name='verl_example_sandbox_fusion' \
    trainer.experiment_name='codev_sandbox_fusion' \
    trainer.n_gpus_per_node=8 \
    trainer.nnodes=1 \
    trainer.save_freq=20 \
    trainer.test_freq=1 \
    trainer.total_epochs=15 $@
# NOTE: actor_rollout_ref.rollout.multi_turn.max_turns was later renamed to
# max_assistant_turns in newer verl versions.
\ No newline at end of file
......@@ -30,7 +30,7 @@ python3 -X faulthandler -u -m verl.trainer.main_ppo \
actor_rollout_ref.rollout.name=sglang \
actor_rollout_ref.rollout.multi_turn.enable=True \
actor_rollout_ref.rollout.multi_turn.max_turns=3 \
actor_rollout_ref.rollout.multi_turn.tool_config_path=$CURR_DIR/examples/tir/sandbox_fusion_tool_config.yaml \
actor_rollout_ref.rollout.multi_turn.tool_config_path=$TOOL_CFG_PATH \
actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
actor_rollout_ref.rollout.n=16 \
algorithm.use_kl_in_reward=False \
......
......@@ -10,4 +10,7 @@
# python examples/data_preprocess/codev.py --data_path /nfs_global/S/lvhanqi/codev_data/sft_model_filter_error_rate_l_0.2_from_87k.jsonl --local_dir data/codev/v1/err_l0.2_16k_r1_filtered --train_size 16364 --test_size 300
# python examples/data_preprocess/codev.py --data_path /nfs_global/S/lvhanqi/codev_data/sft_model_filter_error_rate_l_0.2_from_87k.jsonl --local_dir data/codev/v1/err_l0.2_16k_r1_filtered_double_gt --gt double --train_size 16364 --test_size 300
# python examples/data_preprocess/codev.py --data_path /nfs_global/S/lvhanqi/codev_data/sft_model_qwen7b32b_filter_gt_r1_error_rate_e_0.5_from_87k.jsonl --local_dir data/codev/v1/qwen7b32b_filter_gt_r1_error_rate_e_0.5_7.4k --gt r1 --train_size 7204 --test_size 200
python examples/data_preprocess/codev.py --data_path /nfs_global/S/lvhanqi/codev_data/sft_model_87k_correct1234_filter_qwen7b32b_data.jsonl --local_dir data/codev/v1/qwen7b32b_filter_gt_r1_14k --gt r1 --train_size 14654 --test_size 300
\ No newline at end of file
# python examples/data_preprocess/codev.py --data_path /nfs_global/S/lvhanqi/codev_data/sft_model_87k_correct1234_filter_qwen7b32b_data.jsonl --local_dir data/codev/v1/qwen7b32b_filter_gt_r1_14k --gt r1 --train_size 14654 --test_size 300
# tencent cloud
python examples/data_preprocess/codev.py --tool --tokenizer_path /nfs_global/models/Qwen2.5-Coder-7B-Instruct/ --data_path /nfs_global/datasets/codev/codev_r1_rl_16k.jsonl --local_dir data/codev/v1/16k_r1_filtered --train_size 15691 --test_size 300
\ No newline at end of file
......@@ -87,6 +87,13 @@ def default_compute_score(data_source, solution_str, ground_truth, extra_info=No
from . import search_r1_like_qa_em
res = search_r1_like_qa_em.compute_score(solution_str, ground_truth)
elif data_source in ['codev']:
# if sandbox_fusion_url:
# from . import sandbox_fusion
# res = sandbox_fusion.compute_score(sandbox_fusion_url, concurrent_semaphore, solution_str, ground_truth, continuous=True)
# else:
from . import codev
res = codev.compute_score(solution_str, ground_truth)
else:
raise NotImplementedError(f"Reward function is not implemented for {data_source=}")
except Exception as e:
......
import hashlib
import json
import os
import platform
import random
import re
import shutil
from multiprocessing import Process, Queue
from queue import Empty as QueueEmpty

import psutil
from tqdm.contrib.concurrent import process_map

from verl.utils.reward_score.codev_eval_toolkit.verify import eda_tools
# # 根据不同系统导入不同的文件锁模块
# if platform.system() == 'Windows':
# import msvcrt
# else:
# import fcntl
# # 假设的锁文件路径
# LOCK_FILE_PATH = '.lock'
# def create_lock_file():
# if not os.path.exists(LOCK_FILE_PATH):
# with open(LOCK_FILE_PATH, 'w') as f:
# pass
# def acquire_lock():
# if platform.system() == 'Windows':
# f = open(LOCK_FILE_PATH, 'r+')
# msvcrt.locking(f.fileno(), msvcrt.LK_LOCK, 1)
# return f
# else:
# f = open(LOCK_FILE_PATH, 'r+')
# fcntl.flock(f.fileno(), fcntl.LOCK_EX)
# return f
# def release_lock(f):
# if platform.system() == 'Windows':
# msvcrt.locking(f.fileno(), msvcrt.LK_UNLCK, 1)
# else:
# fcntl.flock(f.fileno(), fcntl.LOCK_UN)
# f.close()
def verify_one_sample(gold_code, dut_code, uid=None):
    """Check functional equivalence of a candidate (DUT) Verilog module
    against a gold reference using the EDA toolkit.

    Args:
        gold_code: reference Verilog source, or a falsy value.
        dut_code: candidate Verilog source, or a falsy value.
        uid: accepted for call-site compatibility but ignored — a fresh
            random id is always generated so concurrent workers never
            collide on temp-file names.

    Returns:
        dict with at least a boolean "correct" key; on failure it may also
        carry "parse_error" or "test_error"; on a completed run it carries
        "error_rate" and "detail" from the equivalence check.
    """
    # Empty/None sources can never be equivalent — bail out before spinning
    # up the EDA toolchain at all.
    if not gold_code or not dut_code:
        return {"correct": False}
    # Random salt + md5 keeps temp paths unique across parallel workers.
    salt = dut_code + str(random.randint(0, 2147483647))
    run_id = hashlib.md5(salt.encode("utf-8")).hexdigest()
    v = eda_tools(quiet=True)
    try:
        gold_top = v.auto_top(gold_code)
        gate_top = v.auto_top(dut_code)
    except Exception as e:
        # Gold or DUT source failed to parse (syntax problems etc.).
        return {"correct": False, "parse_error": e.args}
    gold_path = f"./tmp/testcase/{run_id}_gold.v"
    dut_path = f"./tmp/testcase/{run_id}_dut.v"
    test_path = f"./tmp/work/{run_id}"
    # exist_ok makes the pre-existence checks of the original redundant;
    # makedirs also creates ./tmp/work implicitly for test_path.
    os.makedirs("./tmp/testcase", exist_ok=True)
    os.makedirs(test_path, exist_ok=True)
    with open(gold_path, "w") as f:
        f.write(gold_code)
    with open(dut_path, "w") as f:
        f.write(dut_code)
    result = None
    try:
        # equiv is (correct: bool, error_rate, detail) per the usage below.
        equiv = v.equiv_with_testbench(
            gold_path,
            dut_path,
            gold_top,
            gate_top,
            test_path,
        )
    except Exception as e:
        result = {"correct": False, "test_error": e.args}
    finally:
        # Always clean up temp artifacts, even when the check raised.
        for path in (gold_path, dut_path):
            if os.path.exists(path):
                os.remove(path)
        # shutil.rmtree instead of os.system(f"rm -r {path}"): portable and
        # immune to shell-quoting issues in the path.
        shutil.rmtree(test_path, ignore_errors=True)
    if result is None:
        result = {"correct": equiv[0], "error_rate": equiv[1], "detail": equiv[2]}
    return result
def kill_process_tree(pid):
    """Terminate the process `pid` and all of its descendants.

    Children are terminated before the parent. Processes that have already
    exited are skipped instead of letting psutil.NoSuchProcess propagate,
    which the original version did on any race with a dying child.
    """
    try:
        parent = psutil.Process(pid)
    except psutil.NoSuchProcess:
        return  # target already gone — nothing to do
    for child in parent.children(recursive=True):
        try:
            child.terminate()
        except psutil.NoSuchProcess:
            pass  # child exited between listing and terminate
    try:
        parent.terminate()
    except psutil.NoSuchProcess:
        pass
def verify_one_sample_wrapper(args):
    """Run verify_one_sample(*args) in a child process with a 30s timeout.

    Returns the verification dict, {"correct": False, "timeout": True} when
    the child exceeds the timeout, or {"correct": False, "crash": True} when
    the child exited without producing a result — the original version
    blocked forever on queue.get() in that case.
    """
    def target(result_queue):
        result_queue.put(verify_one_sample(*args))

    result_queue = Queue()
    process = Process(target=target, args=(result_queue,))
    process.start()
    process.join(timeout=30)
    if process.is_alive():
        # Timed out: kill the worker and everything it spawned.
        kill_process_tree(process.pid)
        process.join()
        print("Function timed out!")
        return {"correct": False, "timeout": True}
    try:
        # Bounded get: a crashed child never put anything on the queue, and
        # the feeder thread may still be flushing, so allow a short grace.
        return result_queue.get(timeout=5)
    except QueueEmpty:
        return {"correct": False, "crash": True}
def extract_verilog(verilog_code):
    """Return the contents of the last ```verilog fenced block in the text.

    Surrounding whitespace inside the fence is stripped by the pattern.
    Returns None when no fenced verilog block is present.
    """
    fenced = re.compile(r"```verilog\s*([\s\S]*?)\s*```").findall(verilog_code)
    return fenced[-1] if fenced else None
if __name__ == "__main__":
    # Evaluate 16 dataset shards: for each shard, compare every sampled DUT
    # against its gold reference and dump per-sample results as JSONL.
    for part in range(16):
        name = f"codev_dataset_165k_o1_part{part}"
        # Gold references: one JSON record per line; the Verilog is pulled
        # out of the last fenced ```verilog block of the response text.
        with open(f"data/evolve/{name}.jsonl", "r") as f:
            data_gold = list(map(json.loads, f.read().strip().splitlines()))
        data_gold = [extract_verilog(x["response"][0]["content"]) for x in data_gold]
        # Sampled DUT candidates — n_sample candidates per gold problem,
        # stored consecutively (hence the i // n_sample mapping below).
        with open(f"results/evolve/sample/{name}.jsonl", "r") as f:
            data_dut = list(map(json.loads, f.read().strip().splitlines()))
        problem_ids = [x["problem_id"] for x in data_dut]
        data_dut = [extract_verilog(x["response"][0]["content"]) for x in data_dut]
        print(len(data_gold), len(data_dut), len(problem_ids))
        assert len(data_dut) % len(data_gold) == 0
        n_sample = len(data_dut) // len(data_gold)
        testcases = []
        for i, dut in enumerate(data_dut):
            # DUT i belongs to gold problem i // n_sample.
            gold = data_gold[i // n_sample]
            testcases.append((gold, dut, i))
        # testcases = testcases[:1000]
        # Scratch dirs used by verify_one_sample for temp files.
        if not os.path.exists("./tmp/testcase"):
            os.makedirs("./tmp/testcase")
        if not os.path.exists("./tmp/work"):
            os.makedirs("./tmp/work")
        # cpu_num = multiprocessing.cpu_count()
        cpu_num = 64
        # chunksize = max(len(testcases) // (cpu_num * 5), 1)
        chunksize = 1
        # Parallel verification with a progress bar (tqdm process_map).
        results = process_map(verify_one_sample_wrapper, testcases, max_workers=cpu_num, chunksize=chunksize)
        # Re-attach problem ids (results come back in input order).
        for i in range(len(results)):
            results[i]["problem_id"] = problem_ids[i]
        with open(f"results/evolve/eval/{name}.jsonl", "w") as f:
            f.write("\n".join(map(json.dumps, results)) + "\n")
        print(f"{name}.jsonl is processed!!!")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment