Commit 3f1e287d by root

add example for verilog tool call, might need tuning the prompt

parent c765c731
......@@ -11,13 +11,13 @@
### 配置
TIR的主要配置在`examples/tir/run_sandbox_fusion.sh`和`examples/tir/sandbox_fusion_tool_config.yaml`里面。
其中,由于是本地起的沙盒服务,沙盒IP取决于被分配到了哪个计算节点,因此我套了一层`examples/tir/sandbox_fusion_tool_config.yaml.template`,在执行脚本里面用
TIR的主要配置在`examples/tir/sandbox_fusion_python_config.yaml`里面。
其中,由于是本地起的沙盒服务,沙盒IP取决于被分配到了哪个计算节点,因此我套了一层`examples/tir/sandbox_fusion_python_config.yaml.template`,在执行脚本里面用
```bash
TOOL_CONFIG_PATH=$CURR_DIR/examples/tir/sandbox_fusion_tool_config.yaml
TOOL_CONFIG_PATH=$CURR_DIR/examples/tir/sandbox_fusion_python_config.yaml
envsubst < "$TOOL_CONFIG_PATH.template" > $TOOL_CONFIG_PATH
```
生成实际的`examples/tir/sandbox_fusion_tool_config.yaml`这个配置文件的`tool_schema`部分会给加到system prompt里面去。
生成实际的`$TOOL_CONFIG_PATH`。在parquet数据预处理的时候每条加上`{"code_interpreter": {"create_kwargs": {'dummy': None}}}`(具体见`examples/data_preprocess/convert_eurus_tir.py`)后,这个配置文件的`tool_schema`部分会给加到system prompt里面去。
具体执行tool的代码应该在`verl/tools/sandbox_fusion_tools.py``async def execute`那边。
......
# Tool registry consumed by verl's multi-turn rollout (sandbox-fusion backend).
tools:
  - class_name: "verl.tools.sandbox_fusion_tools.SandboxFusionTool"
    config:
      # NOTE(review): hard-coded sandbox endpoint — the sibling template file
      # uses $SANDBOX_URL instead; confirm which file is authoritative.
      sandbox_fusion_url: "http://10.21.0.12:8181/run_code"
      num_workers: 32
      enable_global_rate_limit: true
      rate_limit: 32
      # Per-execution timeout, presumably seconds — TODO confirm.
      default_timeout: 30
      default_language: "verilog"
    # OpenAI-style function schema; per the docs above, this section is
    # injected into the system prompt.
    tool_schema:
      type: "function"
      function:
        name: "code_interpreter"
        description: "A code execution tool."
        parameters:
          type: "object"
          properties:
            code:
              type: "string"
              description: "The verilog code to execute."
          required: ["code"]
\ No newline at end of file
# Tool registry template: $SANDBOX_URL is substituted via envsubst by the
# launch script before the file is handed to verl's multi-turn rollout.
tools:
  - class_name: "verl.tools.sandbox_fusion_tools.SandboxFusionTool"
    config:
      # Filled in at launch time because the sandbox IP depends on which
      # compute node the local sandbox service landed on.
      sandbox_fusion_url: "$SANDBOX_URL"
      num_workers: 32
      enable_global_rate_limit: true
      rate_limit: 32
      # Per-execution timeout, presumably seconds — TODO confirm.
      default_timeout: 30
      default_language: "verilog"
    # OpenAI-style function schema; per the docs above, this section is
    # injected into the system prompt.
    tool_schema:
      type: "function"
      function:
        name: "code_interpreter"
        description: "A code execution tool."
        parameters:
          type: "object"
          properties:
            code:
              type: "string"
              description: "The verilog code to execute."
          required: ["code"]
\ No newline at end of file
# GRPO training of Qwen2.5-Coder-7B-Instruct on the codev Verilog dataset,
# with an sglang multi-turn rollout that can invoke the sandbox-fusion tool
# ($TOOL_CFG_PATH) and a prime reward manager scoring via $SANDBOX_URL.
# Extra CLI overrides may be appended when invoking this script ($@).
set -x
export VERL_LOGGING_LEVEL=INFO
python3 -X faulthandler -u -m verl.trainer.main_ppo \
    reward_model.sandbox_fusion.url=$SANDBOX_URL \
    reward_model.sandbox_fusion.max_concurrent=128 \
    reward_model.reward_manager=prime \
    algorithm.adv_estimator=grpo \
    data.train_files=$CURR_DIR/data/codev/v1/16k_r1_filtered/train.parquet \
    data.val_files=$CURR_DIR/data/codev/v1/16k_r1_filtered/test.parquet \
    data.train_batch_size=64 \
    data.max_prompt_length=2048 \
    data.max_response_length=4096 \
    data.return_raw_chat=True \
    data.filter_overlong_prompts=True \
    data.truncation='error' \
    actor_rollout_ref.model.path=/nfs_global/models/Qwen2.5-Coder-7B-Instruct \
    actor_rollout_ref.actor.optim.lr=1e-6 \
    actor_rollout_ref.model.use_remove_padding=True \
    actor_rollout_ref.model.use_liger=True \
    actor_rollout_ref.actor.use_dynamic_bsz=True \
    actor_rollout_ref.actor.ppo_mini_batch_size=64 \
    actor_rollout_ref.actor.ppo_max_token_len_per_gpu=12000 \
    actor_rollout_ref.actor.fsdp_config.param_offload=False \
    actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
    actor_rollout_ref.actor.use_kl_loss=False \
    actor_rollout_ref.model.enable_gradient_checkpointing=True \
    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
    actor_rollout_ref.rollout.tensor_model_parallel_size=4 \
    actor_rollout_ref.rollout.name=sglang \
    actor_rollout_ref.rollout.multi_turn.enable=True \
    actor_rollout_ref.rollout.multi_turn.max_turns=3 \
    actor_rollout_ref.rollout.multi_turn.tool_config_path=$TOOL_CFG_PATH \
    actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
    actor_rollout_ref.rollout.n=16 \
    algorithm.use_kl_in_reward=False \
    trainer.critic_warmup=0 \
    trainer.logger=['console'] \
    trainer.project_name='verl_example_sandbox_fusion' \
    trainer.experiment_name='codev_sandbox_fusion' \
    trainer.n_gpus_per_node=8 \
    trainer.nnodes=1 \
    trainer.save_freq=20 \
    trainer.test_freq=1 \
    trainer.total_epochs=15 $@
# NOTE: actor_rollout_ref.rollout.multi_turn.max_turns was later renamed to
# max_assistant_turns in newer verl versions.
\ No newline at end of file
......@@ -30,7 +30,7 @@ python3 -X faulthandler -u -m verl.trainer.main_ppo \
actor_rollout_ref.rollout.name=sglang \
actor_rollout_ref.rollout.multi_turn.enable=True \
actor_rollout_ref.rollout.multi_turn.max_turns=3 \
actor_rollout_ref.rollout.multi_turn.tool_config_path=$CURR_DIR/examples/tir/sandbox_fusion_tool_config.yaml \
actor_rollout_ref.rollout.multi_turn.tool_config_path=$TOOL_CFG_PATH \
actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
actor_rollout_ref.rollout.n=16 \
algorithm.use_kl_in_reward=False \
......
......@@ -10,4 +10,7 @@
# python examples/data_preprocess/codev.py --data_path /nfs_global/S/lvhanqi/codev_data/sft_model_filter_error_rate_l_0.2_from_87k.jsonl --local_dir data/codev/v1/err_l0.2_16k_r1_filtered --train_size 16364 --test_size 300
# python examples/data_preprocess/codev.py --data_path /nfs_global/S/lvhanqi/codev_data/sft_model_filter_error_rate_l_0.2_from_87k.jsonl --local_dir data/codev/v1/err_l0.2_16k_r1_filtered_double_gt --gt double --train_size 16364 --test_size 300
# python examples/data_preprocess/codev.py --data_path /nfs_global/S/lvhanqi/codev_data/sft_model_qwen7b32b_filter_gt_r1_error_rate_e_0.5_from_87k.jsonl --local_dir data/codev/v1/qwen7b32b_filter_gt_r1_error_rate_e_0.5_7.4k --gt r1 --train_size 7204 --test_size 200
python examples/data_preprocess/codev.py --data_path /nfs_global/S/lvhanqi/codev_data/sft_model_87k_correct1234_filter_qwen7b32b_data.jsonl --local_dir data/codev/v1/qwen7b32b_filter_gt_r1_14k --gt r1 --train_size 14654 --test_size 300
\ No newline at end of file
# python examples/data_preprocess/codev.py --data_path /nfs_global/S/lvhanqi/codev_data/sft_model_87k_correct1234_filter_qwen7b32b_data.jsonl --local_dir data/codev/v1/qwen7b32b_filter_gt_r1_14k --gt r1 --train_size 14654 --test_size 300
# tencent cloud
python examples/data_preprocess/codev.py --tool --tokenizer_path /nfs_global/models/Qwen2.5-Coder-7B-Instruct/ --data_path /nfs_global/datasets/codev/codev_r1_rl_16k.jsonl --local_dir data/codev/v1/16k_r1_filtered --train_size 15691 --test_size 300
\ No newline at end of file
......@@ -87,6 +87,13 @@ def default_compute_score(data_source, solution_str, ground_truth, extra_info=No
from . import search_r1_like_qa_em
res = search_r1_like_qa_em.compute_score(solution_str, ground_truth)
elif data_source in ['codev']:
# if sandbox_fusion_url:
# from . import sandbox_fusion
# res = sandbox_fusion.compute_score(sandbox_fusion_url, concurrent_semaphore, solution_str, ground_truth, continuous=True)
# else:
from . import codev
res = codev.compute_score(solution_str, ground_truth)
else:
raise NotImplementedError(f"Reward function is not implemented for {data_source=}")
except Exception as e:
......
import hashlib
import json
import os
import platform
import random
import re
import shutil
from multiprocessing import Process, Queue
from queue import Empty as QueueEmpty

import psutil
from tqdm.contrib.concurrent import process_map

from verl.utils.reward_score.codev_eval_toolkit.verify import eda_tools
# # 根据不同系统导入不同的文件锁模块
# if platform.system() == 'Windows':
# import msvcrt
# else:
# import fcntl
# # 假设的锁文件路径
# LOCK_FILE_PATH = '.lock'
# def create_lock_file():
# if not os.path.exists(LOCK_FILE_PATH):
# with open(LOCK_FILE_PATH, 'w') as f:
# pass
# def acquire_lock():
# if platform.system() == 'Windows':
# f = open(LOCK_FILE_PATH, 'r+')
# msvcrt.locking(f.fileno(), msvcrt.LK_LOCK, 1)
# return f
# else:
# f = open(LOCK_FILE_PATH, 'r+')
# fcntl.flock(f.fileno(), fcntl.LOCK_EX)
# return f
# def release_lock(f):
# if platform.system() == 'Windows':
# msvcrt.locking(f.fileno(), msvcrt.LK_UNLCK, 1)
# else:
# fcntl.flock(f.fileno(), fcntl.LOCK_UN)
# f.close()
def verify_one_sample(gold_code, dut_code, uid=None):
    """Check functional equivalence of a candidate (DUT) Verilog module
    against a gold reference using the EDA toolkit.

    Args:
        gold_code: reference Verilog source, or a falsy value.
        dut_code: candidate Verilog source, or a falsy value.
        uid: accepted for call-site compatibility but ignored — a fresh
            random id is always generated so concurrent workers never
            collide on temp-file names.

    Returns:
        dict with at least a boolean "correct" key; on failure it may also
        carry "parse_error" or "test_error"; on a completed run it carries
        "error_rate" and "detail" from the equivalence check.
    """
    # Empty/None sources can never be equivalent — bail out before spinning
    # up the EDA toolchain at all.
    if not gold_code or not dut_code:
        return {"correct": False}
    # Random salt + md5 keeps temp paths unique across parallel workers.
    salt = dut_code + str(random.randint(0, 2147483647))
    run_id = hashlib.md5(salt.encode("utf-8")).hexdigest()
    v = eda_tools(quiet=True)
    try:
        gold_top = v.auto_top(gold_code)
        gate_top = v.auto_top(dut_code)
    except Exception as e:
        # Gold or DUT source failed to parse (syntax problems etc.).
        return {"correct": False, "parse_error": e.args}
    gold_path = f"./tmp/testcase/{run_id}_gold.v"
    dut_path = f"./tmp/testcase/{run_id}_dut.v"
    test_path = f"./tmp/work/{run_id}"
    # exist_ok makes the pre-existence checks of the original redundant;
    # makedirs also creates ./tmp/work implicitly for test_path.
    os.makedirs("./tmp/testcase", exist_ok=True)
    os.makedirs(test_path, exist_ok=True)
    with open(gold_path, "w") as f:
        f.write(gold_code)
    with open(dut_path, "w") as f:
        f.write(dut_code)
    result = None
    try:
        # equiv is (correct: bool, error_rate, detail) per the usage below.
        equiv = v.equiv_with_testbench(
            gold_path,
            dut_path,
            gold_top,
            gate_top,
            test_path,
        )
    except Exception as e:
        result = {"correct": False, "test_error": e.args}
    finally:
        # Always clean up temp artifacts, even when the check raised.
        for path in (gold_path, dut_path):
            if os.path.exists(path):
                os.remove(path)
        # shutil.rmtree instead of os.system(f"rm -r {path}"): portable and
        # immune to shell-quoting issues in the path.
        shutil.rmtree(test_path, ignore_errors=True)
    if result is None:
        result = {"correct": equiv[0], "error_rate": equiv[1], "detail": equiv[2]}
    return result
def kill_process_tree(pid):
    """Terminate the process `pid` and all of its descendants.

    Children are terminated before the parent. Processes that have already
    exited are skipped instead of letting psutil.NoSuchProcess propagate,
    which the original version did on any race with a dying child.
    """
    try:
        parent = psutil.Process(pid)
    except psutil.NoSuchProcess:
        return  # target already gone — nothing to do
    for child in parent.children(recursive=True):
        try:
            child.terminate()
        except psutil.NoSuchProcess:
            pass  # child exited between listing and terminate
    try:
        parent.terminate()
    except psutil.NoSuchProcess:
        pass
def verify_one_sample_wrapper(args):
    """Run verify_one_sample(*args) in a child process with a 30s timeout.

    Returns the verification dict, {"correct": False, "timeout": True} when
    the child exceeds the timeout, or {"correct": False, "crash": True} when
    the child exited without producing a result — the original version
    blocked forever on queue.get() in that case.
    """
    def target(result_queue):
        result_queue.put(verify_one_sample(*args))

    result_queue = Queue()
    process = Process(target=target, args=(result_queue,))
    process.start()
    process.join(timeout=30)
    if process.is_alive():
        # Timed out: kill the worker and everything it spawned.
        kill_process_tree(process.pid)
        process.join()
        print("Function timed out!")
        return {"correct": False, "timeout": True}
    try:
        # Bounded get: a crashed child never put anything on the queue, and
        # the feeder thread may still be flushing, so allow a short grace.
        return result_queue.get(timeout=5)
    except QueueEmpty:
        return {"correct": False, "crash": True}
def extract_verilog(verilog_code):
    """Return the contents of the last ```verilog fenced block in the text.

    Surrounding whitespace inside the fence is stripped by the pattern.
    Returns None when no fenced verilog block is present.
    """
    fenced = re.compile(r"```verilog\s*([\s\S]*?)\s*```").findall(verilog_code)
    return fenced[-1] if fenced else None
if __name__ == "__main__":
    # Evaluate 16 dataset shards: for each shard, compare every sampled DUT
    # against its gold reference and dump per-sample results as JSONL.
    for part in range(16):
        name = f"codev_dataset_165k_o1_part{part}"
        # Gold references: one JSON record per line; the Verilog is pulled
        # out of the last fenced ```verilog block of the response text.
        with open(f"data/evolve/{name}.jsonl", "r") as f:
            data_gold = list(map(json.loads, f.read().strip().splitlines()))
        data_gold = [extract_verilog(x["response"][0]["content"]) for x in data_gold]
        # Sampled DUT candidates — n_sample candidates per gold problem,
        # stored consecutively (hence the i // n_sample mapping below).
        with open(f"results/evolve/sample/{name}.jsonl", "r") as f:
            data_dut = list(map(json.loads, f.read().strip().splitlines()))
        problem_ids = [x["problem_id"] for x in data_dut]
        data_dut = [extract_verilog(x["response"][0]["content"]) for x in data_dut]
        print(len(data_gold), len(data_dut), len(problem_ids))
        assert len(data_dut) % len(data_gold) == 0
        n_sample = len(data_dut) // len(data_gold)
        testcases = []
        for i, dut in enumerate(data_dut):
            # DUT i belongs to gold problem i // n_sample.
            gold = data_gold[i // n_sample]
            testcases.append((gold, dut, i))
        # testcases = testcases[:1000]
        # Scratch dirs used by verify_one_sample for temp files.
        if not os.path.exists("./tmp/testcase"):
            os.makedirs("./tmp/testcase")
        if not os.path.exists("./tmp/work"):
            os.makedirs("./tmp/work")
        # cpu_num = multiprocessing.cpu_count()
        cpu_num = 64
        # chunksize = max(len(testcases) // (cpu_num * 5), 1)
        chunksize = 1
        # Parallel verification with a progress bar (tqdm process_map).
        results = process_map(verify_one_sample_wrapper, testcases, max_workers=cpu_num, chunksize=chunksize)
        # Re-attach problem ids (results come back in input order).
        for i in range(len(results)):
            results[i]["problem_id"] = problem_ids[i]
        with open(f"results/evolve/eval/{name}.jsonl", "w") as f:
            f.write("\n".join(map(json.dumps, results)) + "\n")
        print(f"{name}.jsonl is processed!!!")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment