Commit 560f8208 by root

fix some bugs

parent d22477c0
......@@ -222,6 +222,12 @@ builtins.print = traced_print
'event': 'stdin pipe closed early while writing large input', 'logger': 'sandbox.runners.base', 'level': 'warning'
```
#### Prompt-length pre-filter ignored tool tokens, causing later length-exceeded errors
In verl, the `_read_files_and_tokenize` function in `verl/utils/dataset/rl_dataset.py` pre-filters prompts by length without counting the tool portion of the prompt, so prompt lengths are underestimated and runs later fail once the true length exceeds the limit.
#### Finally, a (presumably) normal initial test-set accuracy
**Warning: this result may have been dragged down by sympy timeouts, so the accuracy could be lower than it should be!**
......
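For clarity, this is the check the pre-filter should perform. A minimal standalone sketch, not verl's actual `_read_files_and_tokenize` code: it assumes a Hugging Face tokenizer whose chat template accepts `tools=`, and the tool schema shown is a hypothetical stand-in for the one loaded from the YAML config in the preprocessing fix below.

```python
from transformers import AutoTokenizer

MAX_TOKEN_LEN = 2048  # should match data.max_prompt_length in the trainer config

def prompt_fits(tokenizer, messages, tools, margin=20):
    """Count prompt tokens the way the rollout will see them: with the
    tool schema rendered into the chat template, not just the messages."""
    token_ids = tokenizer.apply_chat_template(
        messages,
        tools=tools,  # the part the original pre-filter left out
        add_generation_prompt=False,
        tokenize=True,
        truncation=False,
    )
    return len(token_ids) <= MAX_TOKEN_LEN - margin

if __name__ == "__main__":
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-7B-Instruct")  # assumed HF id
    tools = [{  # illustrative schema; the real one comes from the tools YAML below
        "type": "function",
        "function": {
            "name": "code_interpreter",
            "description": "Run code in a sandbox.",
            "parameters": {"type": "object",
                           "properties": {"code": {"type": "string"}},
                           "required": ["code"]},
        },
    }]
    print(prompt_fits(tokenizer, [{"role": "user", "content": "hi"}], tools))
```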
......@@ -72,7 +72,8 @@ async def run_command_bare(command: str | List[str],
if p.stdin:
print('p.stdin is', p.stdin)
p.stdin.write(stdin.encode())
p.stdin.flush()
# p.stdin.flush()
await p.stdin.drain()
else:
logger.warning("Attempted to write to stdin, but stdin is closed.")
except Exception as e:
......
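This hunk is the fix for the `stdin pipe closed early while writing large input` warning quoted above: `p.stdin` on an asyncio subprocess is an `asyncio.StreamWriter`, which has no `flush()` method; the awaitable `drain()` is what actually pushes a large buffer through the pipe with backpressure. A minimal sketch of the pattern, assuming the process is spawned with `asyncio.create_subprocess_exec` (the helper name and the `cat` command are illustrative):

```python
import asyncio

async def run_with_stdin(command: list[str], stdin_data: str) -> bytes:
    # Process.stdin from create_subprocess_exec is an asyncio.StreamWriter,
    # not a blocking file object: it has drain(), but no flush().
    p = await asyncio.create_subprocess_exec(
        *command,
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
    )
    assert p.stdin is not None and p.stdout is not None

    async def feed() -> None:
        p.stdin.write(stdin_data.encode())  # only buffers the bytes
        await p.stdin.drain()               # backpressure: wait for the pipe to take them
        p.stdin.close()                     # send EOF so the child stops reading

    # Read stdout concurrently with feeding stdin: with large inputs,
    # drain() alone can stall if the child blocks on a full stdout pipe.
    _, stdout = await asyncio.gather(feed(), p.stdout.read())
    await p.wait()
    return stdout

if __name__ == "__main__":
    out = asyncio.run(run_with_stdin(["cat"], "x" * 1_000_000))
    print(len(out))  # 1000000: the full input made it through
```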
......@@ -96,14 +96,6 @@ if __name__ == '__main__':
question = mk_prompt_r1(question)
return question
tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_path)
raw_dataset = raw_dataset.shuffle(seed=42)
print(len(raw_dataset))
assert len(raw_dataset) >= TRAIN_SIZE + TEST_SIZE
train_dataset = raw_dataset.select(range(TRAIN_SIZE))
test_dataset = raw_dataset.select(range(TRAIN_SIZE, TRAIN_SIZE + TEST_SIZE))
def make_map_fn(split):
def process_fn(example, idx):
if "ground_truth" in example:
......@@ -134,8 +126,38 @@ if __name__ == '__main__':
return data
return process_fn
train_dataset = train_dataset.map(function=make_map_fn('train'), with_indices=True)
test_dataset = test_dataset.map(function=make_map_fn('test'), with_indices=True)
raw_dataset = raw_dataset.map(function=make_map_fn('train'), with_indices=True)
MAX_TOKEN_LEN = 2048
tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_path)
from omegaconf import OmegaConf
tools_config_file = 'examples/tir/sandbox_fusion_verilog_config.yaml'
tools = OmegaConf.load(tools_config_file)
tools = [OmegaConf.to_container(tools.tools[0].tool_schema, resolve=True)]
def filter_by_token_len(example):
question = make_question(example["prompt"])
token_ids = tokenizer.apply_chat_template(
question,
tools=tools,
add_generation_prompt=False,
tokenize=True,
truncation=False,
)
token_len = len(token_ids)
example["question_token_len"] = token_len
return token_len <= MAX_TOKEN_LEN - 20
raw_dataset = raw_dataset.filter(
function=filter_by_token_len,
desc="Filtering train dataset by token length"  # progress-bar description
)
raw_dataset = raw_dataset.shuffle(seed=42)
print(len(raw_dataset))
assert len(raw_dataset) >= TRAIN_SIZE + TEST_SIZE
train_dataset = raw_dataset.select(range(TRAIN_SIZE))
test_dataset = raw_dataset.select(range(TRAIN_SIZE, TRAIN_SIZE + TEST_SIZE))
local_dir = args.local_dir
hdfs_dir = args.hdfs_dir
......
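Two details in this hunk are worth noting: the length filter now runs before the shuffle and train/test split, so the `TRAIN_SIZE + TEST_SIZE` assertion checks the post-filter count; and the filter keeps a 20-token margin, presumably because it tokenizes with `add_generation_prompt=False` while the rollout appends the generation prompt. A quick sanity check that the margin covers the template's generation prompt (the Hugging Face model id is an assumption):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-7B-Instruct")  # assumed HF id
msgs = [{"role": "user", "content": "hello"}]

with_gen = tokenizer.apply_chat_template(msgs, add_generation_prompt=True, tokenize=True)
without_gen = tokenizer.apply_chat_template(msgs, add_generation_prompt=False, tokenize=True)

# The generation prompt for Qwen-style templates is a few tokens
# (e.g. "<|im_start|>assistant\n"), comfortably under the 20-token margin.
print(len(with_gen) - len(without_gen))
```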
......@@ -3,14 +3,15 @@ export VERL_LOGGING_LEVEL=INFO
# export VERL_LOGGING_LEVEL=DEBUG
# MODEL_PATH=/nfs_global/models/Qwen2.5-Coder-7B-Instruct
MODEL_PATH=/nfs_global/models/Qwen3-8B
DATA_PATH=$CURR_DIR/data/codev/v1/16k_r1_filtered
python3 -X faulthandler -u -m verl.trainer.main_ppo \
reward_model.sandbox_fusion.url=$SANDBOX_URL \
reward_model.sandbox_fusion.max_concurrent=128 \
reward_model.reward_manager=prime \
algorithm.adv_estimator=grpo \
data.train_files=$CURR_DIR/data/codev/v1/16k_r1_filtered/train.parquet \
data.val_files=$CURR_DIR/data/codev/v1/16k_r1_filtered/test.parquet \
data.train_files=$DATA_PATH/train.parquet \
data.val_files=$DATA_PATH/test.parquet \
data.train_batch_size=64 \
data.max_prompt_length=2048 \
data.max_response_length=8192 \
......@@ -44,6 +45,7 @@ python3 -X faulthandler -u -m verl.trainer.main_ppo \
trainer.n_gpus_per_node=8 \
trainer.nnodes=1 \
trainer.save_freq=20 \
trainer.default_local_dir=$SAVE_DIR \
trainer.test_freq=1 \
trainer.total_epochs=15 $@
......
......@@ -41,6 +41,7 @@ python3 -X faulthandler -u -m verl.trainer.main_ppo \
trainer.n_gpus_per_node=8 \
trainer.nnodes=1 \
trainer.save_freq=20 \
trainer.default_local_dir=$SAVE_DIR \
trainer.test_freq=1 \
trainer.total_epochs=15 $@
......
......@@ -13,4 +13,5 @@
# python examples/data_preprocess/codev.py --data_path /nfs_global/S/lvhanqi/codev_data/sft_model_87k_correct1234_filter_qwen7b32b_data.jsonl --local_dir data/codev/v1/qwen7b32b_filter_gt_r1_14k --gt r1 --train_size 14654 --test_size 300
# tencent cloud
python examples/data_preprocess/codev.py --tool --tokenizer_path /nfs_global/models/Qwen2.5-Coder-7B-Instruct/ --data_path /nfs_global/datasets/codev/codev_r1_rl_16k.jsonl --local_dir data/codev/v1/16k_r1_filtered --train_size 15691 --test_size 300
\ No newline at end of file
python examples/data_preprocess/codev.py --tool --tokenizer_path /nfs_global/models/Qwen2.5-Coder-7B-Instruct/ --data_path /nfs_global/datasets/codev/codev_r1_rl_16k.jsonl --local_dir data/codev/v1/16k_r1_filtered --train_size 15685 --test_size 300
# python examples/data_preprocess/codev.py --tool --tokenizer_path /nfs_global/models/Qwen2.5-Coder-7B-Instruct/ --data_path /nfs_global/datasets/codev/codev_r1_rl_16k.jsonl --local_dir data/codev/v1/test --train_size 139 --test_size 11
\ No newline at end of file
......@@ -159,7 +159,7 @@ class SandboxFusionTool(BaseTool):
code = str(code)
result = await self.execution_pool.execute.remote(self.execute_code, instance_id, code, timeout, language)
print(f'In sandbox code execution, code is {code}, timeout is {timeout}, language is {language}, result is {result}, parameter keys are {parameters.keys()}.')
# print(f'In sandbox code execution, code is {code}, timeout is {timeout}, language is {language}, result is {result}, parameter keys are {parameters.keys()}.')
return result, result, result.strip()
......