Commit 2c7b8a6b by Your Name

fix: a str bug & num seq param

parent ad1b62bf
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -218,7 +218,7 @@ class CoTEnv(BaseEnv):
processed_act = self.post_process_act(texts[i])
if (
len(processed_act) > 0
-and processed_act not in text_list
+# and processed_act not in text_list
# only stop is valid, otherwise the output action is truncated actually
and result.finish_reason[i] == "stop"
):
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -21,7 +21,7 @@
#SBATCH --gres=gpu:8 # Request M GPU per node
#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
#SBATCH --qos=gpu-normal # Request QOS Type
-#SBATCH --constraint="L40S"
+#SBATCH --constraint="L40"
###
### The system will alloc 8 or 16 cores per gpu by default.
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -71,6 +71,8 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
pattern = '<\|im_start\|>user\s*\n\s*(.*?)\s*<\|im_end\|>'
match = re.search(pattern, input_str, re.DOTALL)
+if match is None:
+print(input_str)
assert match is not None, f"No match found for pattern: {pattern}"
question = match.group(1)
solution = question_item_map[question]["solution"]
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -78,7 +78,8 @@ if __name__ == "__main__":
else:
# assume qwen
prm_step_tag = "\n\n\n\n\n "
-prm_format_str = "{question} {answer}"
+# prm_format_str = "{question} {answer}"
+prm_format_str = "<|im_start|>user\n{question}<|im_end|> {answer}"
if "qwen" in config.LM.lower():
lm_step_tag = "\n\n"
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -430,14 +430,22 @@ class SearchTree:
self._expand_leaf_node(root, simulate_env, reward_model_fn)
self.root = root
-end_nodes, top_k_nodes = [], [(-root._initial_value, root, simulate_env.copy())]
+simulate_env_copy = simulate_env.copy()
+print(simulate_env_copy.config["max_actions"])
+simulate_env_copy.config["max_actions"] = int(simulate_env_copy.config["max_actions"] / beam_size)
+print(simulate_env_copy.config["max_actions"])
+# end_nodes, top_k_nodes = [], [(-root._initial_value, root, simulate_env.copy())]
+end_nodes, top_k_nodes = [], [(-root._initial_value, root, simulate_env_copy)]
k = beam_size
+print("k: ", k)
for _ in range(max_step + 1):
cur_nodes_to_search = top_k_nodes
top_k_nodes = []
for cur_neg_v, cur_node, cur_env in cur_nodes_to_search:
+print("cur_node.children_num: ", len(cur_node.children))
if cur_node.terminated:
+print("signal for k-1")
end_nodes.append((cur_neg_v, cur_node, cur_env))
k -= 1
elif k > 0:
......@@ -454,6 +462,7 @@ class SearchTree:
key=lambda x: x[2],
reverse=True,
)[:k]
+print("top_k_children_num: ", len(top_k_children))
for c_act, c_node, c_value in top_k_children:
new_env = cur_env.copy()
heapq.heappush(top_k_nodes, (-c_value, c_node, new_env))
......@@ -679,7 +688,7 @@ class SearchTree:
prms = reward_fn(
[
(
-f'<|im_start|>user\n{simulate_env.question}<|im_end|>',
+simulate_env.question,
simulate_env.answer + x["action"],
)
for x in simulate_env.legal_actions
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -26,7 +26,7 @@ python reason/evaluation/evaluate.py \
--task_name MATH \
--temperature 0.7 \
--max_new_tokens 2048 \
---num_sequence 1 \
+--num_sequence 2 \
--tree_max_width 4 \
--tree_max_depth 50 \
--save_dir $SAVE_DIR \
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment