Merge branch 'swx' into 'master'

Swx See merge request !1

Merge branch 'swx' into 'master'
Swx See merge request !1
00cffb98 · Shi wenxuan · ca6feac2 · 2c7b8a6b · 00cffb98 · 00cffb98
Commit 00cffb98 authored Mar 04, 2025 by Shi wenxuan
184 changed files
--- a/.gitignore
+++ b/.gitignore
--- a/README.md
+++ b/README.md
--- a/analysis/README.md
+++ b/analysis/README.md
+### beam size关键路径
+prm/openr/reason/evaluation/evaluate.py line 201
+prm/openr/reason/evaluation/methods.py line 122
+prm/openr/reason/guided_search/tree.py line 434
+prm/openr/reason/guided_search/tree.py line 449
+prm/openr/reason/guided_search/tree.py line 461
\ No newline at end of file
--- a/openr/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/openr/.github/ISSUE_TEMPLATE/bug-report.yml
--- a/openr/.github/ISSUE_TEMPLATE/feature-request.yml
+++ b/openr/.github/ISSUE_TEMPLATE/feature-request.yml
--- a/openr/.gitignore
+++ b/openr/.gitignore
--- a/openr/CONTRIBUTING.md
+++ b/openr/CONTRIBUTING.md
--- a/openr/LICENSE
+++ b/openr/LICENSE
--- a/openr/README.md
+++ b/openr/README.md
--- a/openr/README_zh.md
+++ b/openr/README_zh.md
--- a/openr/benchmark/plots.md
+++ b/openr/benchmark/plots.md
--- a/openr/benchmark/tables.md
+++ b/openr/benchmark/tables.md
--- a/openr/config/__init__.py
+++ b/openr/config/__init__.py
--- a/openr/config/config_utils.py
+++ b/openr/config/config_utils.py
--- a/openr/data/README.md
+++ b/openr/data/README.md
--- a/openr/data/config.yaml
+++ b/openr/data/config.yaml
--- a/openr/data/extracted_problems_and_answers.json
+++ b/openr/data/extracted_problems_and_answers.json
--- a/openr/data/gen_data.py
+++ b/openr/data/gen_data.py
--- a/openr/data/model_utils.py
+++ b/openr/data/model_utils.py
--- a/openr/data/module.py
+++ b/openr/data/module.py
--- a/openr/data/omegaPRM_v2/llm_utils.py
+++ b/openr/data/omegaPRM_v2/llm_utils.py
--- a/openr/data/omegaPRM_v2/omegaprm.py
+++ b/openr/data/omegaPRM_v2/omegaprm.py
--- a/openr/data/omegaPRM_v2/process_json.py
+++ b/openr/data/omegaPRM_v2/process_json.py
--- a/openr/data/omegaPRM_v2/readme.md
+++ b/openr/data/omegaPRM_v2/readme.md
--- a/openr/data/omegaPRM_v2/run_omegaprm.py
+++ b/openr/data/omegaPRM_v2/run_omegaprm.py
--- a/openr/data/omegaPRM_v2/run_omegaprm_multi_gpu.sh
+++ b/openr/data/omegaPRM_v2/run_omegaprm_multi_gpu.sh
--- a/openr/distributed/utils.py
+++ b/openr/distributed/utils.py
--- a/openr/envs/MATH/__init__.py
+++ b/openr/envs/MATH/__init__.py
--- a/openr/envs/MATH/data.py
+++ b/openr/envs/MATH/data.py
--- a/openr/envs/MATH/dataset/test500.jsonl
+++ b/openr/envs/MATH/dataset/test500.jsonl
--- a/openr/envs/MATH/dataset/train.jsonl
+++ b/openr/envs/MATH/dataset/train.jsonl
--- a/openr/envs/MATH/env.py
+++ b/openr/envs/MATH/env.py
--- a/openr/envs/MATH/grader.py
+++ b/openr/envs/MATH/grader.py
--- a/openr/envs/MATH/parse_utils_qwen.py
+++ b/openr/envs/MATH/parse_utils_qwen.py
--- a/openr/envs/MATH/prompt.py
+++ b/openr/envs/MATH/prompt.py
--- a/openr/envs/MATH/verify_utils.py
+++ b/openr/envs/MATH/verify_utils.py
--- a/openr/envs/__init__.py
+++ b/openr/envs/__init__.py
--- a/openr/envs/base_env.py
+++ b/openr/envs/base_env.py
@@ -218,7 +218,7 @@ class CoTEnv(BaseEnv):
            processed_act = self.post_process_act(texts[i])
            if (
                len(processed_act) > 0
-                and processed_act not in text_list
+                # and processed_act not in text_list
                # only stop is valid, otherwise the output action is truncated actually
                and result.finish_reason[i] == "stop" 
            ):

--- a/openr/envs/rstar/README.md
+++ b/openr/envs/rstar/README.md
--- a/openr/envs/rstar/__init__.py
+++ b/openr/envs/rstar/__init__.py
--- a/openr/envs/rstar/data.py
+++ b/openr/envs/rstar/data.py
--- a/openr/envs/rstar/eval_src/Evaluator.py
+++ b/openr/envs/rstar/eval_src/Evaluator.py
--- a/openr/envs/rstar/eval_src/do_eval.py
+++ b/openr/envs/rstar/eval_src/do_eval.py
--- a/openr/envs/rstar/eval_src/toolkit_for_MATH/latex_answer_check.py
+++ b/openr/envs/rstar/eval_src/toolkit_for_MATH/latex_answer_check.py
--- a/openr/envs/rstar/eval_src/toolkit_for_MATH/metamath_utils.py
+++ b/openr/envs/rstar/eval_src/toolkit_for_MATH/metamath_utils.py
--- a/openr/envs/rstar/eval_src/toolkit_for_MATH/parsing_lib.py
+++ b/openr/envs/rstar/eval_src/toolkit_for_MATH/parsing_lib.py
--- a/openr/envs/rstar/eval_src/toolkit_for_MATH/simple_answer_check.py
+++ b/openr/envs/rstar/eval_src/toolkit_for_MATH/simple_answer_check.py
--- a/openr/envs/rstar/prompts/MATH/decompose/decompose_prompt.txt
+++ b/openr/envs/rstar/prompts/MATH/decompose/decompose_prompt.txt
--- a/openr/envs/rstar/prompts/MATH/decompose/decompose_prompt_rephrased.txt
+++ b/openr/envs/rstar/prompts/MATH/decompose/decompose_prompt_rephrased.txt
--- a/openr/envs/rstar/prompts/MATH/decompose/decompose_template.json
+++ b/openr/envs/rstar/prompts/MATH/decompose/decompose_template.json
--- a/openr/envs/rstar/prompts/MATH/fewshot_cot/fewshot_cot_config--xijie.json
+++ b/openr/envs/rstar/prompts/MATH/fewshot_cot/fewshot_cot_config--xijie.json
--- a/openr/envs/rstar/prompts/MATH/fewshot_cot/fewshot_cot_config.json
+++ b/openr/envs/rstar/prompts/MATH/fewshot_cot/fewshot_cot_config.json
--- a/openr/envs/rstar/prompts/MATH/fewshot_cot/fewshot_cot_prompt--xijie.txt
+++ b/openr/envs/rstar/prompts/MATH/fewshot_cot/fewshot_cot_prompt--xijie.txt
--- a/openr/envs/rstar/prompts/MATH/fewshot_cot/fewshot_cot_prompt.txt
+++ b/openr/envs/rstar/prompts/MATH/fewshot_cot/fewshot_cot_prompt.txt
--- a/openr/envs/rstar/prompts/MATH/fewshot_cot/fewshot_cot_prompt_old.txt
+++ b/openr/envs/rstar/prompts/MATH/fewshot_cot/fewshot_cot_prompt_old.txt
--- a/openr/envs/rstar/prompts/MATH/fewshot_cot/fewshot_cot_prompt_rephrased.txt
+++ b/openr/envs/rstar/prompts/MATH/fewshot_cot/fewshot_cot_prompt_rephrased.txt
--- a/openr/envs/rstar/prompts/MATH/fewshot_ost/fewshot_ost_config.json
+++ b/openr/envs/rstar/prompts/MATH/fewshot_ost/fewshot_ost_config.json
--- a/openr/envs/rstar/prompts/MATH/fewshot_ost/fewshot_ost_prompt.txt
+++ b/openr/envs/rstar/prompts/MATH/fewshot_ost/fewshot_ost_prompt.txt
--- a/openr/envs/rstar/prompts/MATH/rephrasing_prompt_template.txt
+++ b/openr/envs/rstar/prompts/MATH/rephrasing_prompt_template.txt
--- a/openr/envs/rstar/rstar_env.py
+++ b/openr/envs/rstar/rstar_env.py
--- a/openr/envs/rstar/rstar_utils.py
+++ b/openr/envs/rstar/rstar_utils.py
--- a/openr/envs/tests/test_math.py
+++ b/openr/envs/tests/test_math.py
--- a/openr/figure/MATH_subsampled.png
+++ b/openr/figure/MATH_subsampled.png
--- a/openr/figure/QA/QA1.png
+++ b/openr/figure/QA/QA1.png
--- a/openr/figure/QA/QA2.png
+++ b/openr/figure/QA/QA2.png
--- a/openr/figure/QA/QA3.png
+++ b/openr/figure/QA/QA3.png
--- a/openr/figure/QA/QA4.png
+++ b/openr/figure/QA/QA4.png
--- a/openr/figure/QA/QA5.png
+++ b/openr/figure/QA/QA5.png
--- a/openr/figure/QA/QA6.png
+++ b/openr/figure/QA/QA6.png
--- a/openr/figure/QA/QA7.png
+++ b/openr/figure/QA/QA7.png
--- a/openr/figure/compare_prm_by_boN.png
+++ b/openr/figure/compare_prm_by_boN.png
--- a/openr/figure/logo.png
+++ b/openr/figure/logo.png
--- a/openr/figure/logo_text.png
+++ b/openr/figure/logo_text.png
--- a/openr/figure/openr_logo.png
+++ b/openr/figure/openr_logo.png
--- a/openr/figure/wechat_qrcode.jpg
+++ b/openr/figure/wechat_qrcode.jpg
--- a/openr/gen_rm/fine_tuning.py
+++ b/openr/gen_rm/fine_tuning.py
--- a/openr/gen_rm/organize_dataset.py
+++ b/openr/gen_rm/organize_dataset.py
--- a/openr/infer.sh
+++ b/openr/infer.sh
 #!/bin/bash
+if [ ! -d "ret_one" ]; then
+    mkdir -p "ret_one"
+fi 

-#- Job parameters
-
-# (TODO)
-# Please modify job name
-
-#SBATCH -J inference            # The job name
-#SBATCH -o inference.out        # Write the standard output to file named 'ret-<job_number>.out'
-#SBATCH -e inference.err        # Write the standard error to file named 'ret-<job_number>.err'
-
-
-#- Resources
-
-# (TODO)
-# Please modify your requirements
-
-#SBATCH -p r8nv-gpu-hw                   # Submit to 'nv-gpu' Partitiion
-#SBATCH -t 1-06:00:00                # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
-#SBATCH --nodes=1                    # Request N nodes
-#SBATCH --gres=gpu:8                 # Request M GPU per node
-#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
-#SBATCH --qos=gpu-normal           # Request QOS Type
-#SBATCH --constraint="L40S"
-
-###
-### The system will alloc 8 or 16 cores per gpu by default.
-### If you need more or less, use following:
-### #SBATCH --cpus-per-task=K            # Request K cores
-###
-### 
-### Without specifying the constraint, any available nodes that meet the requirement will be allocated
-### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
-###
-### #SBATCH --nodelist=gpu-v00           # Request a specific list of hosts 
-### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
-###
-
-#- Log information
-
-echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
-echo "Job run at:"
-echo "$(hostnamectl)"
-# echo "$(df -h | grep -v tmpfs)"
-
-#- Load environments
-#- Load environments
-module unload cuda-cudnn
-source ~/.bashrc
-module list                       # list modules loaded
-conda activate open_reasoner
-
-echo $(module list)              # list modules loaded
-echo $(which gcc)
-echo $(which python)
-echo $(which python3)
-
-cluster-quota                    # nas quota
-
-# nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
-nvidia-smi
-
-#- Warning! Please not change your CUDA_VISIBLE_DEVICES
-#- in `.bashrc`, `env.sh`, or your job script
-echo "Use GPU ${CUDA_VISIBLE_DEVICES}"                              # which gpus
-#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
-
-# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
-
-#- Important setting!!!
-##  otherwise it will cause an error of insufficient RDMA resources:
-ulimit -l unlimited
-##  otherwise it will result in an insufficient virtual memory size error, especially when loading LLM:
-ulimit -v unlimited
-ulimit -n 65535
-ulimit -u 4125556
-
-#- Job step
-# sleep 30h
-cd /nfs_global/S/zhangxiaoyun/prm/openr
-export PYTHONPATH=$(pwd)
-
-bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc 1.0 --policy_model_name Qwen2.5-Math-7B-Instruct
-sleep 100s
-bash scripts/eval/beam_search.sh --acc 1.0 --policy_model_name Qwen2.5-Math-7B-Instruct
-
-#- End
-echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
+sbatch --job-name=test -o "ret_one/%j.out" -e "ret_one/%j.err" infer.slurm
\ No newline at end of file
--- a/openr/infer.slurm
+++ b/openr/infer.slurm
+#!/bin/bash
+
+#- Job parameters
+
+# (TODO)
+# Please modify job name
+
+#SBATCH -J inference            # The job name
+#SBATCH -o inference.out        # Write the standard output to the file named 'inference.out'
+#SBATCH -e inference.err        # Write the standard error to the file named 'inference.err'
+
+
+#- Resources
+
+# (TODO)
+# Please modify your requirements
+
+#SBATCH -p r8nv-gpu-hw                   # Submit to the 'r8nv-gpu-hw' partition
+#SBATCH -t 1-06:00:00                # Run for a maximum time of 1 day, 06 hours, 00 mins, 00 secs
+#SBATCH --nodes=1                    # Request N nodes
+#SBATCH --gres=gpu:8                 # Request M GPU per node
+#SBATCH --gres-flags=enforce-binding # CPU-GPU Affinity
+#SBATCH --qos=gpu-normal           # Request QOS Type
+#SBATCH --constraint="L40"
+
+###
+### The system will allocate 8 or 16 cores per GPU by default.
+### If you need more or fewer, use the following:
+### #SBATCH --cpus-per-task=K            # Request K cores
+###
+### 
+### Without specifying the constraint, any available nodes that meet the requirement will be allocated
+### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
+###
+### #SBATCH --nodelist=gpu-v00           # Request a specific list of hosts 
+### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
+###
+
+#- Log information
+
+echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
+echo "Job run at:"
+echo "$(hostnamectl)"
+# echo "$(df -h | grep -v tmpfs)"
+
+#- Load environments
+#- Load environments
+module unload cuda-cudnn
+source ~/.bashrc
+module list                       # list modules loaded
+conda activate open_reasoner
+
+echo $(module list)              # list modules loaded
+echo $(which gcc)
+echo $(which python)
+echo $(which python3)
+
+cluster-quota                    # nas quota
+
+# nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
+nvidia-smi
+
+#- Warning! Please do not change your CUDA_VISIBLE_DEVICES
+#- in `.bashrc`, `env.sh`, or your job script
+echo "Use GPU ${CUDA_VISIBLE_DEVICES}"                              # which gpus
+#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
+
+# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
+
+#- Important setting!!!
+##  otherwise it will cause an error of insufficient RDMA resources:
+ulimit -l unlimited
+##  otherwise it will result in an insufficient virtual memory size error, especially when loading LLM:
+ulimit -v unlimited
+ulimit -n 65535
+ulimit -u 20000
+
+#- Job step
+# sleep 30h
+cd /nfs_global/S/shiwenxuan/prm/openr
+export PYTHONPATH=$(pwd)
+
+bash reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh --acc 0.3 --policy_model_name Qwen2.5-Math-1.5B-Instruct
+sleep 100s
+bash scripts/eval/beam_search.sh --acc 0.3 --policy_model_name Qwen2.5-Math-1.5B-Instruct
+
+# sleep 6h
+
+#- End
+echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
--- a/openr/preprocess/.mypy.ini
+++ b/openr/preprocess/.mypy.ini
--- a/openr/preprocess/.pytest.ini
+++ b/openr/preprocess/.pytest.ini
--- a/openr/preprocess/README.org
+++ b/openr/preprocess/README.org
--- a/openr/preprocess/cli.py
+++ b/openr/preprocess/cli.py
--- a/openr/preprocess/src/__init__.py
+++ b/openr/preprocess/src/__init__.py
--- a/openr/preprocess/src/data_types/__init__.py
+++ b/openr/preprocess/src/data_types/__init__.py
--- a/openr/preprocess/src/data_types/base.py
+++ b/openr/preprocess/src/data_types/base.py
--- a/openr/preprocess/src/data_types/converted.py
+++ b/openr/preprocess/src/data_types/converted.py
--- a/openr/preprocess/src/data_types/math_aps.py
+++ b/openr/preprocess/src/data_types/math_aps.py
--- a/openr/preprocess/src/data_types/math_shepherd.py
+++ b/openr/preprocess/src/data_types/math_shepherd.py
--- a/openr/preprocess/src/data_types/prm800k.py
+++ b/openr/preprocess/src/data_types/prm800k.py
--- a/openr/preprocess/src/data_types/utils.py
+++ b/openr/preprocess/src/data_types/utils.py
--- a/openr/preprocess/src/preprocessors/__init__.py
+++ b/openr/preprocess/src/preprocessors/__init__.py
--- a/openr/preprocess/src/preprocessors/base.py
+++ b/openr/preprocess/src/preprocessors/base.py
--- a/openr/preprocess/src/preprocessors/math_aps.py
+++ b/openr/preprocess/src/preprocessors/math_aps.py
--- a/openr/preprocess/src/preprocessors/math_shepherd.py
+++ b/openr/preprocess/src/preprocessors/math_shepherd.py
--- a/openr/preprocess/src/preprocessors/prm800k.py
+++ b/openr/preprocess/src/preprocessors/prm800k.py
--- a/openr/preprocess/src/preprocessors/utils.py
+++ b/openr/preprocess/src/preprocessors/utils.py
--- a/openr/preprocess/tests/__init__.py
+++ b/openr/preprocess/tests/__init__.py
--- a/openr/preprocess/tests/samples_math-aps-tree.jsonl
+++ b/openr/preprocess/tests/samples_math-aps-tree.jsonl
--- a/openr/preprocess/tests/samples_math-aps.jsonl
+++ b/openr/preprocess/tests/samples_math-aps.jsonl
--- a/openr/preprocess/tests/samples_math-shepherd.jsonl
+++ b/openr/preprocess/tests/samples_math-shepherd.jsonl
--- a/openr/preprocess/tests/samples_prm800k.jsonl
+++ b/openr/preprocess/tests/samples_prm800k.jsonl
--- a/openr/preprocess/tests/test_data_types.py
+++ b/openr/preprocess/tests/test_data_types.py
--- a/openr/preprocess/tests/test_preprocessors.py
+++ b/openr/preprocess/tests/test_preprocessors.py
--- a/openr/prm/README.md
+++ b/openr/prm/README.md
--- a/openr/prm/code/evaluate.py
+++ b/openr/prm/code/evaluate.py
--- a/openr/prm/code/evaluate_qwen.py
+++ b/openr/prm/code/evaluate_qwen.py
--- a/openr/prm/code/finetune_llama.py
+++ b/openr/prm/code/finetune_llama.py
--- a/openr/prm/code/finetune_math_shep.py
+++ b/openr/prm/code/finetune_math_shep.py
--- a/openr/prm/code/finetune_qwen.py
+++ b/openr/prm/code/finetune_qwen.py
--- a/openr/prm/code/finetune_qwen_single_gpu.py
+++ b/openr/prm/code/finetune_qwen_single_gpu.py
--- a/openr/prm/code/test.json
+++ b/openr/prm/code/test.json
--- a/openr/prm/code/test_qwen.py
+++ b/openr/prm/code/test_qwen.py
--- a/openr/prm/infer_fns.py
+++ b/openr/prm/infer_fns.py
--- a/openr/prm/offline_eval.py
+++ b/openr/prm/offline_eval.py
--- a/openr/prm/record.jsonl
+++ b/openr/prm/record.jsonl
--- a/openr/reason/README.md
+++ b/openr/reason/README.md
--- a/openr/reason/controller.log
+++ b/openr/reason/controller.log
--- a/openr/reason/evaluation/evaluate.py
+++ b/openr/reason/evaluation/evaluate.py
--- a/openr/reason/evaluation/evaluator.py
+++ b/openr/reason/evaluation/evaluator.py
--- a/openr/reason/evaluation/methods.py
+++ b/openr/reason/evaluation/methods.py
--- a/openr/reason/evaluation/utils.py
+++ b/openr/reason/evaluation/utils.py
--- a/openr/reason/guided_search/rstar.py
+++ b/openr/reason/guided_search/rstar.py
--- a/openr/reason/guided_search/tree.py
+++ b/openr/reason/guided_search/tree.py
--- a/openr/reason/inference/lm_call.py
+++ b/openr/reason/inference/lm_call.py
--- a/openr/reason/inference/rm_call.py
+++ b/openr/reason/inference/rm_call.py
--- a/openr/reason/inference/text_generation.py
+++ b/openr/reason/inference/text_generation.py
--- a/openr/reason/inference/value.py
+++ b/openr/reason/inference/value.py
--- a/openr/reason/llm_service/README.md
+++ b/openr/reason/llm_service/README.md
--- a/openr/reason/llm_service/create_service_math_shepherd.sh
+++ b/openr/reason/llm_service/create_service_math_shepherd.sh
--- a/openr/reason/llm_service/create_service_qwen2.5_math_hf.sh
+++ b/openr/reason/llm_service/create_service_qwen2.5_math_hf.sh
--- a/openr/reason/llm_service/create_service_qwen2.5_math_vllm.sh
+++ b/openr/reason/llm_service/create_service_qwen2.5_math_vllm.sh
--- a/openr/reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm.sh
+++ b/openr/reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm.sh
--- a/openr/reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh
+++ b/openr/reason/llm_service/create_service_qwen2.5_math_vllm_gold_prm_speed.sh
--- a/openr/reason/llm_service/workers/base_model_worker.py
+++ b/openr/reason/llm_service/workers/base_model_worker.py
--- a/openr/reason/llm_service/workers/gold_reward_model_worker.py
+++ b/openr/reason/llm_service/workers/gold_reward_model_worker.py
--- a/openr/reason/llm_service/workers/inference.py
+++ b/openr/reason/llm_service/workers/inference.py
--- a/openr/reason/llm_service/workers/model_worker.py
+++ b/openr/reason/llm_service/workers/model_worker.py
--- a/openr/reason/llm_service/workers/reward_model_worker.py
+++ b/openr/reason/llm_service/workers/reward_model_worker.py
--- a/openr/reason/llm_service/workers/test.ipynb
+++ b/openr/reason/llm_service/workers/test.ipynb
--- a/openr/reason/llm_service/workers/vllm_worker.py
+++ b/openr/reason/llm_service/workers/vllm_worker.py
--- a/openr/reason/reranking/vote_utils.py
+++ b/openr/reason/reranking/vote_utils.py
--- a/openr/reason/test.py
+++ b/openr/reason/test.py
--- a/openr/reports/OpenR-Wang.pdf
+++ b/openr/reports/OpenR-Wang.pdf
--- a/openr/reports/Tutorial-LLM-Reasoning-Wang.pdf
+++ b/openr/reports/Tutorial-LLM-Reasoning-Wang.pdf
--- a/openr/reports/tutorial.pdf
+++ b/openr/reports/tutorial.pdf
--- a/openr/requirements.txt
+++ b/openr/requirements.txt
--- a/openr/scripts/eval/beam_search.sh
+++ b/openr/scripts/eval/beam_search.sh
--- a/openr/scripts/eval/cot_greedy.sh
+++ b/openr/scripts/eval/cot_greedy.sh
--- a/openr/scripts/eval/cot_rerank.sh
+++ b/openr/scripts/eval/cot_rerank.sh
--- a/openr/scripts/eval/rstar_mcts.sh
+++ b/openr/scripts/eval/rstar_mcts.sh
--- a/openr/scripts/eval/vanila_mcts.sh
+++ b/openr/scripts/eval/vanila_mcts.sh
--- a/openr/train/README.md
+++ b/openr/train/README.md
--- a/openr/train/mat/__init__.py
+++ b/openr/train/mat/__init__.py
--- a/openr/train/mat/agents/qwen_lora_agent.py
+++ b/openr/train/mat/agents/qwen_lora_agent.py
--- a/openr/train/mat/config.py
+++ b/openr/train/mat/config.py
--- a/openr/train/mat/envs/__init__.py
+++ b/openr/train/mat/envs/__init__.py
--- a/openr/train/mat/envs/math/__init__.py
+++ b/openr/train/mat/envs/math/__init__.py
--- a/openr/train/mat/envs/math/data/__init__.py
+++ b/openr/train/mat/envs/math/data/__init__.py
--- a/openr/train/mat/envs/math/data/math_500.jsonl
+++ b/openr/train/mat/envs/math/data/math_500.jsonl
--- a/openr/train/mat/envs/math/data/merged_precalculus_test.json
+++ b/openr/train/mat/envs/math/data/merged_precalculus_test.json
--- a/openr/train/mat/envs/math/data/merged_precalculus_train.json
+++ b/openr/train/mat/envs/math/data/merged_precalculus_train.json
--- a/openr/train/mat/envs/math/math_env.py
+++ b/openr/train/mat/envs/math/math_env.py
--- a/openr/train/mat/envs/math/math_env_wrappers.py
+++ b/openr/train/mat/envs/math/math_env_wrappers.py
--- a/openr/train/mat/envs/math/prompts.py
+++ b/openr/train/mat/envs/math/prompts.py
--- a/openr/train/mat/models/__init__.py
+++ b/openr/train/mat/models/__init__.py
--- a/openr/train/mat/models/critic.py
+++ b/openr/train/mat/models/critic.py
--- a/openr/train/mat/models/ms_prm.py
+++ b/openr/train/mat/models/ms_prm.py
--- a/openr/train/mat/models/qwen_prm.py
+++ b/openr/train/mat/models/qwen_prm.py
--- a/openr/train/mat/runner/shared/math_runner.py
+++ b/openr/train/mat/runner/shared/math_runner.py
--- a/openr/train/mat/scripts/__init__.py
+++ b/openr/train/mat/scripts/__init__.py
--- a/openr/train/mat/scripts/test_llm.sh
+++ b/openr/train/mat/scripts/test_llm.sh
--- a/openr/train/mat/scripts/test_math.py
+++ b/openr/train/mat/scripts/test_math.py
--- a/openr/train/mat/scripts/train_llm.sh
+++ b/openr/train/mat/scripts/train_llm.sh
--- a/openr/train/mat/scripts/train_math.py
+++ b/openr/train/mat/scripts/train_math.py
--- a/openr/train/mat/trainers/llm_trainer_appo.py
+++ b/openr/train/mat/trainers/llm_trainer_appo.py
--- a/openr/train/mat/trainers/llm_trainer_grpo.py
+++ b/openr/train/mat/trainers/llm_trainer_grpo.py
--- a/openr/train/mat/trainers/llm_trainer_tppo.py
+++ b/openr/train/mat/trainers/llm_trainer_tppo.py
--- a/openr/train/mat/utils/__init__.py
+++ b/openr/train/mat/utils/__init__.py
--- a/openr/train/mat/utils/language_buffer.py
+++ b/openr/train/mat/utils/language_buffer.py
--- a/openr/train/mat/utils/util.py
+++ b/openr/train/mat/utils/util.py
--- a/result.md
+++ b/result.md
--- a/results/README.md
+++ b/results/README.md
--- a/results/image.png
+++ b/results/image.png