before modify vllm

c4d903e9 · wyt2000 · 2c4ba911 · c4d903e9 · c4d903e9 · c4d903e9
Commit c4d903e9 authored Sep 09, 2024 by wyt2000
15 changed files
--- a/.gitignore
+++ b/.gitignore
 .vscode/
 __pycache__/
 logs/
+ret_one/
--- a/configs/RMaxTS.py
+++ b/configs/RMaxTS.py
@@ -14,7 +14,7 @@ lean_timeout = 300
 # model
 batch_size = 512
-model_path = 'deepseek-ai/DeepSeek-Prover-V1.5-RL'
+model_path = '/lustre/S/huangdi/open_for_out/models/DeepSeek-Prover-V1.5-Base'
 model_args = AttrDict(
    mode='cot', # `cot` or `non-cot`
    temperature=1,

--- a/configs/sampling.py
+++ b/configs/sampling.py
@@ -14,7 +14,7 @@ lean_timeout = 300
 # model
 batch_size = 32
-model_path = 'deepseek-ai/DeepSeek-Prover-V1.5-RL'
+model_path = '/lustre/S/huangdi/open_for_out/models/DeepSeek-Prover-V1.5-Base'
 model_args = AttrDict(
    mode='cot',  # `cot` or `non-cot`
    temperature=1,

--- a/configs/sampling_few_shot.py
+++ b/configs/sampling_few_shot.py
@@ -14,7 +14,7 @@ lean_timeout = 300
 # model
 batch_size = 32 
-model_path = 'deepseek-ai/DeepSeek-Prover-V1.5-Base'
+model_path = '/lustre/S/huangdi/open_for_out/models/DeepSeek-Prover-V1.5-Base'
 model_args = AttrDict(
    mode='cot',  # `cot` or `non-cot`
    temperature=1,

--- a/configs/sampling_few_shot_pass_at_1.py
+++ b/configs/sampling_few_shot_pass_at_1.py
+from prover.utils import AttrDict
+from prover.algorithms import Sampling
+# dataset
+data_path = 'datasets/minif2f.jsonl'
+data_split = ['valid', 'test']
+data_repeat = 1
+# verifier
+lean_max_concurrent_requests = 64 
+lean_memory_limit = 10
+lean_timeout = 300
+# model
+# batch_size = 32 
+batch_size = 1 
+model_args = AttrDict(
+    mode='cot',  # `cot` or `non-cot`
+    temperature=0,
+    max_tokens=2048,
+    top_p=1
+)
+# algorithm
+n_search_procs = 64 
+sampler = dict(
+    algorithm=Sampling,
+    # sample_num=128,
+    sample_num=1,
+    # log_interval=32,
+    log_interval=1,
+    few_shot_dataset='datasets/minif2f_valid_few_shot.jsonl',
+    few_shot_num=3,
+)
--- a/configs/sampling_few_shot_pass_at_128.py
+++ b/configs/sampling_few_shot_pass_at_128.py
+from prover.utils import AttrDict
+from prover.algorithms import Sampling
+# dataset
+data_path = 'datasets/minif2f.jsonl'
+data_split = ['valid', 'test']
+data_repeat = 1
+# verifier
+lean_max_concurrent_requests = 64 
+lean_memory_limit = 10
+lean_timeout = 300
+# model
+batch_size = 32 
+model_path = '/lustre/S/huangdi/open_for_out/models/DeepSeek-Prover-V1.5-Base'
+model_args = AttrDict(
+    mode='cot',  # `cot` or `non-cot`
+    temperature=1,
+    max_tokens=2048,
+    top_p=0.95,
+)
+# algorithm
+n_search_procs = 64 
+sampler = dict(
+    algorithm=Sampling,
+    sample_num=128,
+    log_interval=32,
+    few_shot_dataset='datasets/minif2f_valid_few_shot.jsonl',
+    few_shot_num=3,
+)
--- a/inference.slurm
+++ b/inference.slurm
+#!/bin/bash
+#- Job parameters
+# (TODO)
+# Please modify job name
+#- Resources
+# (TODO)
+# Please modify your requirements
+#SBATCH -p r8nv-gpu-dedicated        # Submit to 'nv-gpu' Partitiion
+#SBATCH -t 4-00:00:00                # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
+#SBATCH --nodes=1                    # Request N nodes
+#SBATCH --gres=gpu:8                 # Request M GPU per node
+#SBATCH --ntasks-per-node=1          # Request P tasks per node
+#SBATCH --cpus-per-task=64           # Request Q core per task; means that P*Q cores per node
+#SBATCH --qos=normal                 # Request QOS Type
+### #SBATCH --constraint="IB&A100"
+#SBATCH --nodelist=r8a100-b00
+###
+### The system will alloc 8 or 16 cores per gpu by default.
+### If you need more or less, use following:
+### #SBATCH --cpus-per-task=K            # Request K cores
+###
+### 
+### Without specifying the constraint, any available nodes that meet the requirement will be allocated
+### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
+###
+### #SBATCH --nodelist=r8a100-b00
+### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
+###
+# set constraint for RTX8000 to meet my cuda
+#- Log information
+echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
+echo "Job run at:"
+echo "$(hostnamectl)"
+#- Load environments
+source /tools/module_env.sh
+module list                       # list modules loaded
+##- Tools
+module load cluster-tools/v1.0
+module load slurm-tools/v1.0
+##- language
+module load python3/3.8.16
+##- CUDA
+module load cuda-cudnn/11.6-8.4.1
+##- virtualenv
+# source xxxxx/activate
+echo $(module list)              # list modules loaded
+echo $(which gcc)
+echo $(which python)
+echo $(which python3)
+cluster-quota                    # nas quota
+nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
+CUDA_VISIBLE_DEVICES = 8
+#- Warning! Please not change your CUDA_VISIBLE_DEVICES
+#- in `.bashrc`, `env.sh`, or your job script
+echo "Use GPU ${CUDA_VISIBLE_DEVICES}"                              # which gpus
+#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
+# [EDIT HERE(TODO)]
+export http_proxy=10.200.22.22:18421
+export https_proxy=10.200.22.22:18421
+python quick_start.py
+#- End
+echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
--- a/prover/launch.py
+++ b/prover/launch.py
@@ -15,6 +15,7 @@ if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str)
    parser.add_argument("--log_dir", type=str, default=f'logs/{get_datetime()}')
+    parser.add_argument("--model_path", type=str, default='/lustre/S/huangdi/open_for_out/models/DeepSeek-Prover-V1.5-Base')
    parser.add_argument("--node_rank", type=int, default=0)
    parser.add_argument("--world_size", type=int, default=1)
    args = parser.parse_args()
@@ -49,7 +50,7 @@ if __name__ == "__main__":
        GeneratorProcess(
            local_rank=local_rank,
            node_rank=args.node_rank,
-            model_path=cfg.model_path,
+            model_path=args.model_path,
            task_queue=generator_scheduler.task_queue,
            request_statuses=generator_scheduler.request_statuses,
            lock=generator_scheduler.lock,
@@ -69,7 +70,7 @@ if __name__ == "__main__":
        SearchProcess(
            idx=i+args.node_rank*cfg.n_search_procs,
            log_dir=args.log_dir,
-            tokenizer_path=cfg.model_path,
+            tokenizer_path=args.model_path,
            scheduler=scheduler,
            data_loader=data_loader,
            cfg=cfg,

--- a/prover/lean/verifier.py
+++ b/prover/lean/verifier.py
@@ -18,7 +18,8 @@ from prover.utils import AttrDict
 HOME_DIR = os.path.expanduser('~')
-DEFAULT_LAKE_PATH = f'{HOME_DIR}/.elan/bin/lake'
+# DEFAULT_LAKE_PATH = f'{HOME_DIR}/.elan/bin/lake'
+DEFAULT_LAKE_PATH = '/lustre/S/wuyt/.elan/bin/lake'
 DEFAULT_LEAN_WORKSPACE = 'mathlib4/'
@@ -33,6 +34,7 @@ def verify_lean4_file(code, lake_path=DEFAULT_LAKE_PATH, lean_workspace=DEFAULT_
    system_messages = ''
    try:
        with tempfile.TemporaryFile(mode='w+', encoding='utf-8') as temp_file:
+            print(message_str)
            temp_file.write(message_str + "\r\n\r\n")
            temp_file.seek(0)
            outputs = subprocess.run([lake_path, "exe", 'repl'], stdin=temp_file, capture_output=True, text=True, cwd=lean_workspace, timeout=timeout)

--- a/prover/workers/generator.py
+++ b/prover/workers/generator.py
@@ -3,7 +3,8 @@ import time
 import torch
 import torch.multiprocessing as mp
-from vllm import LLM, SamplingParams
+# from vllm import LLM, SamplingParams
+from vllm import SamplingParams
 from prover.utils import AttrDict, MODEL_FORMAT
@@ -29,6 +30,7 @@ class GeneratorProcess(mp.Process):
    def run(self):
        seed = int(time.time()) % 1000 + (self.node_rank * 8 + self.local_rank) * 1000
        os.environ['LOCAL_RANK'] = str(self.local_rank)
+        from vllm import LLM
        llm = LLM(model=self.model_path, max_num_batched_tokens=8192, seed=seed, trust_remote_code=True)
        while True:
            inputs = self.task_queue.get()

--- a/quick_start.py
+++ b/quick_start.py
@@ -7,7 +7,7 @@ from vllm import LLM, SamplingParams
 from prover.lean.verifier import Lean4ServerScheduler
-model_name = "deepseek-ai/DeepSeek-Prover-V1.5-RL"
+model_name = "/lustre/S/huangdi/open_for_out/models/DeepSeek-Prover-V1.5-RL"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = LLM(model=model_name, max_num_batched_tokens=8192, seed=1, trust_remote_code=True)

--- a/quick_start_oracle.py
+++ b/quick_start_oracle.py
+import re
+from prover.lean.verifier import Lean4ServerScheduler
+lean4_scheduler = Lean4ServerScheduler(max_concurrent_requests=1, timeout=300, memory_limit=10, name='verifier')
+prompt = r'''Complete the following Lean 4 code:
+```lean4
+'''
+code_prefix = r'''import Mathlib
+import Aesop
+set_option maxHeartbeats 0
+open BigOperators Real Nat Topology Rat
+/-- The second and fourth terms of a geometric sequence are $2$ and $6$. Which of the following is a possible first term?
+Show that it is $\frac{2\sqrt{3}}{3}$.-/
+theorem amc12b_2003_p6 (a r : ℝ) (u : ℕ → ℝ) (h₀ : ∀ k, u k = a * r ^ k) (h₁ : u 1 = 2)
+  (h₂ : u 3 = 6) : u 0 = 2 / Real.sqrt 3 ∨ u 0 = -(2 / Real.sqrt 3) := by
+'''
+# Expected output:
+result = prompt + code_prefix + '''  simp_all only [Nat.one_eq_succ_zero, Nat.zero_eq, zero_add, Nat.add_succ, Nat.add_zero,
+    Nat.succ_add]
+  have h₁' : a * r = 2 := by simpa [h₀] using h₁
+  have h₂' : a * r ^ 3 = 6 := by simpa [h₀] using h₂
+  have h₃ : r ^ 2 = 3 := by
+    nlinarith
+  have h₄ : a = 2 / Real.sqrt 3 ∨ a = -(2 / Real.sqrt 3) := by
+    apply eq_or_eq_neg_of_sq_eq_sq <;>
+    field_simp <;>
+    nlinarith
+  simpa [h₀] using h₄
+```
+'''
+requests = [re.search(r'```lean4\n(.*?)\n```', result, re.DOTALL).group(1)]
+# print(requests[0])
+request_id_list = lean4_scheduler.submit_all_request(requests)
+# print(request_id_list)
+outputs_list = lean4_scheduler.get_all_request_outputs(request_id_list)
+print(outputs_list)
+lean4_scheduler.close()
--- a/submit.sh
+++ b/submit.sh
+#!/bin/bash
+if [ ! -d "ret_one" ]; then
+    mkdir -p "ret_one"
+fi 
+sed -i -e "s/#SBATCH --nodelist=[^\n]*/#SBATCH --nodelist=$2/g" $1.slurm
+sbatch --job-name=$1 -o "ret_one/$1_%j.out" -e "ret_one/$1_%j.err" $1.slurm
--- a/test-proof.slurm
+++ b/test-proof.slurm
+#!/bin/bash
+#- Job parameters
+# (TODO)
+# Please modify job name
+#- Resources
+# (TODO)
+# Please modify your requirements
+#SBATCH -p r8nv-gpu-dedicated        # Submit to 'nv-gpu' Partitiion
+#SBATCH -t 4-00:00:00                # Run for a maximum time of 0 days, 12 hours, 00 mins, 00 secs
+#SBATCH --nodes=1                    # Request N nodes
+#SBATCH --gres=gpu:8                 # Request M GPU per node
+#SBATCH --ntasks-per-node=1          # Request P tasks per node
+#SBATCH --cpus-per-task=64           # Request Q core per task; means that P*Q cores per node
+#SBATCH --qos=normal                 # Request QOS Type
+### #SBATCH --constraint="IB&A100"
+#SBATCH --nodelist=r8a100-b06
+###
+### The system will alloc 8 or 16 cores per gpu by default.
+### If you need more or less, use following:
+### #SBATCH --cpus-per-task=K            # Request K cores
+###
+### 
+### Without specifying the constraint, any available nodes that meet the requirement will be allocated
+### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
+###
+### #SBATCH --nodelist=r8a100-b06
+### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
+###
+# set constraint for RTX8000 to meet my cuda
+#- Log information
+echo "Job start at $(date "+%Y-%m-%d %H:%M:%S")"
+echo "Job run at:"
+echo "$(hostnamectl)"
+#- Load environments
+source /tools/module_env.sh
+module list                       # list modules loaded
+##- Tools
+module load cluster-tools/v1.0
+module load slurm-tools/v1.0
+##- language
+module load python3/3.8.16
+##- CUDA
+module load cuda-cudnn/11.6-8.4.1
+##- virtualenv
+# source xxxxx/activate
+echo $(module list)              # list modules loaded
+echo $(which gcc)
+echo $(which python)
+echo $(which python3)
+cluster-quota                    # nas quota
+nvidia-smi --format=csv --query-gpu=name,driver_version,power.limit # gpu info
+CUDA_VISIBLE_DEVICES = 8
+#- Warning! Please not change your CUDA_VISIBLE_DEVICES
+#- in `.bashrc`, `env.sh`, or your job script
+echo "Use GPU ${CUDA_VISIBLE_DEVICES}"                              # which gpus
+#- The CUDA_VISIBLE_DEVICES variable is assigned and specified by SLURM
+# [EDIT HERE(TODO)]
+export http_proxy=10.200.22.22:18421
+export https_proxy=10.200.22.22:18421
+export config_name=sampling_few_shot_pass_at_1
+export model_name=DeepSeek-Prover-V1.5-Base
+export model_path=/lustre/S/huangdi/open_for_out/models/aimo-progress-prize-trained-models/$model_name
+export log_dir=logs/${model_name}_${config_name}
+python -m prover.launch --config=configs/${config_name}.py --model_path=${model_path} --log_dir=${log_dir}
+python -m prover.summarize --config=configs/${config_name}.py --log_dir=${log_dir}
+#- End
+echo "Job end at $(date "+%Y-%m-%d %H:%M:%S")"
--- a/test.lean
+++ b/test.lean
+{"cmd": "import Mathlib\nimport Aesop\n\nset_option maxHeartbeats 0\n\nopen BigOperators Real Nat Topology Rat\n\n/-- The second and fourth terms of a geometric sequence are $2$ and $6$. Which of the following is a possible first term?\nShow that it is $\\frac{2\\sqrt{3}}{3}$.-/\ntheorem amc12b_2003_p6 (a r : ℝ) (u : ℕ → ℝ) (h₀ : ∀ k, u k = a * r ^ k) (h₁ : u 1 = 2)\n  (h₂ : u 3 = 6) : u 0 = 2 / Real.sqrt 3 ∨ u 0 = -(2 / Real.sqrt 3) := by\n  simp_all only [Nat.one_eq_succ_zero, Nat.zero_eq, zero_add, Nat.add_succ, Nat.add_zero,\n    Nat.succ_add]\n  have h₁' : a * r = 2 := by simpa [h₀] using h₁\n  have h₂' : a * r ^ 3 = 6 := by simpa [h₀] using h₂\n  have h₃ : r ^ 2 = 3 := by\n    nlinarith\n  have h₄ : a = 2 / Real.sqrt 3 ∨ a = -(2 / Real.sqrt 3) := by\n    apply eq_or_eq_neg_of_sq_eq_sq <;>\n    field_simp <;>\n    nlinarith\n  simpa [h₀] using h₄", "allTactics": false, "ast": false, "tactics": false, "premises": false}