Repository: Yutong Wu / DeepSeek-Prover-V1.5

Commit 1bb353c5, authored Sep 10, 2024 by wyt2000
Parent commit: c4d903e9

Commit message: vllm -> trans.

Showing 3 changed files with 54 additions and 20 deletions (+54 / -20):

  .gitignore                    +1   -0
  prover/workers/generator.py   +49  -16
  test-proof.slurm              +4   -4
.gitignore

@@ -2,3 +2,4 @@
 __pycache__/
 logs/
 ret_one/
+output_rank_*
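The new `output_rank_*` entry matches the per-rank debug files that the updated generator appends prompts and outputs to (see the generator.py diff below). A minimal sketch of that file-naming and marker pattern, with placeholder values for the rank, section label, and payload:

```python
# Sketch of the per-rank debug logging covered by the new 'output_rank_*'
# ignore entry; local_rank, section, and payload are placeholders.
local_rank = 0
section = "INPUT"
payload = "example prompt text"

with open(f"output_rank_{local_rank}", "a") as f:
    f.write(f"\n########## {section} BEGIN ##########\n")
    f.write(payload)
    f.write(f"\n########## {section} END ##########\n")
```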
prover/workers/generator.py

@@ -7,6 +7,7 @@ import torch.multiprocessing as mp
 from vllm import SamplingParams
 from prover.utils import AttrDict, MODEL_FORMAT
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
 class GeneratorProcess(mp.Process):
@@ -18,37 +19,69 @@ class GeneratorProcess(mp.Process):
         self.task_queue = task_queue
         self.request_statuses = request_statuses
         self.lock = lock
-        self.sampling_params = SamplingParams(
-            temperature=args.temperature,
-            max_tokens=args.max_tokens,
-            top_p=args.top_p,
-            n=1,
-        )
+        self.temperature = args.temperature
+        self.max_tokens = args.max_tokens
+        self.top_p = args.top_p
         self.prompt_func = MODEL_FORMAT[args.mode]['prompt']
         self.output_func = MODEL_FORMAT[args.mode]['output']
 
     def run(self):
         seed = int(time.time()) % 1000 + (self.node_rank * 8 + self.local_rank) * 1000
-        from vllm import LLM
-        llm = LLM(model=self.model_path, max_num_batched_tokens=8192, seed=seed, trust_remote_code=True)
+        os.environ['LOCAL_RANK'] = str(self.local_rank)
+        model = AutoModelForCausalLM.from_pretrained(
+            self.model_path,
+            device_map={"": f"cuda:{self.local_rank}"},
+            low_cpu_mem_usage=True,
+            trust_remote_code=True,
+            use_flash_attention_2=False,
+            torch_dtype=torch.float16,
+        )
+        tokenizer = AutoTokenizer.from_pretrained(self.model_path, use_fast=False)
         while True:
             inputs = self.task_queue.get()
             if inputs is None:  # Terminate when receiving None
                 break
-            model_inputs = [
+            model_inputs_text = [
                 ''.join([
                     item.get('_extra_header', str()),
                     self.prompt_func(item),
                     item.get('_extra_prompt', str()),
                 ]) for _, _, item in inputs
             ]
-            model_outputs = llm.generate(
-                model_inputs,
-                self.sampling_params,
-                use_tqdm=False,
-            )
-            outputs = [self.output_func(_output.outputs[0].text) for _output in model_outputs]
+            with open(f'output_rank_{self.local_rank}', 'a') as f:
+                f.write('\n########## INPUT BEGIN ##########\n')
+                f.write(model_inputs_text[0])
+                f.write('\n########## INPUT END ##########\n')
+            model_inputs = [
+                tokenizer(inp, return_tensors="pt").to(model.device)
+                for inp in model_inputs_text
+            ]
+            model_outputs = [
+                model.generate(
+                    **inp,
+                    max_new_tokens=self.max_tokens,
+                    eos_token_id=tokenizer.eos_token_id,
+                    do_sample=False
+                )
+                for inp in model_inputs
+            ]
+            model_outputs = [
+                tokenizer.decode(out[0], skip_special_tokens=True)
+                for out in model_outputs
+            ]
+            with open(f'output_rank_{self.local_rank}', 'a') as f:
+                f.write('\n########## RAW OUTPUT BEGIN ##########\n')
+                f.write(model_outputs[0])
+                f.write('\n########## RAW OUTPUT END ##########\n')
+            model_outputs = [
+                out[len(inp):].strip()
+                for inp, out in zip(model_inputs_text, model_outputs)
+            ]
+            with open(f'output_rank_{self.local_rank}', 'a') as f:
+                f.write('\n########## REMOVED OUTPUT BEGIN ##########\n')
+                f.write(model_outputs[0])
+                f.write('\n########## REMOVED OUTPUT END ##########\n')
+            outputs = [self.output_func(_output) for _output in model_outputs]
+            with open(f'output_rank_{self.local_rank}', 'a') as f:
+                f.write('\n########## CLEANED OUTPUT BEGIN ##########\n')
+                f.write(outputs[0])
+                f.write('\n########## CLEANED OUTPUT END ##########\n')
             with self.lock:
                 for (_, request_id, _), output in zip(inputs, outputs):
                     self.request_statuses[request_id] = output
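As the diff above shows, the commit replaces the worker's vLLM batched sampling with per-prompt HuggingFace transformers generation under greedy decoding, and logs prompts and outputs to per-rank debug files. The standalone sketch below isolates that generation path (tokenize, greedy generate, decode, strip the echoed prompt). It is a minimal illustration rather than the repository's code: the model identifier, prompts, and helper name `generate_greedy` are assumptions, and the prover-specific `MODEL_FORMAT` prompt/output formatting and multiprocessing plumbing are omitted.

```python
# A minimal, self-contained sketch of the transformers-based generation path
# introduced by this commit: tokenize a prompt, generate greedily, decode,
# and strip the echoed prompt. Model id, prompts, and the helper name are
# illustrative placeholders, not the repository's configuration.
from typing import List

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


def generate_greedy(model_path: str, prompts: List[str], max_new_tokens: int = 256) -> List[str]:
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if device.startswith("cuda") else torch.float32

    # Mirrors the diff's loading choices: slow tokenizer, trust_remote_code,
    # fp16 weights when a GPU is available.
    tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=dtype,
        trust_remote_code=True,
    ).to(device)

    completions = []
    for prompt in prompts:
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        # Greedy decoding, as in the diff's do_sample=False call.
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
        )
        text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        # generate() returns prompt + completion; drop the prompt prefix,
        # as the diff does with out[len(inp):].strip().
        completions.append(text[len(prompt):].strip())
    return completions


if __name__ == "__main__":
    # Hypothetical usage; the model id is assumed to be a public checkpoint.
    print(generate_greedy(
        "deepseek-ai/DeepSeek-Prover-V1.5-RL",
        ["theorem add_comm_example (a b : Nat) : a + b = b + a := by"],
        max_new_tokens=128,
    ))
```

One behavioural consequence visible in the diff: with do_sample=False, the stored temperature and top_p no longer influence generation, so this path is effectively greedy, single-sample decoding rather than vLLM's sampled generation.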
test-proof.slurm

@@ -18,7 +18,7 @@
 #SBATCH --cpus-per-task=64       # Request Q core per task; means that P*Q cores per node
 #SBATCH --qos=normal             # Request QOS Type
 ### #SBATCH --constraint="IB&A100"
-#SBATCH --nodelist=r8a100-b06
+#SBATCH --nodelist=r8a100-b00
 ###
@@ -30,7 +30,7 @@
 ### Without specifying the constraint, any available nodes that meet the requirement will be allocated
 ### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
 ###
-### #SBATCH --nodelist=r8a100-b06
+### #SBATCH --nodelist=r8a100-b00
 ### #SBATCH --constraint="Volta|RTX8000"      # Request GPU Type: Volta(V100 or V100S) or RTX8000
 ###
@@ -54,7 +54,7 @@ module load slurm-tools/v1.0
 module load python3/3.8.16
 ##- CUDA
-module load cuda-cudnn/11.6-8.4.1
+module load cuda-cudnn/11.8-8.8.1
 ##- virtualenv
 # source xxxxx/activate
@@ -79,7 +79,7 @@ export http_proxy=10.200.22.22:18421
 export https_proxy=10.200.22.22:18421
 export config_name=sampling_few_shot_pass_at_1
-export model_name=DeepSeek-Prover-V1.5-Base
+export model_name=DeepSeek-Prover-V1.5-RL
 export model_path=/lustre/S/huangdi/open_for_out/models/aimo-progress-prize-trained-models/$model_name
 export log_dir=logs/${model_name}_${config_name}