Commit 1bb353c5 by wyt2000

vllm -> transformers (replace vLLM generation with Hugging Face Transformers).

parent c4d903e9
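For orientation, the diff below swaps vLLM's LLM.generate call for a plain Hugging Face Transformers loop: tokenize the prompt, call model.generate, decode, and strip the echoed prompt from the decoded text. The following is a minimal single-GPU sketch of that pattern, with a hypothetical model path and prompt (greedy decoding, mirroring do_sample=False in the committed code); it is not the committed implementation itself.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical checkpoint path; any causal LM works the same way here.
model_path = "deepseek-ai/DeepSeek-Prover-V1.5-RL"

tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map={"": "cuda:0"},   # pin the whole model to one GPU, as the diff does per rank
    trust_remote_code=True,
)

prompt = "theorem demo : 1 + 1 = 2 := by"   # hypothetical prompt
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output_ids = model.generate(
    **inputs,
    max_new_tokens=256,
    do_sample=False,                         # greedy decoding, as in the committed generate() call
    eos_token_id=tokenizer.eos_token_id,
)
text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
completion = text[len(prompt):].strip()      # drop the echoed prompt, same approach as the diff
print(completion)

The per-rank worker in the diff applies these same steps to a batch of prompts and additionally logs the intermediate strings to output_rank_<local_rank>.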
@@ -2,3 +2,4 @@
 __pycache__/
 logs/
 ret_one/
+output_rank_*
@@ -7,6 +7,7 @@ import torch.multiprocessing as mp
 from vllm import SamplingParams
 from prover.utils import AttrDict, MODEL_FORMAT
+from transformers import AutoTokenizer, AutoModelForCausalLM

 class GeneratorProcess(mp.Process):
@@ -18,37 +19,69 @@ class GeneratorProcess(mp.Process):
         self.task_queue = task_queue
         self.request_statuses = request_statuses
         self.lock = lock
-        self.sampling_params = SamplingParams(
-            temperature=args.temperature,
-            max_tokens=args.max_tokens,
-            top_p=args.top_p,
-            n=1,
-        )
+        self.temperature = args.temperature
+        self.max_tokens = args.max_tokens
+        self.top_p = args.top_p
         self.prompt_func = MODEL_FORMAT[args.mode]['prompt']
         self.output_func = MODEL_FORMAT[args.mode]['output']

     def run(self):
         seed = int(time.time()) % 1000 + (self.node_rank * 8 + self.local_rank) * 1000
         os.environ['LOCAL_RANK'] = str(self.local_rank)
-        from vllm import LLM
-        llm = LLM(model=self.model_path, max_num_batched_tokens=8192, seed=seed, trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained(
+            self.model_path,
+            device_map={"": f"cuda:{self.local_rank}"},
+            low_cpu_mem_usage=True,
+            trust_remote_code=True,
+            use_flash_attention_2=False,
+            torch_dtype=torch.float16,
+        )
+        tokenizer = AutoTokenizer.from_pretrained(self.model_path, use_fast=False)

         while True:
             inputs = self.task_queue.get()
             if inputs is None:  # Terminate when receiving None
                 break
-            model_inputs = [
+            model_inputs_text = [
                 ''.join([
                     item.get('_extra_header', str()),
                     self.prompt_func(item),
                     item.get('_extra_prompt', str()),
                 ]) for _, _, item in inputs
             ]
-            model_outputs = llm.generate(
-                model_inputs,
-                self.sampling_params,
-                use_tqdm=False,
-            )
-            outputs = [self.output_func(_output.outputs[0].text) for _output in model_outputs]
+            with open(f'output_rank_{self.local_rank}', 'a') as f:
+                f.write('\n########## INPUT BEGIN ##########\n')
+                f.write(model_inputs_text[0])
+                f.write('\n########## INPUT END ##########\n')
+            model_inputs = [
+                tokenizer(inp, return_tensors="pt").to(model.device)
+                for inp in model_inputs_text
+            ]
+            model_outputs = [
+                model.generate(**inp, max_new_tokens=self.max_tokens, eos_token_id=tokenizer.eos_token_id, do_sample=False)
+                for inp in model_inputs
+            ]
+            model_outputs = [
+                tokenizer.decode(out[0], skip_special_tokens=True)
+                for out in model_outputs
+            ]
+            with open(f'output_rank_{self.local_rank}', 'a') as f:
+                f.write('\n########## RAW OUTPUT BEGIN ##########\n')
+                f.write(model_outputs[0])
+                f.write('\n########## RAW OUTPUT END ##########\n')
+            model_outputs = [
+                out[len(inp):].strip()
+                for inp, out in zip(model_inputs_text, model_outputs)
+            ]
+            with open(f'output_rank_{self.local_rank}', 'a') as f:
+                f.write('\n########## REMOVED OUTPUT BEGIN ##########\n')
+                f.write(model_outputs[0])
+                f.write('\n########## REMOVED OUTPUT END ##########\n')
+            outputs = [self.output_func(_output) for _output in model_outputs]
+            with open(f'output_rank_{self.local_rank}', 'a') as f:
+                f.write('\n########## CLEANED OUTPUT BEGIN ##########\n')
+                f.write(outputs[0])
+                f.write('\n########## CLEANED OUTPUT END ##########\n')
             with self.lock:
                 for (_, request_id, _), output in zip(inputs, outputs):
                     self.request_statuses[request_id] = output
@@ -18,7 +18,7 @@
 #SBATCH --cpus-per-task=64            # Request Q core per task; means that P*Q cores per node
 #SBATCH --qos=normal                  # Request QOS Type
 ### #SBATCH --constraint="IB&A100"
-#SBATCH --nodelist=r8a100-b06
+#SBATCH --nodelist=r8a100-b00
 ###
@@ -30,7 +30,7 @@
 ### Without specifying the constraint, any available nodes that meet the requirement will be allocated
 ### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
 ###
-### #SBATCH --nodelist=r8a100-b06
+### #SBATCH --nodelist=r8a100-b00
 ### #SBATCH --constraint="Volta|RTX8000"   # Request GPU Type: Volta(V100 or V100S) or RTX8000
 ###
@@ -54,7 +54,7 @@ module load slurm-tools/v1.0
 module load python3/3.8.16
 ##- CUDA
-module load cuda-cudnn/11.6-8.4.1
+module load cuda-cudnn/11.8-8.8.1
 ##- virtualenv
 # source xxxxx/activate
@@ -79,7 +79,7 @@ export http_proxy=10.200.22.22:18421
 export https_proxy=10.200.22.22:18421
 export config_name=sampling_few_shot_pass_at_1
-export model_name=DeepSeek-Prover-V1.5-Base
+export model_name=DeepSeek-Prover-V1.5-RL
 export model_path=/lustre/S/huangdi/open_for_out/models/aimo-progress-prize-trained-models/$model_name
 export log_dir=logs/${model_name}_${config_name}
......