Commit 33c722f7 by ZhangXiaoyun

vllm model

parent d925c583
...@@ -37,22 +37,31 @@ def _math_shepherd_infer_fn(input_str: str, model, tokenizer, device): ...@@ -37,22 +37,31 @@ def _math_shepherd_infer_fn(input_str: str, model, tokenizer, device):
@torch.inference_mode()
def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
    """Score each reasoning step of ``input_str`` by sampling vLLM rollouts.

    The input is split on STEP_TAG into steps; for each step a cumulative
    prompt is built and 8 completions are sampled from ``model`` (a vLLM
    ``LLM`` instance). One score per prompt is returned.

    Args:
        input_str: Question plus reasoning steps, delimited by STEP_TAG.
        model: vLLM engine exposing ``generate(prompts, sampling_params)``.
        tokenizer: Tokenizer providing ``eos_token_id`` / ``pad_token_id``
            used as stop tokens.
        device: Unused here (kept for interface parity with the other
            ``*_infer_fn`` workers wired via ``functools.partial``).
        acc: Accepted for interface compatibility; currently unused —
            TODO(review): wire into the rollout-based scoring below.

    Returns:
        list[int]: one score per cumulative prompt. NOTE(review): scoring is
        not implemented yet — every entry is a placeholder ``0``.
    """
    STEP_TAG = '\n\n\n\n\n'
    steps = input_str.split(STEP_TAG)
    # Build cumulative prompts, one per step prefix.
    # NOTE(review): ``steps[i + 1] + inputs[i]`` places the NEW step in front
    # of the accumulated prefix — confirm this ordering is intended; it looks
    # like ``inputs[i] + steps[i + 1]`` may have been meant.
    inputs = [steps[0]]
    for i in range(len(steps) - 1):
        inputs.append(steps[i + 1] + inputs[i])
    inference_params = {
        "temperature": 0.6,
        "max_tokens": 4096,
        "stop_token_ids": [tokenizer.eos_token_id, tokenizer.pad_token_id],
        "n": 8,  # rollouts sampled per step prefix
    }
    sampling_params = SamplingParams(**inference_params)
    outputs = model.generate(inputs, sampling_params)
    # TODO(review): rollout texts (``[o.text for o in output.outputs]``) are
    # sampled but never scored; until real scoring (presumably using ``acc``)
    # lands, each step receives a placeholder score of 0.
    step_scores = [0 for _ in outputs]
    return step_scores
......
...@@ -61,6 +61,7 @@ class ModelWorker(BaseModelWorker): ...@@ -61,6 +61,7 @@ class ModelWorker(BaseModelWorker):
device: str, device: str,
num_gpus: int, num_gpus: int,
max_gpu_memory: str, max_gpu_memory: str,
acc: float = 1.0,
dtype: Optional[torch.dtype] = None, dtype: Optional[torch.dtype] = None,
load_8bit: bool = False, load_8bit: bool = False,
cpu_offloading: bool = False, cpu_offloading: bool = False,
...@@ -115,13 +116,14 @@ class ModelWorker(BaseModelWorker): ...@@ -115,13 +116,14 @@ class ModelWorker(BaseModelWorker):
self.stream_interval = stream_interval self.stream_interval = stream_interval
self.embed_in_truncate = embed_in_truncate self.embed_in_truncate = embed_in_truncate
self.seed = seed self.seed = seed
self.acc = acc
if not no_register: if not no_register:
self.init_heart_beat() self.init_heart_beat()
infer_fn = get_infer_fn(model_path) infer_fn = get_infer_fn(model_path)
self.infer_fn = functools.partial( self.infer_fn = functools.partial(
infer_fn, model=self.model, tokenizer=self.tokenizer, device=self.device infer_fn, model=self.model, tokenizer=self.tokenizer, device=self.device, acc=self.acc
) )
@torch.inference_mode() @torch.inference_mode()
...@@ -195,6 +197,12 @@ def create_model_worker(): ...@@ -195,6 +197,12 @@ def create_model_worker():
default=False, default=False,
help="Enable SSL. Requires OS Environment variables 'SSL_KEYFILE' and 'SSL_CERTFILE'.", help="Enable SSL. Requires OS Environment variables 'SSL_KEYFILE' and 'SSL_CERTFILE'.",
) )
parser.add_argument(
"--acc",
type=float,
default=1.0,
help="acc for prm",
)
args = parser.parse_args() args = parser.parse_args()
logger.info(f"args: {args}") logger.info(f"args: {args}")
...@@ -246,6 +254,7 @@ def create_model_worker(): ...@@ -246,6 +254,7 @@ def create_model_worker():
device=args.device, device=args.device,
num_gpus=args.num_gpus, num_gpus=args.num_gpus,
max_gpu_memory=args.max_gpu_memory, max_gpu_memory=args.max_gpu_memory,
acc=args.acc,
dtype=str_to_torch_dtype(args.dtype), dtype=str_to_torch_dtype(args.dtype),
load_8bit=args.load_8bit, load_8bit=args.load_8bit,
cpu_offloading=args.cpu_offloading, cpu_offloading=args.cpu_offloading,
......
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([False, False, False, False, False, True, False, False, False, False,\n",
" False, True, False, False, False, True, False])\n",
"tensor([10, 10, 10])\n"
]
}
],
"source": [
"import torch\n",
"\n",
"input_ids = torch.tensor([1,2,3,4,5,0,2,3,4,5,6,0,1,2,3,0,1])\n",
"sorces = torch.tensor([1,2,3,4,5,10,2,3,4,5,6,10,1,2,3,10,1])\n",
"step_tag_id = torch.tensor([0])\n",
"mask = input_ids == step_tag_id\n",
"print(mask)\n",
"print(sorces[mask])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "logic",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment