Commit a7508d55 by ZhangXiaoyun

redis

parent d70794a9
......@@ -10,6 +10,19 @@ import multiprocessing
lock = multiprocessing.Lock()
print(id(lock), os.getpid())
import redis
redis_client = redis.Redis(host='127.0.0.1', port=6379, db=0)
def set_shared_value(key, value):
    """Store ``value`` under ``key`` through the module-level ``redis_client``.

    The result of the underlying ``set`` call is discarded; this function
    always returns ``None``.
    """
    redis_client.set(key, value)
def get_shared_value(key):
    """Fetch the integer stored under ``key`` through ``redis_client``.

    Returns ``None`` when the fetched value is falsy (for example when the
    lookup yields ``None`` or an empty payload); otherwise the payload is
    decoded as UTF-8 and converted with ``int``.
    """
    raw = redis_client.get(key)
    if not raw:
        # Nothing usable is stored for this key.
        return None
    return int(raw.decode('utf-8'))
question_item_map = {}
with open("./envs/MATH/dataset/test500.jsonl", encoding="utf-8") as f:
for line in f:
......@@ -85,11 +98,24 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
# "stop_token_ids": [151643, 151643],
}
sampling_params = SamplingParams(**inference_params)
step_scores = []
for i in range(len(inputs)):
step_score = get_shared_value(inputs[i])
if step_score is None:
break
# invert the score with probability (1 - acc)
if random.random() > acc:
step_score = 1 - step_score
step_scores.append(step_score)
inputs = inputs[len(step_scores):]
global lock
with lock:
outputs = model.generate(inputs, sampling_params)
step_scores = []
for output in outputs:
step = output.prompt
......@@ -104,6 +130,7 @@ def _qwen_math_gold_infer_fn(input_str: str, model, tokenizer, device, acc):
break
# invert the score with probability (1 - acc)
set_shared_value(step, step_score)
if random.random() > acc:
step_score = 1 - step_score
step_scores.append(step_score)
......
......@@ -13,7 +13,7 @@ PYTHON_EXECUTABLE=$(which python3)
MODEL_BASE=/share/collab/codemodel/models
CUDA_DEVICE_BASE=0
POLICY_MODEL_NAME=Qwen2.5-Math-1.5B-Instruct
POLICY_MODEL_NAME=Qwen2.5-Math-7B-Instruct
# VALUE_MODEL_NAME=qwen_prm/checkpoint-6898/
# VALUE_MODEL_NAME=Qwen/Qwen2.5-Math-7B-PRM
VALUE_MODEL_NAME=${POLICY_MODEL_NAME}_RM
......
# Abort on the first failing command.
set -e

# Per-job Ray temp directory, keyed by the SLURM job id.
# NOTE(review): if SLURM_JOBID is unset this collapses to "/tmp/ray_" — confirm
# the script is only launched from inside a SLURM job.
export RAY_TEMP_DIR="/tmp/ray_$SLURM_JOBID"
echo "RAY TEMP DIR is $RAY_TEMP_DIR"

# Network layout: one controller port, workers take consecutive ports
# starting at WORKER_BASE_PORT.
HOST_ADDR=0.0.0.0
CONTROLER_PORT=28777
WORKER_BASE_PORT=30010

# Defaults; --acc and --policy_model_name override ACC and POLICY_MODEL_NAME.
ACC=1.0
MODEL_BASE=/share/collab/codemodel/models
CUDA_DEVICE_BASE=0
POLICY_MODEL_NAME=Qwen2.5-Math-7B-Instruct

# Parse "--option value" pairs. An option given without a value is rejected
# explicitly; otherwise the trailing `shift` would fail under `set -e` and the
# script would exit without any message.
while [[ "$#" -gt 0 ]]; do
    case $1 in
        --acc)
            if [[ "$#" -lt 2 ]]; then
                echo "Missing value for parameter: $1" >&2
                exit 1
            fi
            ACC="$2"; shift ;;
        --policy_model_name)
            if [[ "$#" -lt 2 ]]; then
                echo "Missing value for parameter: $1" >&2
                exit 1
            fi
            POLICY_MODEL_NAME="$2"; shift ;;
        *) echo "Unknown parameter passed: $1"; exit 1 ;;
    esac
    shift
done

# Derived names and paths.
VALUE_MODEL_NAME=${POLICY_MODEL_NAME}_RM
MODEL_PATH=$MODEL_BASE/$POLICY_MODEL_NAME
# NOTE(review): the value model path points at the policy model directory,
# not at a "_RM" directory — presumably intentional; confirm.
VALUE_MODEL_PATH=$MODEL_BASE/$POLICY_MODEL_NAME

# Resolve the interpreter once and report it.
PYTHON_EXECUTABLE=$(which python3)
echo PYTHON_EXECUTABLE=$PYTHON_EXECUTABLE

LOGDIR=logs_fastchat
# Start the tmux server and create a detached session named "FastChat" whose
# first window is called "controller".
tmux start-server
tmux new-session -s FastChat -n controller -d
# Type the commands into that window: export LOGDIR, then launch the FastChat
# controller on $HOST_ADDR:$CONTROLER_PORT.
tmux send-keys "export LOGDIR=${LOGDIR}" Enter
tmux send-keys "$PYTHON_EXECUTABLE -m fastchat.serve.controller --port ${CONTROLER_PORT} --host $HOST_ADDR" Enter
# Number of policy (LM) workers and reward-model (RM) workers to launch.
NUM_LM_WORKER=2
NUM_RM_WORKER=14

# Pause before launching workers (presumably so the controller is up first).
echo "Wait 5 seconds ..."
sleep 5

echo "Starting workers"
# One tmux window per policy worker; worker i listens on WORKER_BASE_PORT + i
# and every policy worker is pinned to GPU $CUDA_DEVICE_BASE.
for ((i = 0; i < NUM_LM_WORKER; i++)); do
    WORKER_PORT=$((WORKER_BASE_PORT+i))
    tmux new-window -n policy_worker_$i
    tmux send-keys "export LOGDIR=${LOGDIR}" Enter
    tmux send-keys "CUDA_VISIBLE_DEVICES=$((CUDA_DEVICE_BASE)) $PYTHON_EXECUTABLE -m reason.llm_service.workers.vllm_worker --model-path $MODEL_PATH --controller-address http://$HOST_ADDR:$CONTROLER_PORT --host $HOST_ADDR --port $WORKER_PORT --worker-address http://$HOST_ADDR:$WORKER_PORT --gpu_memory_utilization 0.45" Enter
done
# Start the value (gold reward model) service: one tmux window per worker.
for i in $(seq 0 $((NUM_RM_WORKER-1)))
do
    # Ports continue right after the policy workers' ports.
    WORKER_PORT=$((i+WORKER_BASE_PORT+NUM_LM_WORKER))
    # Integer division by 2 places two consecutive worker slots on each GPU;
    # the NUM_LM_WORKER offset continues the slot count after the policy workers.
    GPU_ID=$(((i + NUM_LM_WORKER) / 2 + CUDA_DEVICE_BASE))
    # Index the window name (like policy_worker_$i) so each window can be
    # addressed unambiguously.
    tmux new-window -n value_worker_$i
    tmux send-keys "export LOGDIR=${LOGDIR}" Enter
    tmux send-keys "CUDA_VISIBLE_DEVICES=$GPU_ID $PYTHON_EXECUTABLE -m reason.llm_service.workers.gold_reward_model_worker --model-path $VALUE_MODEL_PATH --model-names $VALUE_MODEL_NAME --controller-address http://$HOST_ADDR:$CONTROLER_PORT --host $HOST_ADDR --port $WORKER_PORT --worker-address http://$HOST_ADDR:$WORKER_PORT --gpu_memory_utilization 0.45 --acc $ACC" Enter
done
# Run Redis in its own tmux window. Without a new window the keys would be
# typed into the current window, where a previously launched command may
# still be running in the foreground, so redis-server would not start.
tmux new-window -n redis_server
tmux send-keys "redis-server ~/redis_conf/redis.conf" Enter
# Per-job Ray temp directory, keyed by the SLURM job id.
export RAY_TEMP_DIR="/tmp/ray_$SLURM_JOBID"
echo "RAY TEMP DIR is $RAY_TEMP_DIR"

# Defaults; --policy_model_name and --acc override them.
POLICY_MODEL_NAME=Qwen2.5-Math-7B-Instruct
ACC=1.0

# Parse "--option value" pairs. An option given without a value is rejected
# explicitly; otherwise it would silently become an empty string and end up
# in SAVE_DIR below.
while [[ "$#" -gt 0 ]]; do
    case $1 in
        --acc)
            if [[ "$#" -lt 2 ]]; then
                echo "Missing value for parameter: $1" >&2
                exit 1
            fi
            ACC="$2"; shift ;;
        --policy_model_name)
            if [[ "$#" -lt 2 ]]; then
                echo "Missing value for parameter: $1" >&2
                exit 1
            fi
            POLICY_MODEL_NAME="$2"; shift ;;
        *) echo "Unknown parameter passed: $1"; exit 1 ;;
    esac
    shift
done

# Reward-model name and output directory are derived from the parsed values.
VALUE_MODEL_NAME=${POLICY_MODEL_NAME}_RM
SAVE_DIR="${POLICY_MODEL_NAME}/${ACC}"
# Run reason/evaluation/evaluate.py with the beam_search method on the MATH
# task, talking to the controller at http://0.0.0.0:28777.
# NOTE(review): --LM, --RM and --save_dir are each passed twice (hard-coded
# 1.5B / "debug" values first, then the variables). This looks like removed
# and added diff lines merged together; presumably the later values win —
# confirm against evaluate.py's argument parser and drop the stale lines.
python reason/evaluation/evaluate.py \
--LM Qwen2.5-Math-1.5B-Instruct \
--RM Qwen2.5-Math-1.5B-Instruct_RM \
--LM $POLICY_MODEL_NAME \
--RM $VALUE_MODEL_NAME \
--task_name MATH \
--temperature 0.7 \
--max_new_tokens 2048 \
--num_sequence 1 \
--tree_max_width 4 \
--tree_max_depth 50 \
--save_dir debug \
--save_dir $SAVE_DIR \
--method beam_search \
--num_worker 64 \
--controller_addr http://0.0.0.0:28777
# math-shepherd-mistral-7b-prm
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment