Commit e864d804 by nanziyuan

r1

parent ef55d00d
......@@ -5,9 +5,13 @@ from functools import partial
import codecritic.evaluation.metric as metric
from codecritic.utils.json import load_jsonl
import pprint
def confidence(item):
    """Collapse an item's (prediction, confidence) pair into one signed score.

    A truthy ``item["prediction"]`` keeps the confidence positive; a falsy
    prediction negates it. A missing confidence (``None``) is scored as -1
    regardless of the prediction.
    """
    # Handle the missing-confidence case up front.
    if item["confidence"] is None:
        return -1
    if item["prediction"]:
        return item["confidence"]
    return -item["confidence"]
def eval(scores):
......@@ -37,12 +41,21 @@ if __name__ == "__main__":
scores = load_jsonl(args.score)
groups = defaultdict(list)
for item in scores:
groups[item["dataset"]].append(item)
groups[item["task_id"]].append(item)
newscores = []
for dataset, lst in groups.items():
results = eval(lst)
for r in results:
r["dataset"] = dataset
r["strategy"] = "r1_qwen_7b"
del r["score_func"]
print(json.dumps(r))
pass_lst = [x["pass"] for x in lst]
if any(pass_lst):
print(sum(pass_lst))
newscores.extend(lst)
# results = eval(lst)
# for r in results:
# r["dataset"] = dataset
# r["strategy"] = "r1_qwen_7b"
# del r["score_func"]
# print(json.dumps(r))
print(len(newscores))
labels, bscores = [x["pass"] for x in newscores], [1 if x["prediction"] else 0 for x in scores]
pprint.pp(metric.binary_metrics(labels, bscores))
......@@ -118,7 +118,7 @@ if __name__ == "__main__":
#with ThreadPoolExecutor(max_workers=4) as executor:
# responses = executor.map(chat_fun, prompts)
responses = thread_map(chat_fun, prompts, max_workers=4)
responses = thread_map(chat_fun, prompts, max_workers=8)
for item, response in zip(dataset, responses):
judgement, confidence = postprocess_response(response)
......
......@@ -4,7 +4,7 @@ llm_kit:
router_port: 8000
tensor_parallel_size: 1
pipeline_parallel_size: 1
data_parallel_size: 4
data_parallel_size: 8
router_timeout: 1200
random_seeds:
- 1111
......
......@@ -3,11 +3,11 @@ model="/share/collab/codemodel/models/DeepSeek-R1-Distill-Qwen-7B/"
data="/nfs_global/S/nanziyuan/projects/ccc/data"
testset="${data}/test/qwen25_coder_inst-apps-test.jsonl"
evalresults="${data}/eval/qwen25_code_inst-apps-test-r1_7b_test.jsonl"
evalresults="${data}/eval/qwen25_code_inst-apps-test-r1_7b.jsonl"
# python -m llmkit_data.cli.serve --config /nfs_global/S/nanziyuan/projects/ccc/src/scripts/config.yaml &
python -m llmkit_data.cli.serve --config /nfs_global/S/nanziyuan/projects/ccc/src/scripts/config.yaml &
# vllm serve ${model} --max_model 12288
# sleep 300s
sleep 300s
python -m codecritic.cli.test_r1 \
--model ${model} \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment