Commit ecf95207 by nzy

step1: code refactoring: split a large config file into multiple small pieces

parent 89c84aef
model = "/path/to/model"
apps = "/path/to/apps_dataset"
[sample]
sample_prompt_path = "path"
sample_result_path = "path"
[sample.sampling_params]
n = 0
temperature = 0.6
max_new_tokens = 2048
[evaluate]
evaluate_result_path = ""
[dataset]
train_path = ""
test_path = ""
minimal_test_path = ""
\ No newline at end of file
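The step1 scripts below read these settings through a shared read_config helper imported from utils, which this commit does not show. A minimal sketch of what such a helper could look like, assuming the settings above live in a TOML file named config.toml (both the filename and the use of tomllib are assumptions, not part of the diff):

# Hypothetical sketch of utils.read_config (not shown in this commit).
# Assumes the configuration above is stored as TOML in ./config.toml;
# the real helper may take the path from the command line instead.
import tomllib  # stdlib in Python 3.11+; older versions need the third-party tomli package

def read_config(path="config.toml"):
    """Load the step1 settings into a nested dict, e.g. cfg["sample"]["sampling_params"]["n"]."""
    with open(path, "rb") as f:
        return tomllib.load(f)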
@@ -147,12 +147,3 @@ def evaluate(code_sample_path, dataset_path, output_path):
    results = evaluate_incorrect_code_samples_again(results, apps, 10)
    save_jsonl(results, output_path)

if __name__ == "__main__":
    cfg = read_config()
    evaluate(
        cfg["sample"]["sample_result_path"],
        cfg["apps"],
        cfg["evaluate"]["evaluate_result_path"],
    )
from utils import read_config
from utils_vllm import vllm_chatcomplete
from step1_sample_apps import mk_sample_prompt
from step1_evaluate_code import evaluate
from step1_sort_split_dataset import sort_and_split_dataset

if __name__ == "__main__":
    cfg = read_config()
    mk_sample_prompt(cfg["model"], cfg["apps"], cfg["sample"]["sample_prompt_path"])
    vllm_chatcomplete(
        cfg["model"],
        cfg["sample"]["sample_prompt_path"],
        cfg["sample"]["sample_result_path"],
        cfg["sample"]["sampling_params"],
    )
    evaluate(
        cfg["sample"]["sample_result_path"],
        cfg["apps"],
        cfg["evaluate"]["evaluate_result_path"],
    )
    sort_and_split_dataset(
        cfg["evaluate"]["evaluate_result_path"],
        cfg["dataset"]["train_path"],
        cfg["dataset"]["test_path"],
        cfg["dataset"]["minimal_test_path"],
        cfg["sample"]["sampling_params"]["n"]
    )
\ No newline at end of file
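vllm_chatcomplete is imported from utils_vllm, which this diff also leaves untouched; it receives the model path, the prompt and result files, and the [sample.sampling_params] table as a plain dict. The sketch below shows one plausible shape for it using vLLM's offline chat API; the per-line "messages" field, the "completions" output field, and the max_new_tokens -> max_tokens mapping are all assumptions rather than code from this repository.

# Hypothetical sketch of utils_vllm.vllm_chatcomplete (not shown in this commit).
# Assumes each line of the prompt file is a JSON object carrying a chat
# "messages" list, and that max_new_tokens maps onto vLLM's max_tokens.
import json
from vllm import LLM, SamplingParams  # requires a vLLM version that provides LLM.chat

def vllm_chatcomplete(model_path, prompt_path, result_path, sampling_params):
    with open(prompt_path) as f:
        records = [json.loads(line) for line in f]  # one prompt record per line

    llm = LLM(model=model_path)
    params = SamplingParams(
        n=sampling_params["n"],
        temperature=sampling_params["temperature"],
        max_tokens=sampling_params["max_new_tokens"],
    )

    with open(result_path, "w") as f:
        for rec in records:
            out = llm.chat(rec["messages"], params)[0]  # assumed "messages" field
            rec["completions"] = [c.text for c in out.outputs]  # n completions per prompt
            f.write(json.dumps(rec) + "\n")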
@@ -60,16 +60,3 @@ def mk_sample_prompt(model_path, apps_path, output_path):
    print(f"size of dataset: {len(prompts)}")
    save_jsonl(prompts, output_path)

if __name__ == "__main__":
    cfg = read_config()
    mk_sample_prompt(cfg["model"], cfg["apps"], cfg["sample"]["sample_prompt_path"])

    from utils_vllm import vllm_chatcomplete
    vllm_chatcomplete(
        cfg["model"],
        cfg["sample"]["sample_prompt_path"],
        cfg["sample"]["sample_result_path"],
        cfg["sample"]["sampling_params"],
    )
\ No newline at end of file
@@ -56,14 +56,3 @@ def sort_and_split_dataset(raw_dataset_path, new_train_path, new_test_path, mini
    save_jsonl(new_train, new_train_path)
    save_jsonl(new_test, new_test_path)
    save_jsonl(minimal_test, minimal_test_path)

if __name__ == "__main__":
    cfg = read_config()
    sort_and_split_dataset(
        cfg["evaluate"]["evaluate_result_path"],
        cfg["dataset"]["train_path"],
        cfg["dataset"]["test_path"],
        cfg["dataset"]["minimal_test_path"],
        cfg["sample"]["sampling_params"]["n"]
    )