Commit 83e4a159 by nzy

step2: update preference pair's format

parent bd555b17
...@@ -117,10 +117,10 @@ if __name__ == "__main__": ...@@ -117,10 +117,10 @@ if __name__ == "__main__":
all_edit_distance_pairs, 10 * 1000, 5, is_max=False all_edit_distance_pairs, 10 * 1000, 5, is_max=False
) )
dataset_info = mk_dataset_info("apps_min_edit_distance_prefrence") dataset_info = mk_dataset_info("apps_min_edit_distance_prefrence")
save_jsonl( save_json(
metadata, cfg["preference_dataset"]["min_edit_distance"]["metadata_path"] metadata, cfg["preference_dataset"]["min_edit_distance"]["metadata_path"]
) )
save_jsonl( save_json(
preference_pairs, preference_pairs,
cfg["preference_dataset"]["min_edit_distance"]["preference_dataset_path"], cfg["preference_dataset"]["min_edit_distance"]["preference_dataset_path"],
) )
......
...@@ -19,12 +19,14 @@ def mk_dataset_info(dataset_name): ...@@ -19,12 +19,14 @@ def mk_dataset_info(dataset_name):
} }
} }
# see utils.extract_code
code_template = r"```python{}```"
def mk_preference_pair(instruction, chosen_code, rejected_code): def mk_preference_pair(instruction, chosen_code, rejected_code):
return { return {
"messages": [ "messages": [
{"role": "user", "content": instruction}, {"role": "user", "content": instruction},
], ],
"chosen": {"role": "assistant", "content": chosen_code}, "chosen": {"role": "assistant", "content": code_template.format(chosen_code)},
"rejected": {"role": "assistant", "content": rejected_code}, "rejected": {"role": "assistant", "content": code_template.format(rejected_code)},
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment