Commit b3bf4ddf by nzy

utils: update

parent 59fa167d
import json import json
import re import re
from codebleu import calc_codebleu
import sys
if sys.version_info < (3, 11):
import tomli as tomllib
else:
import tomllib
def load_jsonl(file_path): def load_jsonl(file_path):
...@@ -25,6 +17,7 @@ def save_jsonl(data, file_path): ...@@ -25,6 +17,7 @@ def save_jsonl(data, file_path):
for item in data: for item in data:
f.write(json.dumps(item) + "\n") f.write(json.dumps(item) + "\n")
def save_json(data, file_path, indent=None): def save_json(data, file_path, indent=None):
with open(file_path, "w", encoding="utf-8") as f: with open(file_path, "w", encoding="utf-8") as f:
json.dump(data, f, indent=indent) json.dump(data, f, indent=indent)
...@@ -36,6 +29,7 @@ code_template = """```python ...@@ -36,6 +29,7 @@ code_template = """```python
``` ```
""" """
def extract_code(text: str): def extract_code(text: str):
codes = [match.strip() for match in re.findall(codeblock_pattern, text)] codes = [match.strip() for match in re.findall(codeblock_pattern, text)]
if len(codes) > 0: if len(codes) > 0:
...@@ -43,7 +37,3 @@ def extract_code(text: str): ...@@ -43,7 +37,3 @@ def extract_code(text: str):
return code return code
else: else:
return "" return ""
def code_similarity(ref, pred):
return calc_codebleu([ref], [pred], lang="python", weights=(0, 0.5, 0.5, 0))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment