Commit fabd2413 by nzy

refactor: move code-related functions to data.code

parent 78b884ce
......@@ -162,4 +162,6 @@ cython_debug/
#.idea/
readme.pdf
test/
\ No newline at end of file
test/
# temporary scripts
*.py
\ No newline at end of file
......@@ -7,8 +7,8 @@ from transformers import AutoTokenizer
import pprint
from pathlib import Path
from codecritic.data.code import code_template, extract_code
from codecritic.utils.json import load_jsonl, save_jsonl
from codecritic.utils.data import extract_code, code_template
from codecritic.utils.metric import group_results, score_pass_at_k
......
# Additional Experiment:
# Does reasoning really work? Let's verify step by step.
from codecritic.data.code import extract_code, code_template
from codecritic.utils.data import (
extract_code,
code_template,
mk_message,
mk_messages,
mk_critic_verify,
......
from codecritic.utils.json import load_jsonl
from codecritic.utils.data import extract_code
from codecritic.data.code import extract_code, code_template
from nltk.metrics.distance import edit_distance
from collections import defaultdict
from itertools import product, chain
......
import re
from pathlib import Path
from codecritic.utils.json import save_jsonl
# Matches each fenced block opened with ```python and closed with ```;
# DOTALL lets the non-greedy body span multiple lines.
codeblock_pattern = re.compile(r"```python(.+?)```", flags=re.DOTALL)

# Markdown fenced-code template with a single `{}` placeholder for the code.
code_template = """```python
{}
```
"""


def extract_code(text: str) -> str:
    """Return the Python code contained in the fenced blocks of *text*.

    Every span matched by ``codeblock_pattern`` is stripped of surrounding
    whitespace, and the resulting snippets are joined with a single newline
    in the order they appear.

    Args:
        text: Text that may contain one or more fenced ``python`` blocks.

    Returns:
        The joined code, or an empty string when no block is found.
    """
    # Joining an empty sequence already yields "", so no separate
    # "no match" branch is needed.
    return "\n".join(
        snippet.strip() for snippet in codeblock_pattern.findall(text)
    )
# Note that the human and observation should appear in odd positions
# while llm should appear in even positions.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment