Commit fabd2413 by nzy

refactor: move code-related functions to data.code

parent 78b884ce
...@@ -163,3 +163,5 @@ cython_debug/ ...@@ -163,3 +163,5 @@ cython_debug/
readme.pdf readme.pdf
test/ test/
# temporary scripts
*.py
\ No newline at end of file
...@@ -7,8 +7,8 @@ from transformers import AutoTokenizer ...@@ -7,8 +7,8 @@ from transformers import AutoTokenizer
import pprint import pprint
from pathlib import Path from pathlib import Path
from codecritic.data.code import code_template, extract_code
from codecritic.utils.json import load_jsonl, save_jsonl from codecritic.utils.json import load_jsonl, save_jsonl
from codecritic.utils.data import extract_code, code_template
from codecritic.utils.metric import group_results, score_pass_at_k from codecritic.utils.metric import group_results, score_pass_at_k
......
# Additional Experiment: # Additional Experiment:
# Does reasoning really work? Let's verify step by step. # Does reasoning really work? Let's verify step by step.
from codecritic.data.code import extract_code, code_template
from codecritic.utils.data import ( from codecritic.utils.data import (
extract_code,
code_template,
mk_message, mk_message,
mk_messages, mk_messages,
mk_critic_verify, mk_critic_verify,
......
from codecritic.utils.json import load_jsonl from codecritic.utils.json import load_jsonl
from codecritic.utils.data import extract_code from codecritic.data.code import extract_code, code_template
from nltk.metrics.distance import edit_distance from nltk.metrics.distance import edit_distance
from collections import defaultdict from collections import defaultdict
from itertools import product, chain from itertools import product, chain
......
import re
from pathlib import Path from pathlib import Path
from codecritic.utils.json import save_jsonl from codecritic.utils.json import save_jsonl
# Matches the text between a "```python" opening fence and the next "```".
# DOTALL lets "." span newlines; the non-greedy "+?" stops at the first closing
# fence so that consecutive blocks are captured separately.
# NOTE: the language tag is matched as a plain prefix, so anything directly
# after "```python" (e.g. the "3" of "```python3") is part of the capture.
codeblock_pattern = re.compile(r"```python(.+?)```", flags=re.DOTALL)

# Markdown wrapper for a code snippet; fill it with ``code_template.format(code)``.
code_template = """```python
{}
```
"""


def extract_code(text: str) -> str:
    """Return the contents of every ```python fenced block found in *text*.

    Each block is stripped of surrounding whitespace and the blocks are joined
    with a single newline, in order of appearance.

    Args:
        text: Arbitrary text that may contain fenced ```python code blocks.

    Returns:
        The concatenated code, or an empty string when *text* contains no
        matching block.
    """
    # Joining an empty sequence yields "", so the no-match case needs no
    # separate branch.
    return "\n".join(block.strip() for block in codeblock_pattern.findall(text))
# Note that the human and observation should appear in odd positions # Note that the human and observation should appear in odd positions
# while llm should appear in even positions. # while llm should appear in even positions.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment