feat: add debug.

9a24359b · wyt2000 · be06b7c4 · 9a24359b · 9a24359b · 9a24359b
Commit 9a24359b authored Sep 12, 2024 by wyt2000
Show whitespace changes
Inline Side-by-side

Showing with 77 additions and 8 deletions

draw_fig.py
+3 -3

draw_fig_compare.py
+51 -0

prover/workers/generator.py
+20 -2

test-proof.slurm
+3 -3

No files found.
--- a/draw_fig.py
+++ b/draw_fig.py
@@ -23,7 +23,7 @@ def read_dataset(path):
                vers.append(float(x[1]))
    return gens, vers
-gens, vers = read_dataset('ret_one/test-proof_542524.out')
+gens, vers = read_dataset('ret_one/test-proof_542678.out')
 def draw(dataset, color, label):
    num_samples = len(dataset)
@@ -42,9 +42,9 @@ def draw(dataset, color, label):
 draw(gens, 'green', 'Generation')
 draw(vers, 'blue', 'Verification')
-plt.xlabel("Time (s) of DeepSeek-Prover-V1.5-Base", fontsize=15)
+plt.xlabel("Time (s) of Code-Math-QA-Proof-minicpm-quant-fp4", fontsize=15)
 plt.ylabel("Percentage", fontsize=15)
 plt.legend(prop={'size': 10})
-plt.savefig('DeepSeek-Prover-V1.5-Base.png', format='png')   # save as PNG
+plt.savefig('Code-Math-QA-Proof-minicpm-quant-fp4.png', format='png')   # save as PNG
--- a/draw_fig_compare.py
+++ b/draw_fig_compare.py
+import matplotlib.pyplot as plt
+import json
+import numpy as np
+from collections import Counter
+import re
+def extract_time(text):  # Pull the generation and verification timings out of one piece of log text.
+    pattern = r"Generation:\s*([\d.]+)\s*secs\s*Verfication:\s*([\d.]+)\s*secs"  # NOTE(review): "Verfication" is spelled this way in the pattern; presumably it mirrors the log output -- confirm before correcting it
+    matches = re.search(pattern, text)  # search, not match: the timing pair may sit anywhere in the text
+    if matches:
+        gen = matches.group(1)  # numeric text captured after "Generation:"
+        ver = matches.group(2)  # numeric text captured after "Verfication:"
+        return gen, ver  # both values are returned as strings, not floats
+    return None  # no timing pair present in this text
+def read_dataset(path):  # Collect every generation/verification timing pair found in the file at `path`.
+    gens, vers = [], []  # parallel lists: generation times and verification times
+    with open(path) as f:
+        for line in f.readlines():
+            if x := extract_time(line):  # lines without a timing pair give None and are skipped
+                gens.append(float(x[0]))  # extract_time yields strings; convert to float here
+                vers.append(float(x[1]))
+    return gens, vers  # two lists of floats, one entry per matching line
+int4gens, _ = read_dataset('ret_one/test-proof_542677.out')  # keep generation times only; verification times are discarded
+fp4gens, _ = read_dataset('ret_one/test-proof_542678.out')  # same, for the second log file
+def draw(dataset, color, label):  # Plot the distribution of `dataset` on the current matplotlib figure in `color`, labelled `label`.
+    num_samples = len(dataset)
+    hist, bins = np.histogram(dataset, 10)  # 10 bins; `bins` holds the edges, so it has one more entry than `hist`
+    avg_score = np.mean(dataset)
+    max_score = np.max(dataset)  # NOTE(review): an empty dataset (no matching log lines) would make np.max/np.min raise -- confirm inputs are non-empty
+    min_score = np.min(dataset)
+    xs = bins[:-1]  # drop the last edge so xs lines up with hist (left edge of each bin)
+    ys = [h / num_samples for h in hist]  # bin counts -> fraction of all samples
+    plt.plot(xs, ys, color=color, label=f'{label}; Avg: {avg_score:.3f}s; Max: {max_score:.3f}s; Min: {min_score:.3f}s', zorder=3)  # the legend entry carries the summary statistics
+    plt.fill_between(xs, ys, color=color, alpha=0.3, zorder=2)  # translucent fill under the curve, drawn beneath the line
+    plt.axvline(x=avg_score, color=color, linestyle='dotted')  # dotted vertical marker at the mean
+draw(int4gens, 'green', 'int4')
+draw(fp4gens, 'blue', 'fp4')
+plt.xlabel("Time (s) of int4 and fp4", fontsize=15)
+plt.ylabel("Percentage", fontsize=15)
+plt.legend(prop={'size': 10})
+plt.savefig('int4vsfp4.png', format='png')   # save as PNG
--- a/prover/workers/generator.py
+++ b/prover/workers/generator.py
@@ -24,6 +24,15 @@ class GeneratorProcess(mp.Process):
        self.top_p       = args.top_p
        self.prompt_func = MODEL_FORMAT[args.mode]['prompt']
        self.output_func = MODEL_FORMAT[args.mode]['output']
+        self.debug       = False 
+    def debug_print(self, name, msg):  # Append `msg`, wrapped in BEGIN/END banners tagged with `name`, to a per-rank dump file.
+        if not self.debug:  # no-op unless the debug flag is switched on
+            return
+        with open(f'output_rank_{self.local_rank}', 'a') as f:  # append mode: one file per local rank, relative to the working directory
+            f.write(f'\n########## {name} BEGIN ##########\n')
+            f.write(msg)  # NOTE(review): text-mode write needs a str -- confirm every caller passes a string
+            f.write(f'\n########## {name} END ##########\n')
    def run(self):
        seed = int(time.time()) % 1000 + (self.node_rank * 8 + self.local_rank) * 1000
@@ -40,6 +49,7 @@ class GeneratorProcess(mp.Process):
            inputs = self.task_queue.get()
            if inputs is None: # Terminate when receiving None
                break
            model_inputs_text = [
                ''.join([
                    item.get('_extra_header', str()),
@@ -48,6 +58,13 @@ class GeneratorProcess(mp.Process):
                ]) for _, _, item in inputs
            ]
+            # prompt format, only for aimo-minicpm
+            model_inputs_text = [
+                f'### Problem:\n{inp}### Solution: \n'
+                for inp in model_inputs_text
+            ]
+            self.debug_print('INPUT', model_inputs_text[0])
            # encode inputs
            model_inputs = [
                tokenizer(inp, return_tensors="pt").to(model.device)
@@ -65,16 +82,17 @@ class GeneratorProcess(mp.Process):
                tokenizer.decode(out[0], skip_special_tokens=True)
                for out in model_outputs
            ]
+            self.debug_print('RAW OUTPUT', model_outputs[0])
            # remove inputs in decoding results
            model_outputs = [
-                out[len(inp):].strip()
+                out[len(inp):]
                for inp, out in zip(model_inputs_text, model_outputs)
            ]
            # extract code snippet
            outputs = [self.output_func(_output) for _output in model_outputs]
+            self.debug_print('OUTPUT', outputs[0])
            # send to verify
            with self.lock:

--- a/test-proof.slurm
+++ b/test-proof.slurm
@@ -18,7 +18,7 @@
 #SBATCH --cpus-per-task=64           # Request Q core per task; means that P*Q cores per node
 #SBATCH --qos=normal                 # Request QOS Type
 ### #SBATCH --constraint="IB&A100"
-#SBATCH --nodelist=r8a100-a02
+#SBATCH --nodelist=r8a100-d04
 ###
@@ -30,7 +30,7 @@
 ### Without specifying the constraint, any available nodes that meet the requirement will be allocated
 ### You can specify the characteristics of the compute nodes, and even the names of the compute nodes
 ###
-### #SBATCH --nodelist=r8a100-a02
+### #SBATCH --nodelist=r8a100-d04
 ### #SBATCH --constraint="Volta|RTX8000" # Request GPU Type: Volta(V100 or V100S) or RTX8000
 ###
@@ -79,7 +79,7 @@ export http_proxy=10.200.22.22:18421
 export https_proxy=10.200.22.22:18421
 export config_name=sampling_few_shot_pass_at_1
-export model_name=deepseekproof-minicpm-quant-fixed-0906
+export model_name=deepseekproof-minicpm-bf16-0911
 export model_path=/lustre/S/huangdi/open_for_out/models/aimo-progress-prize-trained-models/$model_name
 export log_dir=logs/${model_name}_${config_name}