Commit 8edf8925 by zhengzifu

First Commit

parents
__pycache__
.DS_Store
.venv
.vscode
outputs
outputs-qwen
weights
model.safetensors
001-H-LLM
build
src-Optimize_HN
*.egg-info
*.egg
*.so
# %%
import sys
import numpy as np
from pyrilog import (
VerilogGenerator,
ModuleBlock,
add_parameter,
add_input,
add_output,
add_assign,
)
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
from config import CFG
from tqdm import tqdm
import pickle
import json
path_dir = "Optimized_HN"
def calculate_WW(matrix: np.array, value_range):
    """Return, for each candidate value, its largest per-row occurrence count.

    Args:
        matrix: 2-D array-like of weights; every row is counted on its own.
        value_range: Sequence of candidate weight values.

    Returns:
        A list of ints aligned with ``value_range``. Entry ``i`` is the largest
        number of elements in any single row that lie within 0.01 of
        ``value_range[i]``. An empty matrix yields all zeros.
    """
    # float64 keeps the tolerance comparison independent of the input dtype.
    weights = np.asarray(matrix, dtype=float)
    if weights.size == 0:
        # max() over zero rows would raise; no weights means no occurrences.
        return [0] * len(value_range)
    return [
        int((np.abs(weights - value) <= 0.01).sum(axis=1).max())
        for value in value_range
    ]
def find_index(arr, target, epsilon=1e-3):
    """Locate ``target`` in ``arr`` up to an absolute tolerance.

    Returns the index of the entry closest to ``target`` (the first one on
    ties), provided that entry is closer than ``epsilon``.

    Raises:
        ValueError: If no entry lies within ``epsilon`` of ``target``.
    """
    distances = np.abs(np.array(arr) - target)
    closest = np.min(distances)
    if not closest < epsilon:
        # Even the nearest candidate is outside the tolerance.
        raise ValueError("No match found")
    # First position whose distance equals the minimum.
    return np.nonzero(distances == closest)[0][0]
# %%
def generate_module(
    matrix,
    module_name="HN",
    H=16,
    L=5,
    value_range=[-1, 1],
    WW=[8, 8],
    CUR_VN=0,
    weights_file_name="",
):
    """Build the HN routing module from pre-computed colouring results.

    Reads ``output/<weights_file_name>/<weights_file_name>_vn_<CUR_VN>.json``
    (field ``"node"``) and, for every entry ``i`` of that list, the matching
    ``..._vn_<CUR_VN>_value_<value_range[i]>.json`` file (field ``"color"``).
    For each entry it declares parameter ``WW_<i>`` (largest colour + 1) and
    output ``HN_out_<i>``, then assigns every output slot either one bit of
    ``HN_in`` or the ``CST_LOW`` input.

    Args:
        matrix: Unused here; kept for interface compatibility.
        module_name: Name of the generated module.
        H: Value of the ``H`` parameter (``HN_in`` is declared with size "H").
        L: Value of the ``L`` parameter.
        value_range: Values used to build the per-value colour file names.
            The default list is never mutated.
        WW: Unused; ``WW_<i>`` is derived from the colour file instead.
        CUR_VN: Index used in the input file names.
        weights_file_name: Sub-directory and file-name stem of the inputs.

    Returns:
        The populated ``ModuleBlock``.

    Raises:
        ValueError: If a colour list is empty or contains only ``-1``.
    """
    from collections import Counter

    def load_field(path, key):
        # Read one top-level JSON field; the context manager closes the handle.
        with open(path) as fh:
            return json.load(fh)[key]

    with ModuleBlock(f"{module_name}") as module:
        add_parameter("H", H)
        add_parameter("L", L)
        add_input("HN_in", "H")
        add_input("CST_LOW")
        node_file = os.path.join(
            "output", weights_file_name, f"{weights_file_name}_vn_{CUR_VN}.json"
        )
        node = load_field(node_file, "node")
        # Internal wiring, one output bundle per entry of ``node``.
        for i, hn_in_layers in enumerate(node):
            color_file = os.path.join(
                "output",
                weights_file_name,
                f"{weights_file_name}_vn_{CUR_VN}_value_{value_range[i]}.json",
            )
            mux_port = load_field(color_file, "color")
            max_mux_port = max(mux_port) + 1
            add_parameter(f"WW_{i}", max_mux_port)
            # Most-used colour decides how many slots each colour needs;
            # -1 marks an uncoloured input and is not counted.
            usage = Counter(color for color in mux_port if color != -1)
            max_L = max(usage.values())
            add_output(
                f"HN_out_{i}",
                f"WW_{i}",
                f"{max_L}",
            )
            # First axis: how many times the colour has been used so far;
            # second axis: the colour itself (i.e. the mux port).
            hn_out = [[-1] * max_mux_port for _ in range(max_L)]
            used_mux_port = [0] * max_mux_port
            for j, _ in enumerate(hn_in_layers):
                port = mux_port[j]
                if port == -1:
                    continue
                hn_out[used_mux_port[port]][port] = j
                used_mux_port[port] += 1
            for j in range(max_L):
                for k in range(max_mux_port):
                    if hn_out[j][k] == -1:
                        # Unused slot: tie it to the constant-low input.
                        add_assign(f"HN_out_{i}", [j, k], "CST_LOW", [])
                    else:
                        add_assign(f"HN_out_{i}", [j, k], "HN_in", [hn_out[j][k]])
    return module
# %%
def process_task(i, weights_file_name, matrix, H, L):
    """Generate the HN module for index ``i`` and write it to a ``.sv`` file.

    The file is written to
    ``<path_dir>/<weights_file_name>/<path_dir>_tp_<weights_file_name>_vc_<i>.sv``.

    Args:
        i: Index of the matrix being processed; used in file and module names.
        weights_file_name: Stem of the weights file; used as sub-directory.
        matrix: Weight matrix forwarded to ``calculate_WW``/``generate_module``.
        H: Forwarded to ``generate_module``.
        L: Forwarded to ``generate_module``.

    Returns:
        ``i`` on success so the caller can report progress, or ``None`` when
        generation failed (the error is printed, not raised).
    """
    try:
        WW = calculate_WW(matrix, CFG.value_range)
        file_dir = os.path.join(path_dir, weights_file_name)
        os.makedirs(file_dir, exist_ok=True)
        module_name = f"{path_dir}_tp_{weights_file_name}_vc_{i}"
        # Render before opening the file so that a generation failure cannot
        # leave an empty or truncated .sv file behind.
        text = generate_module(
            matrix,
            module_name=module_name,
            H=H,
            L=L,
            value_range=CFG.value_range,
            WW=WW,
            CUR_VN=i,
            weights_file_name=weights_file_name,
        ).generate()
        file_name = os.path.join(file_dir, f"{module_name}.sv")
        with open(file_name, "w") as f:
            f.write(text)
        return i  # task id, used by the caller to show progress
    except Exception as e:
        # Best effort: one failing task must not abort the whole pool.
        print(f"Generating {i} failed with an error: {e}")
        return None
def run():
    """Generate HN modules for the weights file selected by ``CFG.run_weights``.

    Scans ``mapped_weights`` for the configured file, unpickles it as an array
    of shape ``(VN, L, H)`` and runs ``process_task`` for each of the ``VN``
    matrices in a process pool, showing a progress bar.
    """
    os.makedirs(path_dir, exist_ok=True)
    for weights_file in os.listdir("mapped_weights"):
        if weights_file != CFG.run_weights:
            continue
        weights_path = os.path.join("mapped_weights", weights_file)
        weights_file_name = os.path.splitext(weights_file)[0]
        print(f"Processing {weights_file_name}")
        with open(weights_path, "rb") as f:
            print(f"Loading {weights_file_name}")
            # NOTE: unpickling executes arbitrary code; only load trusted files.
            matrixs = pickle.load(f)
        VN, L, H = matrixs.shape
        with ProcessPoolExecutor(max_workers=CFG.num_workers) as executor:
            # Remember which index each future belongs to, so a failure can be
            # reported even though future.result() produced no value.
            task_ids = {
                executor.submit(
                    process_task, i, weights_file_name, matrixs[i], H, L
                ): i
                for i in range(VN)
            }
            for future in tqdm(as_completed(task_ids), total=VN):
                try:
                    future.result()
                except Exception as e:
                    print(
                        f"Generating {task_ids[future]} failed with an error: {e}"
                    )


if __name__ == "__main__":
    run()
# %%
import numpy as np
import pickle
import os
import sys
from config import CFG
# NOTE(review): the bare name `exit` below is never called, so this guard is a
# no-op and execution continues even when CFG.mode == "run". Presumably
# `sys.exit()` (or skipping the generation) was intended — confirm before
# changing, since exiting at import time would also terminate any importer.
if CFG.mode == "run":
    exit
def run():
    """Create random test weights and store them as a pickle and a text dump.

    Values are drawn from ``CFG.value_range`` with shape
    ``CFG.test_weights_shape``. The pickle goes to
    ``CFG.weights_dir/CFG.test_weights``; the text file uses the same name up
    to the first ``.`` with a ``.txt`` suffix and lists one value per line,
    with blank lines separating rows.
    """
    shape = CFG.test_weights_shape
    weights = np.random.choice(CFG.value_range, shape)
    stem = CFG.test_weights.split(".")[0]
    filename_pkl = os.path.join(CFG.weights_dir, CFG.test_weights)
    filename_txt = os.path.join(CFG.weights_dir, stem + ".txt")
    with open(filename_pkl, "wb") as pkl_file:
        pickle.dump(weights, pkl_file)
    with open(filename_txt, "w") as txt_file:
        for row in weights:
            txt_file.writelines(f"{val}\n" for val in row)
            txt_file.write("\n\n")
# Build the optimize_HN executable from a single C++ source file.
CXX = g++
# C++11, all warnings enabled, pthread support.
CXXFLAGS = -std=c++11 -Wall -pthread
TARGET = optimize_HN
SRC = optimize_HN.cpp
# Compile and link in one step; rebuilt whenever the source changes.
$(TARGET): $(SRC)
	$(CXX) $(CXXFLAGS) -o $(TARGET) $(SRC)
# `clean` does not produce a file, so it is declared phony.
.PHONY: clean
clean:
	rm -f $(TARGET)
import os
import sys
import pickle
from config import CFG
import generate_ww
import generate_sub_wrapper
import generate_lm
import generate_wt_group
import generate_wrappers
import generate_mid_wrapper
import generate_wallace
import generate_fsm
import generate_hn
def run_generate_verilog():
    """Run every generator in pipeline order, reporting each as it finishes."""
    pipeline = (
        (generate_ww, "生成WW完成"),
        (generate_sub_wrapper, "生成Sub_wrapper完成"),
        (generate_hn, "生成HN完成"),
        (generate_lm, "生成Layer_mux完成"),
        (generate_wt_group, "生成WT_group完成"),
        (generate_wrappers, "生成Wrappers完成"),
        (generate_mid_wrapper, "生成Mid_wrappers完成"),
        (generate_wallace, "生成Wallace Tree完成"),
        (generate_fsm, "生成FSM完成"),
    )
    for generator, done_message in pipeline:
        generator.run()
        print(done_message)
def print_menu():
    """Show the generation menu and return the user's raw selection string."""
    menu_lines = (
        "\n可用的生成选项:",
        "1. 生成 WW",
        "2. 生成 Sub_wrapper",
        "3. 生成 HN",
        "4. 生成 Layer_mux",
        "5. 生成 WT_group",
        "6. 生成 Wrappers",
        "7. 生成 Mid_wrappers",
        "8. 生成 Wallace Tree",
        "9. 生成 FSM",
        "10. 生成全部",
        "0. 退出",
    )
    for line in menu_lines:
        print(line)
    return input("请选择要生成的模块 (0-10): ")
def run_selected_generate(choice):
    """Run the generator selected by menu entry ``choice``.

    ``"1"`` to ``"9"`` run a single generator and print its completion
    message, ``"10"`` runs the whole pipeline, and any other value is
    silently ignored.
    """
    if choice == "10":
        run_generate_verilog()
        return
    single_steps = {
        "1": (generate_ww, "生成WW完成"),
        "2": (generate_sub_wrapper, "生成Sub_wrapper完成"),
        "3": (generate_hn, "生成HN完成"),
        "4": (generate_lm, "生成Layer_mux完成"),
        "5": (generate_wt_group, "生成WT_group完成"),
        "6": (generate_wrappers, "生成Wrappers完成"),
        "7": (generate_mid_wrapper, "生成Mid_wrappers完成"),
        "8": (generate_wallace, "生成Wallace Tree完成"),
        "9": (generate_fsm, "生成FSM完成"),
    }
    selected = single_steps.get(choice)
    if selected is None:
        return
    generator, done_message = selected
    generator.run()
    print(done_message)
if __name__ == "__main__":
    # Ask whether to iterate over the configured batch or use the single file.
    print("请选择运行模式:")
    print(
        f"1. 使用 run_weights_batch(批量运行), 当前权重文件:{CFG.run_weights_batch}"
    )
    print(f"2. 使用 run_weights(单次运行), 当前权重文件:{CFG.run_weights}")
    mode = input("请选择 (1/2): ")
    if mode not in ("1", "2"):
        print("无效的选择!")
    else:
        batch_mode = mode == "1"
        # Keep showing the menu until the user picks "0".
        while True:
            choice = print_menu()
            if choice == "0":
                break
            if not batch_mode:
                run_selected_generate(choice)
                continue
            # Batch mode: point CFG at each weights file in turn.
            for weights in CFG.run_weights_batch:
                print(f"\n正在处理 weights: {weights}")
                CFG.run_weights = weights
                run_selected_generate(choice)
from setuptools import setup, Extension
import pybind11
# Native extension compiled from optimize_HN.cpp against the pybind11 headers.
optimize_hn_extension = Extension(
    'optimize_HN',
    ['optimize_HN.cpp'],
    include_dirs=[pybind11.get_include()],
    language='c++',
    extra_compile_args=['-std=c++11'],
    extra_link_args=['-static-libstdc++'],
)
ext_modules = [optimize_hn_extension]

setup(name='optimize_HN', version='0.1', ext_modules=ext_modules)
\ No newline at end of file
# %%
from multiprocessing import Pool
import pickle
import numpy as np
from tqdm import tqdm
from prettytable import PrettyTable
import os
from concurrent.futures import ProcessPoolExecutor
from hllm.config import CFG
from hllm.utils import calculate_WW, find_index
# Return bit i of num.
def get_bit(num, i):
    """Return bit ``i`` of ``num`` (bit 0 is the least significant).

    A negative ``i`` yields 0.
    """
    return 0 if i < 0 else (num >> i) & 1
# %%
class HN:
    """Software model of one HN block built from a single weight matrix."""

    def __init__(self, matrix, H, L):
        self.matrix, self.H, self.L = matrix, H, L

    def calculate(self, HN_in: np.ndarray):
        """Apply ``HN_in`` as a mask to the weights and summarise every layer.

        Returns:
            ``(HN_out, ans)``: ``HN_out[i][k]`` counts the masked weights of
            layer ``i`` that match ``CFG.value_range[k]``, and ``ans[i]`` is
            the sum of those weights. Products with magnitude at most 1e-3 are
            skipped.

        Raises:
            ValueError: Propagated from ``find_index`` when a product matches
                no entry of ``CFG.value_range``.
        """
        level_count = len(CFG.value_range)
        HN_out = np.zeros((self.L, level_count), dtype=int)
        ans = np.zeros(self.L)
        for row, layer in enumerate(self.matrix * HN_in):
            for value in layer:
                if abs(value) <= 1e-3:
                    continue
                HN_out[row][find_index(CFG.value_range, value)] += 1
                ans[row] += value
        return HN_out, ans
class HN_GROUP:
    """A collection of ``HN`` models, one per leading slice of the weights."""

    def __init__(self, weights: np.ndarray):
        self.VN, self.L, self.H = weights.shape
        print(weights.shape)
        self.HN_GROUP = [HN(block, self.H, self.L) for block in weights]
        print("HN_GROUP init done")

    def calculate_single(self, hn_group, hn_in):
        """Evaluate one member on ``hn_in`` and return its result."""
        return hn_group.calculate(hn_in)

    def calculate(self, hn_in: np.ndarray):
        """Evaluate every member in a process pool.

        Returns:
            Array stacking the per-layer sums (second element of each member's
            result), in member order.
        """
        layer_sums = []
        with ProcessPoolExecutor(max_workers=CFG.num_workers) as executor:
            pending = [
                executor.submit(member.calculate, hn_in) for member in self.HN_GROUP
            ]
            # Collect in submission order so rows line up with the members.
            for future in tqdm(pending):
                _counts, sums = future.result()
                layer_sums.append(sums)
        return np.array(layer_sums)
def run(config: CFG):
    """Run the HN software model on the stored activation and print the result.

    Loads ``activation.pkl`` and ``config.verify_weights`` from
    ``config.verify_dir``, swaps the first two weight axes, keeps bit 7 of the
    activation and prints the per-layer sums of every HN.
    """
    verify_dir = config.verify_dir
    activation_name = os.path.join(verify_dir, "activation.pkl")
    with open(activation_name, "rb") as activation_file:
        hn_in = pickle.load(activation_file)
    weights_path = os.path.join(verify_dir, config.verify_weights)
    with open(weights_path, "rb") as weights_file:
        matrixs = pickle.load(weights_file)
    hn_group = HN_GROUP(np.transpose(matrixs, (1, 0, 2)))
    # Only bit 7 of each activation value is fed to the model.
    hn_out = hn_group.calculate(get_bit(hn_in, 7))
    print(hn_out)
import hllm.optimized.turbo_optimize_hn as turbo_optimize_hn
import numpy as np
def test_graph_coloring():
    """Check that ``greedy_coloring`` colours a random graph validly.

    Builds a random symmetric 1536x1536 adjacency matrix and a random weight
    matrix, runs ``turbo_optimize_hn.greedy_coloring`` and verifies that both
    endpoints of every edge are coloured (not ``-1``) with different colours.

    Raises:
        AssertionError: On the first edge (in row-major order) that violates
            the condition.
    """
    np.random.seed(41)  # fixed seed so the run is reproducible
    size = 1536
    weight = np.random.randint(-1, 2, size=(size, size))
    adj_matrix = np.random.randint(0, 2, size=(size, size))
    adj_matrix = np.triu(adj_matrix, 1)  # keep the strict upper triangle only
    adj_matrix += adj_matrix.T  # mirror it to make the matrix symmetric
    print("turbo_optimize_hn 模块位置:", turbo_optimize_hn.__file__)
    # Run the graph-colouring algorithm.
    colors = turbo_optimize_hn.greedy_coloring(adj_matrix, weight)
    print(max(colors))
    # Validate all edges at once instead of a size**2 Python-level loop.
    color_of = np.asarray(colors)
    src, dst = np.nonzero(adj_matrix == 1)
    invalid = (
        (color_of[src] == color_of[dst])
        | (color_of[src] == -1)
        | (color_of[dst] == -1)
    )
    if invalid.any():
        first = int(np.argmax(invalid))
        i, j = int(src[first]), int(dst[first])
        raise AssertionError(f"相邻节点 {i} 和 {j} 具有相同的颜色!")
    print("测试通过!所有相邻节点都有不同的颜色")


if __name__ == "__main__":
    test_graph_coloring()
{
{
"cells": [
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from pyrilog import VerilogGenerator,ModuleBlock,add_parameter,add_input,add_output,add_assign\n",
"from concurrent.futures import ProcessPoolExecutor, as_completed\n",
"import os\n",
"from tqdm import tqdm\n",
"import pickle"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"class CFG:\n",
" path_dir = \"HN\"\n",
" value_range = [-6, -4, -3, -2, -1.5, -1, -0.5, 0.5, 1, 1.5, 2, 3, 4, 6]\n",
" WW = [8] * len(value_range)\n",
"\n",
"\n",
"def calculate_WW(matrix: np.array, value_range):\n",
" WW = [0] * len(value_range)\n",
" for i in range(len(value_range)):\n",
" WW[i] = max([len([x for x in row if abs(x-value_range[i])<=0.01]) for row in matrix])\n",
" return WW\n",
"\n",
"def find_index(arr, target, epsilon=1e-3):\n",
" arr = np.array(arr) # 转换为numpy数组\n",
" diff = np.abs(arr - target) # 计算差值数组\n",
" min_diff = np.min(diff) # 找到最小的差值\n",
" if min_diff < epsilon: # 如果最小差值在允许的误差范围内\n",
" return np.where(diff == min_diff)[0][0] # 返回第一个匹配的索引\n",
" raise ValueError('No match found') # 如果没有找到匹配项,则引发异常"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"def generate_verilog_code(\n",
" matrix,\n",
" HN_id=0,\n",
" H=16,\n",
" L=5,\n",
" value_range=[-1, 1],\n",
" WW=[8,8],\n",
"):\n",
" with VerilogGenerator() as generator:\n",
" with ModuleBlock(f\"HN_{HN_id}\"):\n",
" add_parameter(\"H\", H)\n",
" add_parameter(\"L\", L)\n",
" for i in range(len(value_range)):\n",
" add_parameter(f\"WW_{i}\", WW[i])\n",
" add_input(name=\"HN_in\", width=\"H\")\n",
" for i in range(len(value_range)):\n",
" add_output(\n",
" name=f\"HN_out_{i}\",\n",
" width=f\"WW_{i}\",\n",
" height=\"L\",\n",
" )\n",
" # 内部连线\n",
" for i, layer in enumerate(matrix):\n",
" weight_cnt = [0] * len(value_range)\n",
" for j, weight in enumerate(layer):\n",
" if abs(weight)<1e-3:\n",
" continue\n",
" try:\n",
" index=find_index(value_range, weight)\n",
" except ValueError:\n",
" print(f\"weight {weight} not found\")\n",
" continue\n",
" add_assign(\n",
" f\"HN_out_{index}\",\n",
" [i, weight_cnt[index]],\n",
" \"HN_in\",\n",
" [j],\n",
" )\n",
" weight_cnt[index] += 1\n",
" for i in range(len(weight_cnt)):\n",
" while weight_cnt[i] < WW[i]:\n",
" add_assign(f\"HN_out_{i}\", [i, weight_cnt[i]], \"0\", [])\n",
" weight_cnt[i] += 1\n",
" return generator.generate()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/1536 [00:00<?, ?it/s]\n"
]
},
{
"ename": "NameError",
"evalue": "name 'result' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mBrokenProcessPool\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[24], line 36\u001b[0m\n\u001b[0;32m 34\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 35\u001b[0m \u001b[38;5;66;03m# print(1)\u001b[39;00m\n\u001b[1;32m---> 36\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfuture\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 37\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
"File \u001b[1;32m~\\AppData\\Roaming\\uv\\python\\cpython-3.11.10-windows-x86_64-none\\Lib\\concurrent\\futures\\_base.py:449\u001b[0m, in \u001b[0;36mFuture.result\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 448\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;241m==\u001b[39m FINISHED:\n\u001b[1;32m--> 449\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__get_result\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 451\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_condition\u001b[38;5;241m.\u001b[39mwait(timeout)\n",
"File \u001b[1;32m~\\AppData\\Roaming\\uv\\python\\cpython-3.11.10-windows-x86_64-none\\Lib\\concurrent\\futures\\_base.py:401\u001b[0m, in \u001b[0;36mFuture.__get_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 400\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 401\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception\n\u001b[0;32m 402\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 403\u001b[0m \u001b[38;5;66;03m# Break a reference cycle with the exception in self._exception\u001b[39;00m\n",
"\u001b[1;31mBrokenProcessPool\u001b[0m: A process in the process pool was terminated abruptly while the future was running or pending.",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[24], line 38\u001b[0m\n\u001b[0;32m 36\u001b[0m result \u001b[38;5;241m=\u001b[39m future\u001b[38;5;241m.\u001b[39mresult()\n\u001b[0;32m 37\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m---> 38\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGenerating \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[43mresult\u001b[49m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m failed with an error: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[1;31mNameError\u001b[0m: name 'result' is not defined"
]
}
],
"source": [
"def process_task(i, matrix, H, L):\n",
" try:\n",
" WW = calculate_WW(matrix, CFG.value_range)\n",
" file_name = os.path.join(CFG.path_dir, f\"HN_{i}.sv\")\n",
" with open(file_name, \"w\") as f:\n",
" f.write(\n",
" generate_verilog_code(\n",
" matrix,\n",
" HN_id=i,\n",
" H=H,\n",
" L=L,\n",
" value_range=CFG.value_range,\n",
" WW=WW,\n",
" )\n",
" )\n",
" return i # 返回任务ID以显示进度\n",
" except Exception as e:\n",
" print(f\"Generating {i} failed with an error: {e}\")\n",
" return None\n",
"\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
" os.makedirs(CFG.path_dir, exist_ok=True)\n",
" with open(r\"C:\\Users\\night\\Documents\\Codes\\H-LLM\\weights\\q_proj.pkl\", \"rb\") as f:\n",
" matrixs = pickle.load(f)\n",
" matrixs = np.transpose(matrixs, (1, 0, 2))\n",
" VN, L, H = matrixs.shape\n",
" with ProcessPoolExecutor(max_workers=8) as executor:\n",
" futures = [\n",
" executor.submit(process_task, i, matrixs[i], H, L) for i in range(VN)\n",
" ]\n",
" for future in tqdm(as_completed(futures), total=VN):\n",
" try:\n",
" # print(1)\n",
" result = future.result()\n",
" except Exception as e:\n",
" print(f\"Generating {result} failed with an error: {e}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from pyrilog import (\n",
" VerilogGenerator,\n",
" ModuleBlock,\n",
" add_parameter,\n",
" add_input,\n",
" add_output,\n",
" add_assign,\n",
" add_wire,\n",
" add_instance,\n",
")\n",
"from concurrent.futures import ProcessPoolExecutor, as_completed\n",
"import os\n",
"from tqdm import tqdm\n",
"import pickle"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"class CFG:\n",
" path_dir = \"Sub_wrapper\"\n",
" weights_dir = \"../001-H-LLM/weights\"\n",
" num_workers = 16\n",
" value_range = [-6, -4, -3, -2, -1.5, -1, -0.5, 0.5, 1, 1.5, 2, 3, 4, 6]\n",
"\n",
"\n",
"def calculate_WW(matrix: np.array, value_range):\n",
" WW = [0] * len(value_range)\n",
" for i in range(len(value_range)):\n",
" WW[i] = max(\n",
" [len([x for x in row if abs(x - value_range[i]) <= 0.01]) for row in matrix]\n",
" )\n",
" return WW\n",
"\n",
"\n",
"def find_index(arr, target, epsilon=1e-3):\n",
" arr = np.array(arr) # 转换为numpy数组\n",
" diff = np.abs(arr - target) # 计算差值数组\n",
" min_diff = np.min(diff) # 找到最小的差值\n",
" if min_diff < epsilon: # 如果最小差值在允许的误差范围内\n",
" return np.where(diff == min_diff)[0][0] # 返回第一个匹配的索引\n",
" raise ValueError(\"No match found\") # 如果没有找到匹配项,则引发异常"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def generate_module(\n",
" matrix,\n",
" module_name_suffix=\"\",\n",
" H=16,\n",
" L=5,\n",
" value_range=[-1, 1],\n",
" WW=[8, 8],\n",
"):\n",
" with ModuleBlock(f\"{CFG.path_dir}_{module_name_suffix}\") as module:\n",
" # 参数\n",
" add_parameter(\"H\", H)\n",
" add_parameter(\"L\", L)\n",
" for i in range(len(value_range)):\n",
" add_parameter(f\"WW_{i}\", WW[i])\n",
" # 输入输出\n",
" add_input(\"clk\")\n",
" add_input(\"tre_rstn\")\n",
" add_input(\"valid\")\n",
" add_input(\"LM_sel\", \"L\")\n",
" add_input(\"SW_in\",\"H\")\n",
" for i in range(len(value_range)):\n",
" add_output(\n",
" name=f\"WT_{i}_out_S\",\n",
" )\n",
" add_output(\n",
" name=f\"WT_{i}_out_C\",\n",
" )\n",
" # 内部连线\n",
" for i in range(len(value_range)):\n",
" add_wire(\n",
" name=f\"HN_out_{i}\",\n",
" width=f\"WW_{i}\",\n",
" height=\"L\"\n",
" )\n",
" add_wire(\n",
" name=f\"LM_out_{i}\",\n",
" width=f\"WW_{i}\",\n",
" )\n",
" \n",
" # 实例化HN\n",
" hn_params = {\n",
" \"H\": H,\n",
" \"L\": L,\n",
" }\n",
" for i in range(len(value_range)):\n",
" hn_params[f\"WW_{i}\"] = f\"WW_{i}\"\n",
" hn_ports = {\n",
" \"HN_in\": \"SW_in\",\n",
" }\n",
" for i in range(len(value_range)):\n",
" hn_ports[f\"HN_out_{i}\"] = f\"HN_out_{i}\"\n",
" add_instance(\"HN\"+module_name_suffix, \"hn\", hn_params, hn_ports)\n",
" \n",
" # 实例化LM\n",
" lm_params = {\n",
" \"L\": L,\n",
" }\n",
" for i in range(len(value_range)):\n",
" lm_params[f\"WW_{i}\"] = f\"WW_{i}\"\n",
" lm_ports = {\n",
" \"LM_sel\": \"LM_sel\",\n",
" }\n",
" for i in range(len(value_range)):\n",
" lm_ports[f\"LM_in_{i}\"] = f\"HN_out_{i}\"\n",
" lm_ports[f\"LM_out_{i}\"] = f\"LM_out_{i}\"\n",
" add_instance(\"Layer_mux\"+module_name_suffix, \"layer_mux\", lm_params, lm_ports)\n",
"\n",
" # 实例化WT\n",
" wt_params = {}\n",
" for i in range(len(value_range)):\n",
" wt_params[f\"WW_{i}\"] = f\"WW_{i}\"\n",
" wt_ports = {\"clk\":\"clk\",\"tre_rstn\":\"tre_rstn\",\"valid\":\"valid\"}\n",
" for i in range(len(value_range)):\n",
" wt_ports[f\"WT_{i}_in\"] = f\"LM_out_{i}\"\n",
" wt_ports[f\"WT_{i}_out_S\"] = f\"WT_{i}_out_S\"\n",
" wt_ports[f\"WT_{i}_out_C\"] = f\"WT_{i}_out_C\"\n",
" add_instance(\"WT_group\"+module_name_suffix, \"wt_group\", wt_params, wt_ports)\n",
" return module"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def process_task(i, weights_file_name, matrix, H, L):\n",
" try:\n",
" WW = calculate_WW(matrix, CFG.value_range)\n",
" file_dir = os.path.join(CFG.path_dir, weights_file_name)\n",
" os.makedirs(file_dir, exist_ok=True)\n",
" file_name = os.path.join(\n",
" file_dir, f\"{CFG.path_dir}_tp_{weights_file_name}_vc_{i}.sv\"\n",
" )\n",
" with open(file_name, \"w\") as f:\n",
" f.write(\n",
" generate_module(\n",
" matrix,\n",
" module_name_suffix=f\"_tp_{weights_file_name}_vc_{i}\",\n",
" H=H,\n",
" L=L,\n",
" value_range=CFG.value_range,\n",
" WW=WW,\n",
" ).generate()\n",
" )\n",
" return i # 返回任务ID以显示进度\n",
" except Exception as e:\n",
" print(f\"Generating {i} failed with an error: {e}\")\n",
" return None\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
" os.makedirs(CFG.path_dir, exist_ok=True)\n",
" for weights_file in os.listdir(CFG.weights_dir):\n",
" if weights_file != \"k.pkl\":\n",
" continue\n",
" weights_path = os.path.join(CFG.weights_dir, weights_file)\n",
" weights_file_name = os.path.splitext(weights_file)[0]\n",
" print(f\"Processing {weights_file_name}\")\n",
" with open(weights_path, \"rb\") as f:\n",
" print(f\"Loading {weights_file_name}\")\n",
" matrixs = pickle.load(f)\n",
" matrixs = np.transpose(matrixs, (1, 0, 2))\n",
" VN, L, H = matrixs.shape\n",
" for i in tqdm(range(VN)):\n",
" process_task(i, weights_file_name, matrixs[i], H, L)\n",
" # with ProcessPoolExecutor(max_workers=CFG.num_workers) as executor:\n",
" # futures = [\n",
" # executor.submit(process_task, i, weights_file_name, matrixs[i], H, L)\n",
" # for i in range(VN)\n",
" # ]\n",
" # for future in tqdm(as_completed(futures), total=VN):\n",
" # try:\n",
" # result = future.result()\n",
" # except Exception as e:\n",
" # print(f\"Generating {result} failed with an error: {e}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
import numpy as np
import numpy as np
import Pyrilog.Pyrilog as pl
import os
class CFG:
    """Default settings used when this file is run as a script."""

    # Output directory for the generated files.
    path_dir = "TFSM"
    # Values for the like-named parameters of the generated module.
    H = 16
    L = 5
    VN = 5
    WP = 4
    AP = 8
    SCW = 11
    SCWB = 4
    TTW = 16
    # Every integer from -8 to 7 except 0.
    value_range = [x for x in range(-8, 8) if x != 0]
    # Identifier-safe label per value, e.g. -3 -> "neg_3", 7 -> "pos_7".
    value_dict = {x: f"{'pos' if x >0 else 'neg'}_{abs(x)}" for x in value_range}
    # Keyed by weight value because generate_verilog_code looks it up as
    # WW[value]; a positional list only worked through negative indexing.
    WW = dict.fromkeys(value_range, 8)
def generate_verilog_code(
    path_dir,
    file_id=0,
    H=16,
    L=5,
    VN=5,
    WP=4,
    AP=8,
    SCW=11,
    SCWB=4,
    TTW=16,
    value_range=[-1, 1],
    value_dict={-1: "neg_1", 1: "pos_1"},
    WW={-1: 8, 1: 8},
):
    """Build module ``TFSM_<file_id>`` and write it to ``<path_dir>/TFSM_<file_id>.sv``.

    Args:
        path_dir: Directory that receives the generated file.
        file_id: Suffix used in the module, file and sub-wrapper names.
        H, L, VN, WP, AP, SCW, SCWB, TTW: Values of the like-named module
            parameters.
        value_range: Weight values; one parameter/wire/register group is
            emitted per value.
        value_dict: Maps each value to the label used in generated names.
        WW: Looked up as ``WW[value]`` for each value in ``value_range``.

    NOTE(review): the state machine is unfinished — only states 0 and 1 are
    emitted (see the author's remark further down).
    """
    file_name = os.path.join(path_dir, f"TFSM_{file_id}.sv")
    generator = pl.VerilogGenerator()
    module = pl.ModuleBlock(f"TFSM_{file_id}")
    # Parameters
    module.add_parameter("H", str(H))
    module.add_parameter("L", str(L))
    module.add_parameter("VN", str(VN))
    module.add_parameter("WP", str(WP))
    module.add_parameter("AP", str(AP))
    module.add_parameter("SCW", str(SCW))
    module.add_parameter("SCWB", str(SCWB))
    module.add_parameter("TTW", str(TTW))
    for i in value_range:
        module.add_parameter(f"WW_{value_dict[i]}", str(WW[i]))
    # Inputs and outputs
    module.add_input("clk")
    module.add_input("tree_rstn")
    module.add_input("valid")
    module.add_input("fsm_rstn")
    module.add_input("LM_sel", width="L")
    module.add_input("Top_in", width="AP")
    module.add_output("WT_result_acc", pl.VAR_TYPE.REG, "TTW", "VN")
    module.add_output("result_valid", pl.VAR_TYPE.REG)
    module.add_reg("TM_sel", "AP")
    module.add_wire("TM_out", "H")
    # Instantiate Top_mux
    Top_mux_params = {"H": "H", "AP": "AP"}
    # NOTE(review): "TM_in" is never declared in this module; the declared
    # input is "Top_in" — confirm which signal should be connected.
    Top_mux_ports = {"TM_sel": "TM_sel", "TM_in": "TM_in", "TM_out": "TM_out"}
    module.add_instance("Top_mux", "top_mux", Top_mux_params, Top_mux_ports)
    for i in value_range:
        module.add_wire(f"WT_{value_dict[i]}_out_S", height="VN")
        module.add_wire(f"WT_{value_dict[i]}_out_C", height="VN")
    # NOTE(review): "tree_rstn" is already declared as an input above, so this
    # declares the same name twice — confirm the intended direction.
    module.add_reg("tree_rstn")
    module.add_reg("mac_rstn")
    for i in value_range:
        module.add_reg(f"final_S_{value_dict[i]}", "SCW", "VN")
        module.add_reg(f"final_C_{value_dict[i]}", "SCW", "VN")
    module.add_reg("MAC_in_1", "SCW+2", "VN")
    module.add_reg("MAC_in_2", "WP", "VN")
    module.add_wire("MAC_out", "TTW+1", "VN")
    for i in value_range:
        module.add_reg(f"WT_result_{value_dict[i]}", "SCW+2", "VN")
    module.add_reg("idx", "SCWB+1")
    module.add_reg("state", "3")
    module.add_genvar("j")
    module.add_integer("i")
    # Generate loop over j: one sub-wrapper and one MAC instance per iteration.
    with pl.GenerateBlock(module) as generate_block:
        with pl.ForBlock(
            generate_block, "j=0", "j<VN", "j=j+1", "inst_SW+loop"
        ) as for_block:
            SW_params = {"H": "H", "L": "L"}
            for i in value_range:
                SW_params[f"WW_{value_dict[i]}"] = f"WW_{value_dict[i]}"
            SW_ports = {
                "clk": "clk",
                "tree_rstn": "tree_rstn",
                "valid": "valid",
                "LM_sel": "LM_sel",
                "SW_in": "TM_out",
            }
            for i in value_range:
                SW_ports[f"WT_out_{value_dict[i]}_S"] = f"WT_{value_dict[i]}_out_S[j]"
                SW_ports[f"WT_out_{value_dict[i]}_C"] = f"WT_{value_dict[i]}_out_C[j]"
            for_block.add_instance(f"SW_{file_id}", "sub_wrapper", SW_params, SW_ports)
            MAC_params = {"W_1": "SCW", "W_2": "WP", "W_0": "TTW"}
            MAC_ports = {
                "clk": "clk",
                "tree_rstn": "tree_rstn",
                "MAC_in_1": "MAC_in_1[j]",
                "MAC_in_2": "MAC_in_2[j]",
                "MAC_out": "MAC_out[j]",
            }
            for_block.add_instance("MAC", "mac", MAC_params, MAC_ports)
    # Clocked state machine with asynchronous active-low reset.
    with pl.AlwaysBlock(module, "posedge clk or negedge fsm_rstn") as always_block:
        with pl.IfBlock(always_block, "!fsm_rstn") as if_block:
            # NOTE(review): this rebinds if_block to a new IfBlock created
            # without the parent argument used everywhere else; the add_body
            # calls below then target that new object instead of the block
            # opened by the `with` — looks unintended, confirm.
            if_block = pl.IfBlock("!fsm_rstn")
            if_block.add_body("state <= 0;")
            if_block.add_body("idx <= 0;")
            if_block.add_body("tree_rstn <= 0;")
            if_block.add_body("mac_rstn <= 0;")
            if_block.add_body("result_valid <= 0;")
            if_block.add_body("TM_sel <= 8'b00000000;")
            with pl.ForBlock(if_block, "i=0", "i<VN", "i=i+1") as for_block:
                for_block.add_body("MAC_in_1[i] <= 0;")
                for_block.add_body("MAC_in_2[i] <= 0;")
        with pl.ElseBlock(always_block) as else_block:
            # State 0: clear the accumulators and wait for `valid`.
            with pl.IfBlock(else_block, "state == 0") as if_block:
                if_block.add_body("idx <= 0;")
                if_block.add_body("tree_rstn <= 0;")
                if_block.add_body("result_valid <= 0;")
                with pl.ForBlock(if_block, "i=0", "i<VN", "i=i+1") as for_block:
                    # The Python loop variable i selects the value label; the
                    # "[i]" inside the strings is the Verilog loop integer.
                    for i in value_range:
                        for_block.add_body(f"final_S_{value_dict[i]}[i] <= 0;")
                        for_block.add_body(f"final_C_{value_dict[i]}[i] <= 0;")
                        for_block.add_body(f"WT_result_{value_dict[i]}[i] <= 0;")
                    # NOTE(review): f-string without placeholders.
                    for_block.add_body(f"WT_result_acc[i] <= 0;")
                # Author's remark: "can't go on writing this; it is basically
                # a line-by-line translation of the Verilog code".
                with pl.IfBlock(if_block, "valid == 1") as if_if_block:
                    if_if_block.add_body("state <= 1;")
            # State 1: on `valid`, release the tree reset and move to state 2.
            with pl.IfBlock(else_block, "state == 1") as if_block:
                with pl.IfBlock(if_block, "valid == 1") as if_if_block:
                    if_if_block.add_body("tree_rstn <= 1;")
                    if_if_block.add_body("state <= 2;")
    generator.add_module(module)
    generator.generate(file_name)
if __name__ == "__main__":
    os.makedirs(CFG.path_dir, exist_ok=True)
    record_path = os.path.join(CFG.path_dir, "records.txt")
    # The context manager flushes and closes the record file; previously the
    # handle was opened and never closed.
    with open(record_path, "w") as record_f:
        record_f.write(f"H={CFG.H}, L={CFG.L}, value_range={CFG.value_range}\n\n")
    # One TFSM_<i>.sv file per index; unspecified arguments use the defaults.
    for i in range(CFG.VN):
        generate_verilog_code(
            CFG.path_dir,
            file_id=i,
            H=CFG.H,
            L=CFG.L,
            value_range=CFG.value_range,
            value_dict=CFG.value_dict,
            WW=CFG.WW,
        )
import numpy as np
import numpy as np
import pickle
import matplotlib.pyplot as plt
# Number of rows of the transposed, quantized matrix that are analysed; it is
# also part of the pickle file name.
length = 3840
file_path = f"weights-fp32-{length}.pkl"
# Load the weight matrix.
with open(file_path, "rb") as f:
    weights = pickle.load(f)
# Check the dimensions of the loaded weights.
print(f"weight matrix shape: {weights.shape}")
# # new_shape = (2304, 288)  # 576 groups of 1152 elements each
# # weights = weights.reshape(new_shape)
# Global min/max used to scale the weights into the int4 range (-8 to 7).
max_value = np.max(weights)
min_value = np.min(weights)
# Normalise the weights linearly so that min -> -8 and max -> 7, then round.
# NOTE(review): divides by zero when every weight has the same value.
normalized_weights = (weights - min_value) / (max_value - min_value) * 15 - 8
normalized_weights = np.round(normalized_weights)
# Clip to guarantee the int4 range, then transpose so that each row of
# ``quantized_weights`` is one group.
quantized_weights = np.clip(normalized_weights, -8, 7).T
# Count how often each int4 value occurs in every group.
int4_values = np.arange(-8, 8)
frequency_counts = np.zeros((length, len(int4_values)))
for i in range(length):
    # Bin edges at -8.5, -7.5, ..., 7.5 give one unit-wide bin per int4 value.
    frequency_counts[i, :] = np.histogram(
        quantized_weights[i], bins=np.arange(-8.5, 8.5)
    )[0]
# Standard deviation and range (max - min) of each value's count across groups.
std_devs_per_value = np.std(frequency_counts, axis=0)
ranges_per_value = np.ptp(frequency_counts, axis=0)
# Print the results.
print("Standard deviations per int4 value:", std_devs_per_value)
print("Ranges per int4 value:", ranges_per_value)
# Largest count of each int4 value over all groups.
max_frequencies_per_value = np.max(frequency_counts, axis=0)
# Sum of those per-value maxima.
total_max_frequency_sum = np.sum(max_frequencies_per_value)
# Print the per-value maxima and their sum.
print("Maximum frequency per int4 value:", max_frequencies_per_value)
print("Sum of maximum frequencies:", total_max_frequency_sum)
# Plot the standard deviation (left) and the range (right) as bar charts.
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.bar(int4_values, std_devs_per_value, color="blue")
plt.title("Standard Deviation of Frequency per int4 Value")
plt.xlabel("int4 Value")
plt.ylabel("Standard Deviation")
plt.subplot(1, 2, 2)
plt.bar(int4_values, ranges_per_value, color="red")
plt.title("Range of Frequency per int4 Value")
plt.xlabel("int4 Value")
plt.ylabel("Range")
plt.tight_layout()
plt.show()
{
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'utils_quant'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 6\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtqdm\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m tqdm\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mutils_quant\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpickle\u001b[39;00m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'utils_quant'"
]
}
],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import safetensors\n",
"import torch\n",
"from tqdm import tqdm\n",
"import utils_quant\n",
"import pickle\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tensors_1536=[]\n",
"tensors_3840=[]\n",
"file_path=\"/lustre/S/huangdi/open_for_out/models/aimo-progress-prize-trained-models/Code-Math-QA-Proof-quant-per-head-fp4-0913/model.safetensors\"\n",
"with safetensors.safe_open(file_path,framework=\"pt\") as f:\n",
" for i,key in enumerate(tqdm(f.keys())):\n",
" # print(key,f.get_tensor(key).shape)\n",
" if i>10:\n",
" break\n",
" tensor=f.get_tensor(key)\n",
" if tensor.ndim==2:\n",
" if len(tensor[0])==1536:\n",
" tensors_1536.extend(tensor.float().tolist())\n",
" else:\n",
" tensors_3840.extend(tensor.float().tolist())\n",
" else:\n",
" if len(tensor)==1536:\n",
" tensors_1536.append(tensor.float().tolist())\n",
" else:\n",
" tensors_3840.append(tensor.float().tolist())\n",
"tensors_1536=np.array(tensors_1536)\n",
"tensors_3840=np.array(tensors_3840)\n",
"# tensors=np.array(tensors,dtype=np.float32)\n",
"# display(tensors_fp32[:5])\n",
"with open(\"weights-fp32-1536-small.pkl\",\"wb\") as f:\n",
" pickle.dump(tensors_1536, f)\n",
"with open(\"weights-fp32-3840-small.pkl\",\"wb\") as f:\n",
" pickle.dump(tensors_3840, f)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"import numpy as np\n",
"from tqdm import tqdm\n",
"from prettytable import PrettyTable\n",
"from config import CFG\n",
"import os\n",
"from multiprocessing import Pool"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def calculate_WW(matrix: np.array, value_range):\n",
" WW = [0] * len(value_range)\n",
" for i in range(len(value_range)):\n",
" WW[i] = max(\n",
" [len([x for x in row if abs(x - value_range[i]) <= 0.01]) for row in matrix]\n",
" )\n",
" return WW\n",
"\n",
"def find_index(arr, target, epsilon=1e-3):\n",
" arr = np.array(arr) # 转换为numpy数组\n",
" diff = np.abs(arr - target) # 计算差值数组\n",
" min_diff = np.min(diff) # 找到最小的差值\n",
" if min_diff < epsilon: # 如果最小差值在允许的误差范围内\n",
" return np.where(diff == min_diff)[0][0] # 返回第一个匹配的索引\n",
" raise ValueError(\"No match found\") # 如果没有找到匹配项,则引发异常\n",
"\n",
"#返回第i位\n",
"def get_bit(num, i):\n",
" if i < 0:\n",
" return 0\n",
" return (num >> i) & 1"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"class HN:\n",
" def __init__(self, matrix,H,L):\n",
" self.matrix = matrix\n",
" self.H=H\n",
" self.L=L\n",
" \n",
" # def find_index(self, value):\n",
" # return np.searchsorted(CFG.value_range, value)\n",
" \n",
" def calculate(self,HN_in:np.ndarray):\n",
" HN_out = np.zeros((self.L, len(CFG.value_range)), dtype=int)\n",
" ans=np.zeros(self.L)\n",
" matrix_masked = self.matrix * HN_in\n",
" for i, layer in enumerate(matrix_masked):\n",
" for j, value in enumerate(layer):\n",
" if abs(value)<=1e-3:\n",
" continue\n",
" index=find_index(CFG.value_range,value)\n",
" HN_out[i][index]+=1\n",
" ans[i]+=value\n",
" # indices=list(map(self.find_index,layer))\n",
" # np.add.at(HN_out[i],indices,1)\n",
" return HN_out,ans\n",
" \n",
"class HN_GROUP:\n",
" def __init__(self,weights:np.ndarray):\n",
" self.VN,self.L,self.H=weights.shape\n",
" print(weights.shape)\n",
" self.HN_GROUP=[HN(matrix,self.H,self.L) for matrix in weights]\n",
" print(\"HN_GROUP init done\")\n",
" \n",
" \n",
" def calculate_single(args):\n",
" hn, hn_in = args\n",
" return hn.calculate(hn_in)\n",
" \n",
" def calculate(self,hn_in:np.ndarray,layer:int):\n",
" hn_out=[None]*self.VN\n",
" ans=[None]*self.VN\n",
" with Pool() as pool:\n",
" args = [(self.HN_GROUP[i], hn_in) for i in range(self.VN)]\n",
" results = list(tqdm(pool.imap(self.calculate_single, args), total=self.VN))\n",
" for i, result in enumerate(results):\n",
" hn_out[i], ans[i] = result\n",
" ans = [x[layer] for x in ans]\n",
" return ans\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(512, 52, 1536)\n",
"HN_GROUP init done\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/512 [00:00<?, ?it/s]"
]
}
],
"source": [
"if CFG.mode == \"test\":\n",
" weights_name = CFG.test_weights\n",
"elif CFG.mode == \"run\":\n",
" weights_name = CFG.run_weigths\n",
"else:\n",
" raise ValueError(\"Invalid mode\")\n",
"activation_name=os.path.join(CFG.activation_dir,\"activation.pkl\")\n",
"result_name=os.path.join(CFG.results_dir,\"result.txt\")\n",
"with open(activation_name, \"rb\") as f:\n",
" hn_in=pickle.load(f)\n",
"weights_path = os.path.join(CFG.weights_dir, weights_name)\n",
"with open(weights_path, \"rb\") as f:\n",
" matrixs = pickle.load(f)\n",
" matrixs = np.transpose(matrixs, (1, 0, 2))\n",
"\n",
"hn_group=HN_GROUP(matrixs)\n",
"\n",
"hn_in=get_bit(hn_in,7)\n",
"\n",
"hn_group.calculate(hn_in,0)\n",
"# for matrix in matrixs:\n",
"# table=PrettyTable()\n",
"# table.field_names=[str(f) for f in CFG.value_range]\n",
"# print(\"--------------------------------\")\n",
"# print(\"输入\")\n",
"# print(hn_in)\n",
"# print(\"权重\")\n",
"# print(matrix)\n",
"# print(\"结果-单独\")\n",
"# hn_out,ans=hn.calculate(hn_in)\n",
"# table.add_rows(hn_out)\n",
"# print(table)\n",
"# print(\"结果-总和\")\n",
"# print(ans)\n",
"# print()\n",
"# break\n",
" \n",
" \n",
" "
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
import os
import sys
import pickle
def update_weights_shape(weights, weights_dir=None):
    """Return the ``shape`` of the pickled weight array stored in *weights*.

    Args:
        weights: File name of the pickle, relative to the weights directory.
        weights_dir: Directory that contains the pickle. When omitted, the
            ``weights_dir`` of a freshly constructed ``CFG`` is used.

    Returns:
        The ``shape`` attribute of the unpickled object.
    """
    if weights_dir is None:
        # ``weights_dir`` is assigned inside ``CFG.__init__``, so it exists on
        # instances only; reading it from the class raises AttributeError.
        weights_dir = CFG().weights_dir
    weights_file = os.path.join(weights_dir, weights)
    # Pickle streams are binary; opening in text mode makes pickle.load fail.
    # NOTE: unpickling executes code from the file, so only trusted weight
    # files should be passed here.
    with open(weights_file, "rb") as f:
        loaded = pickle.load(f)
    return loaded.shape
class CFG:
    """Settings object read by the weight-processing and generation steps.

    Constructing an instance also creates ``weights_dir`` and ``output_dir``
    on disk if they are missing.
    """

    def __init__(self):
        # Either "test" or "run".
        self.mode = "run"
        # Weight pickles that belong to a batch run.
        self.run_weights_batch = [
            f"{stem}.pkl" for stem in ("down", "up", "gate", "k", "o", "v", "q")
        ]
        # Currently selected weight pickle (original note: "used for assignment").
        self.run_weights = "down.pkl"
        self.safetensors = "model.safetensors"
        self.weights_dir = "001-H-LLM/qwen"
        self.mapped_weights_dir = "001-H-LLM/qwen/mapped_weights"
        self.verify_generate_activation_on_exist = False
        self.num_workers = 64
        self.value_range = [
            -6, -4, -3, -2, -1.5, -1, -0.5,
            0.5, 1, 1.5, 2, 3, 4, 6,
        ]
        self.python_path = sys.executable
        self.group_number = 32
        self.output_dir = "outputs"
        for directory in (self.weights_dir, self.output_dir):
            os.makedirs(directory, exist_ok=True)
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pickle\n",
"from tqdm import tqdm\n",
"import matplotlib.pyplot as plt\n",
"from pathlib import Path\n",
"from prettytable import PrettyTable\n",
"from copy import deepcopy"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def eda_weights(weights):\n",
" fp4_values = np.array([-6, -4, -3, -2, -1.5, -1, -0.5, 0.5, 1, 1.5, 2, 3, 4, 6])\n",
" weights=weights.reshape(weights.size//weights.shape[-1],weights.shape[-1])\n",
" frequency_counts = np.zeros((len(weights), len(fp4_values)))\n",
" bins = np.concatenate((fp4_values - 0.01, [fp4_values[-1] + 0.01]))\n",
" for i in tqdm(range(len(weights))):\n",
" frequency_counts[i]=np.histogram(weights[i],bins=bins)[0]\n",
" # 计算每个int4取值在所有组中的标准差和极差\n",
" # display(frequency_counts)\n",
" # std_devs_per_value = np.std(frequency_counts, axis=0)\n",
" # ranges_per_value = np.ptp(frequency_counts, axis=0)\n",
" # 打印结果\n",
"\n",
" # print(\"Standard deviations per int4 value:\", std_devs_per_value)\n",
" # print(\"Ranges per int4 value:\", ranges_per_value)\n",
" # 计算每个int4取值在所有组中的最大频次\n",
" max_frequencies_per_value = np.max(frequency_counts, axis=0)\n",
" # 计算所有取值的最大频次的总和\n",
" total_max_frequency_sum = np.sum(max_frequencies_per_value)\n",
" # 打印每个取值的最大频次和总和\n",
" # print(\"Maximum frequency per int4 value:\", max_frequencies_per_value)\n",
" print(\"Sum of maximum frequencies:\", total_max_frequency_sum)\n",
" \n",
" table=PrettyTable()\n",
" table.field_names=[\"type\"]+[str(f) for f in fp4_values]\n",
" # table.add_row([\"std\"]+[str(round(f,3)) for f in std_devs_per_value])\n",
" # table.add_row([\"range\"]+[str(round(f,3)) for f in ranges_per_value])\n",
" table.add_row([\"max\"]+[str(round(f,3)) for f in max_frequencies_per_value])\n",
" print(table)\n",
" # 绘制标准差和极差的图\n",
" # plt.figure(figsize=(12, 6))\n",
" # plt.subplot(1, 2, 1)\n",
" # plt.bar(fp4_values, std_devs_per_value, color=\"blue\")\n",
" # plt.title(\"Standard Deviation of Frequency per int4 Value\")\n",
" # plt.xlabel(\"int4 Value\")\n",
" # plt.ylabel(\"Standard Deviation\")\n",
"\n",
" # plt.subplot(1, 2, 2)\n",
" # plt.bar(fp4_values, ranges_per_value, color=\"red\")\n",
" # plt.title(\"Range of Frequency per int4 Value\")\n",
" # plt.xlabel(\"int4 Value\")\n",
" # plt.ylabel(\"Range\")\n",
"\n",
" # plt.tight_layout()\n",
" # plt.show()\n",
"\n",
"def eda_weights_52(weights):\n",
" fp4_values = np.array([-6, -4, -3, -2, -1.5, -1, -0.5,0.5, 1, 1.5, 2, 3, 4, 6])\n",
" weights=np.transpose(weights,(1,0,2))#512,52,1536\n",
" frequency_counts = np.zeros((len(weights), 52,len(fp4_values)))\n",
" bins = np.concatenate((fp4_values - 0.01, [fp4_values[-1] + 0.01]))\n",
" for i in tqdm(range(len(weights))):\n",
" for j in range(52):\n",
" frequency_counts[i,j]=np.histogram(weights[i,j],bins=bins)[0]\n",
" # 计算每个int4取值在所有组中的标准差和极差 512,14\n",
" frequency_counts=np.max(frequency_counts,axis=1)\n",
" # std_devs_per_value = np.std(frequency_counts, axis=0)\n",
" # ranges_per_value = np.ptp(frequency_counts, axis=0)\n",
" # 打印结果\n",
"\n",
" # print(\"Standard deviations per int4 value:\", std_devs_per_value)\n",
" # print(\"Ranges per int4 value:\", ranges_per_value)\n",
" # 计算每个int4取值在所有组中的最大频次\n",
" mean_frequencies_per_value = np.mean(frequency_counts, axis=0)\n",
" # 计算所有取值的最大频次的总和\n",
" total_mean_frequency_sum = np.sum(mean_frequencies_per_value)\n",
" # 打印每个取值的最大频次和总和\n",
" # print(\"Maximum frequency per int4 value:\", max_frequencies_per_value)\n",
" print(\"Sum of mean frequencies:\", total_mean_frequency_sum)\n",
" \n",
" table=PrettyTable()\n",
"\n",
" table.field_names=[\"type\"]+[str(f) for f in fp4_values]\n",
" # table.add_row([\"std\"]+[str(round(f,3)) for f in std_devs_per_value])\n",
" # table.add_row([\"range\"]+[str(round(f,3)) for f in ranges_per_value])\n",
" table.add_row([\"mean\"]+[str(round(f,3)) for f in mean_frequencies_per_value])\n",
" print(table)\n",
" # 绘制标准差和极差的图\n",
" # plt.figure(figsize=(12, 6))\n",
" # plt.subplot(1, 2, 1)\n",
" # plt.bar(fp4_values, std_devs_per_value, color=\"blue\")\n",
" # plt.title(\"Standard Deviation of Frequency per int4 Value\")\n",
" # plt.xlabel(\"int4 Value\")\n",
" # plt.ylabel(\"Standard Deviation\")\n",
"\n",
" # plt.subplot(1, 2, 2)\n",
" # plt.bar(fp4_values, ranges_per_value, color=\"red\")\n",
" # plt.title(\"Range of Frequency per int4 Value\")\n",
" # plt.xlabel(\"int4 Value\")\n",
" # plt.ylabel(\"Range\")\n",
"\n",
" # plt.tight_layout()\n",
" # plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"path = Path(\"weights\")\n",
"for file_path in path.rglob('*'):\n",
" if file_path.is_file() and \"proj.pkl\"in str(file_path):\n",
" with open(file_path, \"rb\") as f :\n",
" print(f\"Reading file {file_path}\")\n",
" weights = pickle.load(f)\n",
" print(f\"weight matrix shape: {weights.shape}\")\n",
" eda_weights(deepcopy(weights))\n",
" eda_weights_52(deepcopy(weights))\n",
" print()\n",
" "
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import safetensors\n",
"import torch\n",
"from tqdm import tqdm\n",
"from utils_quant import quant_and_dequant\n",
"import pickle\n",
"from hllm.config import CFG\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 837/837 [04:13<00:00, 3.30it/s]\n",
"100%|██████████| 11/11 [02:13<00:00, 12.17s/it]\n"
]
}
],
"source": [
"weights = {\n",
" \"input_layernorm\": [],\n",
" \"down_proj\": [],\n",
" \"gate_proj\": [],\n",
" \"up_proj\": [],\n",
" \"post_attention_layernorm\": [],\n",
" \"k_proj\": [],\n",
" \"o_proj\": [],\n",
" \"q_proj\": [],\n",
" \"v_proj\": [],\n",
" \"embed_tokens\": [],\n",
" \"model.norm\": [],\n",
"}\n",
"\n",
"\n",
"# 列表中是否有字符串的子串\n",
"def is_substring_in_list(substring, string_list):\n",
" return any(s in substring for s in string_list)\n",
"\n",
"\n",
"ignored_weights = [\n",
" \"embed_tokens.weight\",\n",
" \"post_attention_layernorm.weight\",\n",
" \"activation_quant\",\n",
" \"input_layernorm.weight\",\n",
"]\n",
"\n",
"file_path = \"../001-H-LLM/weights1026/model.safetensors\"\n",
"with safetensors.safe_open(file_path, framework=\"pt\") as f:\n",
" for i, key in enumerate(tqdm(f.keys())):\n",
" if is_substring_in_list(key, ignored_weights):\n",
" continue\n",
" tensor = f.get_tensor(key)\n",
" # print(key,tensor.shape)\n",
" # if i>10:\n",
" # break\n",
" tensor = quant_and_dequant(tensor, 4).tolist()\n",
" for k in weights.keys():\n",
" if k in key:\n",
" weights[k].append(tensor)\n",
"\n",
"for key in tqdm(weights.keys()):\n",
" weights[key] = np.array(weights[key])\n",
" file_path = f\"../001-H-LLM/weights1026/{key}.pkl\"\n",
" with open(file_path, \"wb\") as f:\n",
" pickle.dump(weights[key], f)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
import os
import numpy as np
import safetensors
import pickle
from tqdm import tqdm
from hllm.eda.utils_quant import quant_and_dequant
from hllm.config import CFG
# %%
# Maps the tensor-name suffix looked for in each safetensors key to the short
# name used both as the key in ``weights`` and as the output pickle file stem.
name_dict = {
    "down_proj.weight": "down",
    "gate_proj.weight": "gate",
    "up_proj.weight": "up",
    "k_proj.weight": "k",
    "o_proj.weight": "o",
    "q_proj.weight": "q",
    "v_proj.weight": "v",
    "model.norm.weight": "norm",
}
# Module-level store keyed by short name; every entry starts as an empty list
# and is populated by run().
weights = {v: [] for v in name_dict.values()}
# Does any string of the list occur inside the given text?
def is_substring_in_list(substring, string_list):
    """Return True when at least one entry of *string_list* occurs in *substring*."""
    for candidate in string_list:
        if candidate in substring:
            return True
    return False
def run(config: CFG):
    """Quantize the selected tensors of a safetensors file and pickle them by type.

    Every key of ``config.weights_dir/config.safetensors`` that contains one of
    the suffixes in ``name_dict`` is passed through ``quant_and_dequant`` and
    collected under the matching short name. Each collection is then converted
    to a NumPy array, stored in the module-level ``weights`` dict and written
    to ``config.weights_dir/<short name>.pkl``.

    Args:
        config: Object providing ``weights_dir`` and ``safetensors``.
    """
    file_path = os.path.join(config.weights_dir, config.safetensors)
    # Accumulate into per-call lists. Appending to the module-level ``weights``
    # entries would fail on a second call, because the first call replaces
    # those lists with NumPy arrays.
    collected = {short_name: [] for short_name in name_dict.values()}
    with safetensors.safe_open(file_path, framework="pt") as f:
        for key in tqdm(f.keys()):
            if not is_substring_in_list(key, name_dict.keys()):
                continue
            tensor = quant_and_dequant(f.get_tensor(key), 4)
            for suffix, short_name in name_dict.items():
                if suffix in key:
                    collected[short_name].append(tensor.tolist())
    for short_name, tensors in collected.items():
        # Keep the module-level dict in sync for callers that read it afterwards.
        weights[short_name] = np.array(tensors)
        out_path = os.path.join(config.weights_dir, f"{short_name}.pkl")
        with open(out_path, "wb") as f:
            pickle.dump(weights[short_name], f)
import numpy as np
import os
from hllm.config import CFG
import pickle
from tqdm import tqdm
def mapping_weights(weights, value_range):
    """Replace every weight by the index of its matching entry in *value_range*.

    A weight matches an entry when it lies within 0.01 of it. When several
    entries match, the last one wins. Weights matching no entry become -1.
    The result is an integer array with the shape of *weights*.
    """
    index_map = np.full_like(weights, -1, dtype=int)
    for position, level in enumerate(value_range):
        close_enough = np.abs(weights - level) <= 0.01
        index_map[close_enough] = position
    return index_map
def run(config: CFG):
    """Convert the batch weight pickles to value-index form.

    Every file of ``config.weights_dir`` that is listed in
    ``config.run_weights_batch`` is loaded, mapped with ``mapping_weights``
    against ``config.value_range`` and written under the same file name to
    ``config.mapped_weights_dir``.
    """
    print("Start mapping weights")
    path_dir = os.path.join(config.mapped_weights_dir)
    os.makedirs(path_dir, exist_ok=True)
    value_range = config.value_range
    for file in os.listdir(config.weights_dir):
        if file not in config.run_weights_batch:
            continue
        with open(os.path.join(config.weights_dir, file), "rb") as source:
            print(f"Loading {file}")
            loaded = pickle.load(source)
        # Swap the first two axes so the shape reads (VN, L, H) for the log line.
        swapped = np.transpose(loaded, (1, 0, 2))
        VN, L, H = swapped.shape
        print(VN, L, H)
        # Map, then swap the axes back so the stored layout matches the input.
        mapped = np.transpose(mapping_weights(swapped, value_range), (1, 0, 2))
        with open(os.path.join(path_dir, file), "wb") as target:
            pickle.dump(mapped, target)
    print("Mapped weights at", path_dir)
import math
import torch
from torch import nn
def weight_quant(weight, num_bits=1):
    """Fake-quantize *weight* to ``num_bits`` with one scale for the whole tensor.

    The scale is the largest positive level divided by the mean absolute
    weight (floored at 1e-5). Values are scaled, rounded, clamped to the signed
    integer range, scaled back and returned in the input dtype.
    """
    original_dtype = weight.dtype
    as_float = weight.float()
    half_range = 2 ** (num_bits - 1)
    lower, upper = -half_range, half_range - 1
    scale = upper / as_float.abs().mean().clamp(min=1e-5)
    levels = (as_float * scale).round().clamp(lower, upper)
    return (levels / scale).type(original_dtype)
def activation_quant(x, num_bits=8):
    """Fake-quantize *x* to ``num_bits`` with one scale per slice of the last axis.

    Each slice is scaled so that its largest magnitude (floored at 1e-5) maps
    to the largest positive level, then rounded, clamped to the signed integer
    range, scaled back and returned in the input dtype.
    """
    original_dtype = x.dtype
    as_float = x.float()
    half_range = 2 ** (num_bits - 1)
    lower, upper = -half_range, half_range - 1
    slice_max = as_float.abs().max(dim=-1, keepdim=True).values
    scale = upper / slice_max.clamp(min=1e-5)
    levels = (as_float * scale).round().clamp(lower, upper)
    return (levels / scale).type(original_dtype)
def get_scale_f32(src_amax, dst_max):
    """Return ``(qscale, dqscale)`` for mapping magnitudes up to *src_amax* onto *dst_max*.

    ``dqscale`` is ``src_amax / dst_max`` computed in float32 and ``qscale`` is
    its reciprocal.
    """
    dequant_scale = src_amax.float() / dst_max
    quant_scale = 1 / dequant_scale
    return quant_scale, dequant_scale
def round_to_FP4(input):
    """Round magnitudes to the levels 0, 0.5, 1, 1.5, 2, 3, 4 and 6.

    Infinite and NaN entries, and entries above 6, are set to 6. Entries below
    0.25 (which includes every negative entry) are set to 0. The remaining
    values are split into a power-of-two exponent and a mantissa that is
    rounded to a multiple of 0.5.
    """
    dst_max = 6.0  # largest output magnitude
    emin = 0  # smallest exponent
    p = 2  # mantissa resolution is 2 ** -(p - 1)

    # Adding the two boolean masks acts as a logical OR.
    non_finite = torch.isinf(input) + torch.isnan(input)
    ab = torch.where(non_finite, torch.ones_like(input) * dst_max, input)
    ab = torch.where(ab > dst_max, torch.ones_like(ab) * dst_max, ab)

    flush_below = 2.0 ** (emin) * 2 ** (-p)
    ab = torch.where(ab < flush_below, torch.zeros_like(ab), ab)

    # Values under 2 ** emin share the minimum exponent; the others use
    # floor(log2(value)).
    exponent = torch.where(
        ab < 2 ** (emin),
        torch.ones_like(ab) * (emin),
        torch.floor(torch.log2(ab.float())),
    )
    mantissa_steps = 2 ** (p - 1)
    mantissa = torch.round(ab * 2 ** (-exponent) * mantissa_steps) / mantissa_steps
    return 2**exponent * mantissa
def quant_and_dequant(data, num_bits):
    """Quantize *data* along its last axis and return the signed grid values.

    The magnitudes of each slice along the last axis are scaled so that the
    slice maximum maps to 6.0, rounded with ``round_to_FP4`` and given their
    original sign back.

    Despite the name, the returned tensor is the *quantized* value in grid
    units; it is not multiplied by the dequantization scale. The previous
    dequantized result sat after an earlier ``return`` and was never reachable,
    so it has been removed.

    Args:
        data: Tensor to quantize.
        num_bits: Not used; kept so existing callers keep working.

    Returns:
        ``sign(data) * round_to_FP4(|data| * qscale)``.
    """
    sign = torch.sign(data)
    abs_data = torch.abs(data).float()
    # Per-channel quantization: for an (M, K) matrix there are M quantization
    # parameters, one amax per row.
    amax, _ = torch.max(abs_data, -1, True)
    qscale, _ = get_scale_f32(amax, 6.0)
    quant_data = round_to_FP4(abs_data * qscale)
    return sign * quant_data
class CLMLinear(nn.Linear):
    """``nn.Linear`` whose forward pass uses quantized inputs and weights.

    RMSNorm is placed outside BitLinear.
    """

    def __init__(self, *kargs, weight_bits=1, input_bits=8, **kwargs):
        super().__init__(*kargs, **kwargs)
        self.weight_bits = weight_bits
        self.input_bits = input_bits

    def forward(self, input):
        """Apply the linear map to quantized copies of *input* and the weight.

        The quantization offsets are detached, so the forward value is the
        quantized one while gradients flow as if no quantization happened.
        """
        input_offset = (activation_quant(input, self.input_bits) - input).detach()
        weight_offset = (
            quant_and_dequant(self.weight, self.weight_bits) - self.weight
        ).detach()
        out = nn.functional.linear(input + input_offset, self.weight + weight_offset)
        if self.bias is not None:
            out += self.bias.view(1, -1).expand_as(out)
        return out
import os
from hllm.config import CFG
class TCL_dependency:
    """One file entry of a TCL file list.

    ``str()`` yields the absolute path of the file followed by a newline. The
    path is ``output_dir/name/weights_file_name/file_name`` when
    ``use_weights`` is true and ``output_dir/name/file_name`` otherwise.
    """

    def __init__(
        self,
        config: CFG,
        name: str,
        file_name: str,
        weights_file_name: str,
        use_weights: bool = True,
    ):
        self.config = config
        self.name = name
        self.file_name = file_name
        self.weights_file_name = weights_file_name
        self.use_weights = use_weights

    def __str__(self):
        parts = [self.config.output_dir, self.name]
        if self.use_weights:
            parts.append(self.weights_file_name)
        parts.append(self.file_name)
        absolute = os.path.abspath(os.path.join(*parts))
        return f"{absolute}\n"
class TCL:
    """Collects file dependencies and renders them as a TCL ``set`` statement."""

    def __init__(self, config: CFG, weights_file_name: str):
        self.config = config
        self.dependencies = []
        self.vlist = ""
        self.weights_file_name = weights_file_name

    def add_dependency(self, name: str, file_name: str, use_weights: bool = True):
        """Record one dependency located under the output directory."""
        entry = TCL_dependency(
            self.config, name, file_name, self.weights_file_name, use_weights
        )
        self.dependencies.append(entry)

    def set_vlist(self, vlist: str):
        """Set the name of the TCL variable that receives the file list."""
        self.vlist = vlist

    def generate(self):
        """Return ``set <vlist> "..."`` listing each distinct path once, sorted."""
        joined = "".join(f"{dependency}\n" for dependency in self.dependencies)
        # Splitting leaves empty strings behind (each entry already ends with a
        # newline); they sort first and are removed by the final strip().
        distinct = sorted(set(joined.strip().split("\n")))
        listing = "\n".join(distinct).strip()
        return f'set {self.vlist} "\n' + listing + '\n"'
mapped_weights
output
Optimized_HN
Optimized_HN_mux
WT_group
Optimized_mux
Mux_wrapper
Mux
build
optimize_HN.egg-info
Sub_wrapper
dist
import os
import json
import pickle
import numpy as np
from tqdm import tqdm
from multiprocessing import Pool
from hllm.config import CFG
from hllm.optimized.turbo_optimize_hn import generate_color_graph, greedy_coloring
def process_weight(args):
    """Build and colour the graph of one weight slice and dump the results as JSON.

    *args* is the tuple ``(index, weight, L, H, value_range,
    weights_file_name, path_dir)``. Files written into *path_dir*, all
    prefixed ``info_tp_<weights_file_name>_vc_<index>``:

    * ``.json`` holds ``{"node": ...}`` from ``generate_color_graph``.
    * ``_value_<i>.json`` holds ``{"color": ...}`` for each value index.
    * ``_ww.json`` holds ``{"ww": [...]}``, i.e. ``max(colors) + 1`` per value
      index.
    """
    index, weight, L, H, value_range, weights_file_name, path_dir = args
    node, graph = generate_color_graph(L, H, value_range, weight)
    os.makedirs(path_dir, exist_ok=True)
    prefix = os.path.join(path_dir, f"info_tp_{weights_file_name}_vc_{index}")

    with open(f"{prefix}.json", "w") as handle:
        json.dump({"node": node}, handle)

    widths = []
    for value_index in range(len(value_range)):
        colors = greedy_coloring(graph[value_index], node[value_index])
        widths.append(max(colors) + 1)
        with open(f"{prefix}_value_{value_index}.json", "w") as handle:
            json.dump({"color": colors}, handle)

    with open(f"{prefix}_ww.json", "w") as handle:
        json.dump({"ww": widths}, handle)
def run(name: str, config: CFG):
    """Run ``process_weight`` for every slice of the selected mapped weight file.

    Loads ``config.mapped_weights_dir/config.run_weights``, swaps its first two
    axes and distributes one task per leading index over a pool of
    ``config.num_workers`` processes. Output goes to
    ``config.output_dir/name/<weight file stem>``.
    """
    weights_file = os.path.join(config.mapped_weights_dir, config.run_weights)
    weights_file_name = os.path.splitext(os.path.basename(weights_file))[0]
    print(f"Processing {weights_file_name}")
    with open(weights_file, "rb") as handle:
        matrixs = pickle.load(handle)
    matrixs = np.transpose(matrixs, (1, 0, 2))
    VN, L, H = matrixs.shape
    path_dir = os.path.join(config.output_dir, name, weights_file_name)
    print("Generating color graph")
    tasks = []
    for index, weight in enumerate(matrixs):
        tasks.append(
            (index, weight, L, H, config.value_range, weights_file_name, path_dir)
        )
    with Pool(config.num_workers) as pool:
        # Drain the iterator so every task finishes while tqdm shows progress.
        for _ in tqdm(pool.imap(process_weight, tasks), total=VN):
            pass
    print("Generating color graph at", path_dir)
# %%
import sys
import numpy as np
from pyrilog import (
VerilogGenerator,
ModuleBlock,
GenerateBlock,
ForBlock,
add_parameter,
add_input,
add_output,
add_genvar,
add_assign,
add_wire,
add_body,
add_instance,
add_newline,
)
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
from hllm.config import CFG
from tqdm import tqdm
import pickle
from hllm.utils import calculate_WW
# %%
def generate_module(
    module_name,
    L,
    value_range,
    WW,
):
    """Describe the layer-mux module with pyrilog and return the module object.

    For every index ``i`` of *value_range* the module gets an input
    ``LM_in_<i>``, an output ``LM_out_<i>`` and a parameter ``WW_<i>`` set to
    ``WW[i]``. Three generate loops are emitted: one ANDs each ``LM_in_<i>[k]``
    with the replicated select bit ``LM_sel[k]``, one transposes the masked
    wires, and one OR-reduces each transposed row into ``LM_out_<i>[m]``.

    Args:
        module_name: Name given to the generated module.
        L: Value of the ``L`` parameter; also used to size ``LM_sel``.
        value_range: Only its length is used (number of value indices).
        WW: Per-index values for the ``WW_<i>`` parameters.

    Returns:
        The object bound by ``ModuleBlock(module_name)``.
    """
    # NOTE(review): LM_sel is declared ceil(log2(L)) wide, yet the select loop
    # below indexes LM_sel[i] for i < L - confirm the intended width.
    L_width = int(np.ceil(np.log2(L)))
    with ModuleBlock(module_name) as module:
        # Parameters
        add_parameter("L", L)
        for i in range(len(value_range)):
            add_parameter(f"WW_{i}", WW[i])
        # Inputs and outputs
        add_input("LM_sel", L_width)
        for i in range(len(value_range)):
            # presumably (name, width, height), as in add_wire below - TODO confirm
            add_input(f"LM_in_{i}", f"WW_{i}", "L")
            add_output(f"LM_out_{i}", f"WW_{i}")
        # Internal wiring
        for i in range(len(value_range)):
            add_wire(name=f"LM_in_{i}_masked", width=f"WW_{i}", height="L")
            add_wire(name=f"LM_in_{i}_masked_T", width="L", height=f"WW_{i}")
        add_newline()
        # LM_select_loop
        add_genvar("i")
        with GenerateBlock():
            with ForBlock("i=0", "i<L", "i=i+1", "LM_select_loop"):
                for j in range(len(value_range)):
                    # Emits: LM_in_<j>_masked[i] = LM_in_<j>[i] & {WW_<j>{LM_sel[i]}}
                    add_body(
                        f"assign LM_in_{j}_masked[i]=LM_in_{j}[i] & {{WW_{j}{{LM_sel[i]}}}};",
                    )
        add_newline()
        # LM_transpose_loop_out
        add_genvar("j")
        add_genvar("k")
        with GenerateBlock():
            with ForBlock("k=0", "k<L", "k=k+1", "LM_transpose_loop_out"):
                for i in range(len(value_range)):
                    with ForBlock(
                        "j=0", f"j<WW_{i}", "j=j+1", f"LM_transpose_loop_in_{i}"
                    ):
                        # Index lists are swapped between target and source.
                        add_assign(
                            f"LM_in_{i}_masked_T",
                            ["j", "k"],
                            f"LM_in_{i}_masked",
                            ["k", "j"],
                        )
        add_newline()
        # LM_reduce_or_loop
        add_genvar("m")
        with GenerateBlock():
            for i in range(len(value_range)):
                with ForBlock("m=0", f"m<WW_{i}", "m=m+1", f"LM_reduce_or_loop_{i}"):
                    add_body(f"assign LM_out_{i}[m] = |(LM_in_{i}_masked_T[m]);")
    return module
# %%
def process_task(i, name, weights_file_name, matrix, H, L, config: CFG):
    """Write the layer-mux module for slice *i* to a ``.sv`` file.

    The file is ``config.output_dir/name/weights_file_name/
    <name>_tp_<weights_file_name>_vc_<i>.sv``. Returns *i* on success. Any
    exception is printed and turned into a ``None`` return. *H* is not used.
    """
    try:
        WW = calculate_WW(matrix, config.value_range)
        file_dir = os.path.join(config.output_dir, name, weights_file_name)
        os.makedirs(file_dir, exist_ok=True)
        module_name = f"{name}_tp_{weights_file_name}_vc_{i}"
        file_name = os.path.join(file_dir, f"{module_name}.sv")
        with open(file_name, "w") as handle:
            module = generate_module(
                module_name=module_name,
                L=L,
                value_range=config.value_range,
                WW=WW,
            )
            handle.write(module.generate())
    except Exception as e:
        print(f"Generating {i} failed with an error: {e}")
        return None
    # Return the task id so the caller can show progress.
    return i
def run(name: str, config: CFG):
    """Generate one layer-mux ``.sv`` file per slice of the selected weight file.

    Loads ``config.weights_dir/config.run_weights``, swaps its first two axes
    and submits one ``process_task`` per leading index to a process pool of
    ``config.num_workers`` workers.

    Args:
        name: Output sub-directory and file-name prefix.
        config: Object providing ``weights_dir``, ``run_weights``,
            ``num_workers``, ``output_dir`` and ``value_range``.
    """
    weights_file = os.path.join(config.weights_dir, config.run_weights)
    weights_file_name = os.path.splitext(os.path.basename(weights_file))[0]
    print(f"Processing {weights_file}")
    with open(weights_file, "rb") as f:
        matrixs = pickle.load(f)
    matrixs = np.transpose(matrixs, (1, 0, 2))
    VN, L, H = matrixs.shape
    with ProcessPoolExecutor(max_workers=config.num_workers) as executor:
        # Remember which slice each future belongs to, so a failure can be
        # reported by index. The old handler printed ``result``, which is
        # unbound (or left over from an earlier future) when result() raises.
        future_to_index = {
            executor.submit(
                process_task, i, name, weights_file_name, matrixs[i], H, L, config
            ): i
            for i in range(VN)
        }
        for future in tqdm(as_completed(future_to_index), total=VN):
            try:
                future.result()
            except Exception as e:
                print(
                    f"Generating {future_to_index[future]} failed with an error: {e}"
                )
    file_dir = os.path.join(config.output_dir, name, weights_file_name)
    print("Files generated in", file_dir)
# %%
import json
import sys
import numpy as np
from pyrilog import (
VerilogGenerator,
ModuleBlock,
add_parameter,
add_input,
add_output,
add_assign,
add_wire,
add_instance,
)
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
from hllm.config import CFG
from tqdm import tqdm
import pickle
from hllm.log import TCL
from hllm.utils import calculate_WW
# %%
def generate_module(
    cur_GP=0,
    module_name="",
    H=16,
    L=5,
    VN=512,
    value_range=[-1, 1],
    weights_file_name=None,
    config: CFG = None,
    ww_list=None,
):
    """Describe the wrapper module of group *cur_GP* and collect its file list.

    The module instantiates ``config.group_number`` ``Sub_wrapper`` modules,
    numbered ``cur_GP * GN + i``, and routes each instance's per-value
    ``WT_<j>_out_S`` / ``WT_<j>_out_C`` ports to element ``i`` of the matching
    module output. Alongside it, a ``TCL`` object records the source files the
    group depends on.

    Args:
        cur_GP: Index of the group being generated.
        module_name: Name given to the generated module.
        H: Value of the ``H`` parameter.
        L: Value of the ``L`` parameter; also used to size ``LM_sel``.
        VN: Total count from which the (unused) ``GP`` is derived.
        value_range: Only its length is used (number of value indices).
        weights_file_name: Stem used in instance, file and variable names.
        config: Provides ``group_number`` and is passed on to ``TCL``.
        ww_list: Indexed by ``cur_GP * GN + i``; each entry is iterated to name
            ``SerialWallaceTree<line>Input.v`` dependencies.

    Returns:
        Tuple ``(module, tcl)``.
    """
    tcl = TCL(config, weights_file_name)
    tcl.set_vlist(f"VLIST_tp_{weights_file_name}_gp_{cur_GP}")
    L_width = int(np.ceil(np.log2(L)))
    with ModuleBlock(module_name) as module:
        GN = config.group_number
        # Computed but not used further in this function.
        GP = int(VN / GN)
        # Parameters (note: the module parameter "VN" is set to GN)
        add_parameter("H", H)
        add_parameter("L", L)
        add_parameter("VN", GN)
        # Inputs and outputs
        add_input("clk")
        add_input("tree_rstn")
        add_input("valid")
        add_input("CST_LOW")
        add_input("LM_sel", L_width)
        add_input("SW_in", "H")
        for i in range(len(value_range)):
            add_output(name=f"WT_{i}_out_S", height="VN")
            add_output(name=f"WT_{i}_out_C", height="VN")
        # Internal wiring
        for i in range(GN):
            # Shared control/data inputs are passed through unchanged.
            sw_ports = {
                "clk": "clk",
                "tree_rstn": "tree_rstn",
                "valid": "valid",
                "CST_LOW": "CST_LOW",
                "SW_in": "SW_in",
                "LM_sel": "LM_sel",
            }
            for j in range(len(value_range)):
                sw_ports[f"WT_{j}_out_S"] = f"WT_{j}_out_S[{i}]"
                sw_ports[f"WT_{j}_out_C"] = f"WT_{j}_out_C[{i}]"
            add_instance(
                f"Sub_wrapper_tp_{weights_file_name}_vc_{cur_GP*GN+i}",
                f"Sub_wrapper_{cur_GP*GN+i}",
                None,
                sw_ports,
            )
            # Source files for this instance. The group-level entries
            # (Mid_wrapper, FSM) are re-added on every iteration;
            # TCL.generate() removes the duplicates.
            tcl.add_dependency(
                f"Sub_wrapper",
                f"Sub_wrapper_tp_{weights_file_name}_vc_{cur_GP*GN+i}.sv",
            )
            tcl.add_dependency(
                f"WT_group",
                f"WT_group_tp_{weights_file_name}_vc_{cur_GP*GN+i}.sv",
            )
            tcl.add_dependency(
                f"Mid_wrapper",
                f"Mid_wrapper_tp_{weights_file_name}_gp_{cur_GP}.sv",
            )
            tcl.add_dependency(
                f"Layer_mux",
                f"Layer_mux_tp_{weights_file_name}_vc_{cur_GP*GN+i}.sv",
            )
            tcl.add_dependency(
                f"FSM",
                f"FSM_tp_{weights_file_name}_gp_{cur_GP}.sv",
            )
            for j in range(len(value_range)):
                tcl.add_dependency(
                    f"Mux_wrapper",
                    f"Mux_wrapper_tp_{weights_file_name}_vc_{cur_GP*GN+i}_value_{j}.sv",
                )
                tcl.add_dependency(
                    f"Mux",
                    f"Mux_tp_{weights_file_name}_vc_{cur_GP*GN+i}_value_{j}.sv",
                )
            for line in ww_list[cur_GP * GN + i]:
                # These files are not stored per weight file, hence
                # use_weights=False.
                tcl.add_dependency(
                    f"SerialWallaceTree",
                    f"SerialWallaceTree{line}Input.v",
                    use_weights=False,
                )
    return module, tcl
# %%
def process_task(i, name, weights_file_name, ww_list, H, L, VN, config: CFG):
    """Write the wrapper module and TCL file list of group *i*.

    Both files go to ``config.output_dir/name/weights_file_name`` and share the
    stem ``<name>_tp_<weights_file_name>_gp_<i>`` (``.sv`` and ``.tcl``).
    Returns *i* on success. Any exception is printed together with the line
    number where it surfaced in this function, and ``None`` is returned.
    """
    try:
        file_dir = os.path.join(config.output_dir, name, weights_file_name)
        os.makedirs(file_dir, exist_ok=True)
        module_name = f"{name}_tp_{weights_file_name}_gp_{i}"
        file_name = os.path.join(file_dir, f"{module_name}.sv")
        file_name_tcl = os.path.join(file_dir, f"{module_name}.tcl")
        module, tcl = generate_module(
            i,
            module_name=module_name,
            H=H,
            L=L,
            VN=VN,
            value_range=config.value_range,
            weights_file_name=weights_file_name,
            config=config,
            ww_list=ww_list,
        )
        for path, artefact in ((file_name, module), (file_name_tcl, tcl)):
            with open(path, "w") as handle:
                handle.write(artefact.generate())
    except Exception as e:
        line_number = sys.exc_info()[2].tb_lineno
        print(f"Generating {i} failed with an error at line {line_number}: {e}")
        return None
    # Return the task id so the caller can show progress.
    return i
def run(name: str, config: CFG):
    """Generate the wrapper module and TCL file list for every group.

    Loads ``config.weights_dir/config.run_weights`` to obtain the dimensions,
    reads the ``ww`` list of every slice from the ``info`` output directory and
    submits one ``process_task`` per group (``VN / config.group_number`` groups)
    to a process pool of ``config.num_workers`` workers.

    Args:
        name: Output sub-directory and file-name prefix.
        config: Object providing ``weights_dir``, ``run_weights``,
            ``group_number``, ``output_dir``, ``num_workers`` and
            ``value_range``.
    """
    weights_file = os.path.join(config.weights_dir, config.run_weights)
    weights_file_name = os.path.splitext(os.path.basename(weights_file))[0]
    print(f"Processing {weights_file}")
    with open(weights_file, "rb") as f:
        matrixs = pickle.load(f)
    matrixs = np.transpose(matrixs, (1, 0, 2))
    VN, L, H = matrixs.shape
    GP = int(VN / config.group_number)
    info_dir = os.path.join(config.output_dir, "info", weights_file_name)
    ww_list = []
    for i in range(VN):
        ww_file = os.path.join(info_dir, f"info_tp_{weights_file_name}_vc_{i}_ww.json")
        with open(ww_file, "r") as f:
            ww_list.append(json.load(f)["ww"])
    with ProcessPoolExecutor(max_workers=config.num_workers) as executor:
        # Remember which group each future belongs to, so a failure can be
        # reported by index. The old handler printed ``result``, which is
        # unbound (or left over from an earlier future) when result() raises.
        future_to_group = {
            executor.submit(
                process_task, i, name, weights_file_name, ww_list, H, L, VN, config
            ): i
            for i in range(GP)
        }
        for future in tqdm(as_completed(future_to_group), total=GP):
            try:
                future.result()
            except Exception as e:
                print(
                    f"Generating {future_to_group[future]} failed with an error: {e}"
                )
    file_dir = os.path.join(config.output_dir, name, weights_file_name)
    print("Files generated in", file_dir)
# %%
import sys
import numpy as np
from pyrilog import (
ModuleBlock,
add_parameter,
add_input,
add_output,
add_assign,
add_body,
add_newline,
add_reg,
add_wire,
AlwaysBlock,
IfBlock,
ElseBlock,
)
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
from hllm.config import CFG
from tqdm import tqdm
import pickle
import json
# %%
def generate_module(
    matrix,
    module_name="mux",
    H=16,
    L=5,
    value_range=[-1, 1],
    WW=[8, 8],
    CUR_VN=0,
    CUR_VALUE_INDEX=0,
    CUR_CNT=0,
    node=None,
    color=None,
    weights_file_name="",
):
    """Build one mux module that drives ``out`` from a bit of ``in`` per ``sel``.

    For every input index ``i`` whose ``color[i]`` equals ``CUR_CNT`` (and is
    not ``-1``), each value listed in ``node[i]`` is treated as a ``sel``
    value that routes ``in[i]`` to the output. ``sel`` values with no
    assigned input yield 0. ``out`` is the OR of the ``L`` per-``sel`` bits.

    Args:
        module_name: Name of the emitted module.
        H: Width of the ``in`` port.
        L: Number of ``sel`` values; ``sel`` is ``ceil(log2(L))`` bits wide.
        CUR_CNT: Colour handled by this module.
        node: Per-input lists of ``sel`` values.
        color: Per-input colour assignment (``-1`` means unassigned).

    ``matrix``, ``value_range``, ``WW``, ``CUR_VN``, ``CUR_VALUE_INDEX`` and
    ``weights_file_name`` are accepted but not read here.

    Returns:
        The populated ``ModuleBlock``.
    """
    with ModuleBlock(f"{module_name}") as module:
        add_input("in", H)
        sel_bits = int(np.ceil(np.log2(L)))
        add_input("sel", sel_bits)
        add_output("out")

        # Maps a sel value to the index of the input bit it selects.
        source_of = {}
        for in_idx, sel_values in enumerate(node):
            if color[in_idx] != -1 and color[in_idx] == CUR_CNT:
                for sel_value in sel_values:
                    source_of[sel_value] = in_idx

        add_reg("par_out", L)
        with AlwaysBlock("*"):
            for sel_value in range(L):
                with IfBlock(f"sel == {sel_bits}'b{sel_value:0{sel_bits}b}"):
                    if sel_value not in source_of:
                        add_body(f"par_out[{sel_value}]=0;")
                    else:
                        add_body(f"par_out[{sel_value}]=in[{source_of[sel_value]}];")
                with ElseBlock():
                    add_body(f"par_out[{sel_value}]=0;")

        or_terms = [f"par_out[{bit}]" for bit in range(L)]
        add_assign("out", [], " | ".join(or_terms), [])
    return module
# %%
def process_task(i, name, weights_file_name, matrix, H, L, config: CFG):
    """Write the mux modules of index ``i``, one ``.sv`` file per value index.

    Reads the ``node`` table and, for every index ``j`` of
    ``config.value_range``, the ``color`` list from the info JSON files under
    ``config.output_dir``. For each ``j`` it generates ``max(color) + 1``
    modules (one per colour) and concatenates them, newline-terminated, into
    ``<name>_tp_<weights_file_name>_vc_<i>_value_<j>.sv``.

    Returns:
        ``i`` on success (the task ID, for progress reporting), or ``None``
        when any step raised. Errors are printed rather than propagated.
    """
    try:
        file_dir = os.path.join(config.output_dir, name, weights_file_name)
        os.makedirs(file_dir, exist_ok=True)
        node_file = os.path.join(
            config.output_dir,
            "info",
            weights_file_name,
            f"info_tp_{weights_file_name}_vc_{i}.json",
        )
        # Context managers close the JSON files deterministically.
        with open(node_file) as f:
            node = json.load(f)["node"]
        for j in range(len(config.value_range)):
            color_file = os.path.join(
                config.output_dir,
                "info",
                weights_file_name,
                f"info_tp_{weights_file_name}_vc_{i}_value_{j}.json",
            )
            with open(color_file) as f:
                color = json.load(f)["color"]
            max_mux_port = max(color) + 1
            rendered = [
                generate_module(
                    matrix,
                    module_name=f"{name}_tp_{weights_file_name}_vc_{i}_value_{j}_color_{k}",
                    H=H,
                    L=L,
                    value_range=config.value_range,
                    node=node[j],
                    color=color,
                    CUR_VN=i,
                    CUR_VALUE_INDEX=j,
                    CUR_CNT=k,
                    weights_file_name=weights_file_name,
                ).generate()
                for k in range(max_mux_port)
            ]
            # Each module is followed by a newline, as before.
            text = "".join(module_text + "\n" for module_text in rendered)
            file_name = os.path.join(
                file_dir,
                f"{name}_tp_{weights_file_name}_vc_{i}_value_{j}.sv",
            )
            with open(file_name, "w") as f:
                f.write(text)
        return i  # Return the task ID so progress can be reported.
    except Exception as e:
        print(f"Generating {i} failed with an error: {e}")
        return None
def run(name: str, config: CFG):
    """Generate the mux modules for every index of the weight array, in parallel.

    Loads the pickled array ``config.run_weights`` from
    ``config.mapped_weights_dir``, swaps its first two axes and reads the
    resulting shape as ``(VN, L, H)``. One ``process_task`` call is submitted
    per first-axis index, receiving that index's slice.

    Args:
        name: Prefix used for the output sub-directory and file names.
        config: Run configuration (directories, worker count).
    """
    weights_file = os.path.join(config.mapped_weights_dir, config.run_weights)
    weights_file_name = os.path.splitext(os.path.basename(weights_file))[0]
    print(f"Processing {weights_file_name}")
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted
    # weight files.
    with open(weights_file, "rb") as f:
        matrixs = pickle.load(f)
    matrixs = np.transpose(matrixs, (1, 0, 2))
    VN, L, H = matrixs.shape
    with ProcessPoolExecutor(max_workers=config.num_workers) as executor:
        # Remember which task each future belongs to so that a failure can be
        # attributed to the right index.
        futures = {
            executor.submit(
                process_task, i, name, weights_file_name, matrixs[i], H, L, config
            ): i
            for i in range(VN)
        }
        for future in tqdm(as_completed(futures), total=VN):
            try:
                future.result()
            except Exception as e:
                print(f"Generating {futures[future]} failed with an error: {e}")
    file_dir = os.path.join(config.output_dir, name, weights_file_name)
    print(f"Generated {name} at {file_dir}")
# %%
# %%
import sys
import numpy as np
from pyrilog import (
ModuleBlock,
add_parameter,
add_input,
add_output,
add_assign,
add_body,
add_newline,
add_instance,
add_reg,
add_wire,
AlwaysBlock,
IfBlock,
ElseBlock,
)
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
from hllm.config import CFG
from tqdm import tqdm
import pickle
import json
# %%
def generate_module(
    module_name="Mux_wrapper",
    H=16,
    L=5,
    value_range=[-1, 1],
    CUR_VN=0,
    CUR_VALUE_INDEX=0,
    max_mux_port=0,
    node=None,
    color=None,
    weights_file_name="",
    config: CFG = None,
    name="",
):
    """Build a wrapper that instantiates one mux per colour.

    The wrapper has an ``H``-wide ``in`` port, a ``ceil(log2(L))``-wide
    ``sel`` port and a ``max_mux_port``-wide ``out`` port. Bit ``k`` of
    ``out`` is driven by an instance of
    ``Mux_tp_<weights_file_name>_vc_<CUR_VN>_value_<CUR_VALUE_INDEX>_color_<k>``
    that shares ``in`` and ``sel``.

    ``value_range``, ``node``, ``color``, ``config`` and ``name`` are accepted
    but not read here.

    Returns:
        The populated ``ModuleBlock``.
    """
    with ModuleBlock(f"{module_name}") as module:
        add_input("in", H)
        sel_bits = int(np.ceil(np.log2(L)))
        add_input("sel", sel_bits)
        add_output("out", max_mux_port)
        for port in range(max_mux_port):
            connections = {"in": "in", "sel": "sel"}
            connections["out"] = f"out[{port}]"
            add_instance(
                module_name=f"Mux_tp_{weights_file_name}_vc_{CUR_VN}_value_{CUR_VALUE_INDEX}_color_{port}",
                instance_name=f"Mux_{port}",
                parameters={},
                ports=connections,
            )
    return module
# %%
def process_task(i, name, weights_file_name, H, L, config: CFG):
    """Write the mux-wrapper modules of index ``i``, one file per value index.

    Reads the ``node`` table and, for every index ``j`` of
    ``config.value_range``, the ``color`` list from the info JSON files under
    ``config.output_dir``. A wrapper with ``max(color) + 1`` mux ports is
    generated and written to
    ``<name>_tp_<weights_file_name>_vc_<i>_value_<j>.sv``.

    Returns:
        ``i`` on success (the task ID, for progress reporting), or ``None``
        when any step raised. Errors are printed rather than propagated.
    """
    try:
        file_dir = os.path.join(config.output_dir, name, weights_file_name)
        os.makedirs(file_dir, exist_ok=True)
        node_file = os.path.join(
            config.output_dir,
            "info",
            weights_file_name,
            f"info_tp_{weights_file_name}_vc_{i}.json",
        )
        # Context managers close the JSON files deterministically.
        with open(node_file) as f:
            node = json.load(f)["node"]
        for j in range(len(config.value_range)):
            color_file = os.path.join(
                config.output_dir,
                "info",
                weights_file_name,
                f"info_tp_{weights_file_name}_vc_{i}_value_{j}.json",
            )
            with open(color_file) as f:
                color = json.load(f)["color"]
            max_mux_port = max(color) + 1
            text = generate_module(
                module_name=f"{name}_tp_{weights_file_name}_vc_{i}_value_{j}",
                H=H,
                L=L,
                value_range=config.value_range,
                node=node[j],
                color=color,
                CUR_VN=i,
                CUR_VALUE_INDEX=j,
                max_mux_port=max_mux_port,
                weights_file_name=weights_file_name,
                config=config,
                name=name,
            ).generate()
            file_name = os.path.join(
                file_dir,
                f"{name}_tp_{weights_file_name}_vc_{i}_value_{j}.sv",
            )
            with open(file_name, "w") as f:
                f.write(text)
        return i  # Return the task ID so progress can be reported.
    except Exception as e:
        print(f"Generating {i} failed with an error: {e}")
        return None
def run(name: str, config: CFG):
    """Generate the mux-wrapper modules for every index, in parallel.

    Ensures ``config.output_dir`` exists, loads the pickled array
    ``config.run_weights`` from ``config.mapped_weights_dir``, swaps its first
    two axes and reads the resulting shape as ``(VN, L, H)``. One
    ``process_task`` call is submitted per first-axis index.

    Args:
        name: Prefix used for the output sub-directory and file names.
        config: Run configuration (directories, worker count).
    """
    os.makedirs(config.output_dir, exist_ok=True)
    weights_file = os.path.join(config.mapped_weights_dir, config.run_weights)
    weights_file_name = os.path.splitext(os.path.basename(weights_file))[0]
    print(f"Processing {weights_file}")
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted
    # weight files.
    with open(weights_file, "rb") as f:
        matrixs = pickle.load(f)
    matrixs = np.transpose(matrixs, (1, 0, 2))
    VN, L, H = matrixs.shape
    with ProcessPoolExecutor(max_workers=config.num_workers) as executor:
        # Remember which task each future belongs to so that a failure can be
        # attributed to the right index.
        futures = {
            executor.submit(process_task, i, name, weights_file_name, H, L, config): i
            for i in range(VN)
        }
        for future in tqdm(as_completed(futures), total=VN):
            try:
                future.result()
            except Exception as e:
                print(f"Generating {futures[future]} failed with an error: {e}")
    file_dir = os.path.join(config.output_dir, name, weights_file_name)
    print(f"Generated {name} at {file_dir}")
# %%
import sys
import numpy as np
from pyrilog import (
VerilogGenerator,
ModuleBlock,
add_parameter,
add_input,
add_output,
add_assign,
add_wire,
add_instance,
)
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
from hllm.config import CFG
from tqdm import tqdm
import pickle
import json
# %%
def generate_module(
    matrix,
    module_name="",
    weights_file_name="",
    H=16,
    L=5,
    VN_index=1,
    value_range=[-1, 1],
    WW=[8, 8],
    name="",
    config: CFG = None,
):
    """Build the per-index wrapper joining the mux wrappers to the WT group.

    The module declares parameters ``H``, ``L`` and one ``WW_<k>`` per value,
    the control inputs, an ``SW_in`` bus and a ``ceil(log2(L))``-wide
    ``LM_sel``. For each value index ``k`` it instantiates
    ``Mux_wrapper_tp_<weights_file_name>_vc_<VN_index>_value_<k>`` driving the
    wire ``LM_out_<k>``. Those wires feed a single
    ``WT_group_tp_<weights_file_name>_vc_<VN_index>`` instance whose
    ``WT_<k>_out_S`` / ``WT_<k>_out_C`` ports become the module outputs.

    ``matrix``, ``name`` and ``config`` are accepted but not read here.

    Returns:
        The populated ``ModuleBlock``.
    """
    sel_bits = int(np.ceil(np.log2(L)))
    suffix = f"_tp_{weights_file_name}_vc_{VN_index}"
    value_ids = range(len(value_range))
    with ModuleBlock(f"{module_name}") as module:
        # Parameters
        add_parameter("H", H)
        add_parameter("L", L)
        for k in value_ids:
            add_parameter(f"WW_{k}", WW[k])

        # Inputs and outputs
        for port in ("clk", "tree_rstn", "valid", "CST_LOW"):
            add_input(port)
        add_input("LM_sel", sel_bits)
        add_input("SW_in", "H")
        for k in value_ids:
            add_output(name=f"WT_{k}_out_S")
            add_output(name=f"WT_{k}_out_C")

        # Wires between the mux wrappers and the WT group
        for k in value_ids:
            add_wire(name=f"LM_out_{k}", width=f"WW_{k}")

        # Instantiate one mux wrapper per value
        for k in value_ids:
            add_instance(
                f"Mux_wrapper_tp_{weights_file_name}_vc_{VN_index}_value_{k}",
                f"Mux_wrapper_{k}",
                {},
                {"in": "SW_in", "sel": "LM_sel", "out": f"LM_out_{k}"},
            )

        # Instantiate the WT group
        wt_params = {f"WW_{k}": f"WW_{k}" for k in value_ids}
        wt_ports = {"clk": "clk", "tree_rstn": "tree_rstn", "valid": "valid"}
        for k in value_ids:
            wt_ports.update(
                {
                    f"WT_{k}_in": f"LM_out_{k}",
                    f"WT_{k}_out_S": f"WT_{k}_out_S",
                    f"WT_{k}_out_C": f"WT_{k}_out_C",
                }
            )
        add_instance("WT_group" + suffix, "WT_group", wt_params, wt_ports)
    return module
# %%
def process_task(i, name, weights_file_name, matrix, H, L, config: CFG):
    """Write the per-index wrapper module for index ``i``.

    ``WW[j]`` is ``max(color) + 1``, taken from the ``color`` list of the info
    JSON for value index ``j``. The generated module is written to
    ``<config.output_dir>/<name>/<weights_file_name>/<name>_tp_<weights_file_name>_vc_<i>.sv``.

    Returns:
        ``i`` on success (the task ID, for progress reporting), or ``None``
        when any step raised. Errors are printed rather than propagated.
    """
    try:
        WW = [0] * len(config.value_range)
        for j in range(len(config.value_range)):
            color_file = os.path.join(
                config.output_dir,
                "info",
                weights_file_name,
                f"info_tp_{weights_file_name}_vc_{i}_value_{j}.json",
            )
            # A context manager closes the JSON file deterministically.
            with open(color_file) as f:
                color = json.load(f)["color"]
            WW[j] = max(color) + 1
        file_dir = os.path.join(config.output_dir, name, weights_file_name)
        os.makedirs(file_dir, exist_ok=True)
        file_name = os.path.join(file_dir, f"{name}_tp_{weights_file_name}_vc_{i}.sv")
        module_name = f"{name}_tp_{weights_file_name}_vc_{i}"
        # Render before opening the output, so a failed generation does not
        # leave a truncated file behind.
        text = generate_module(
            matrix,
            module_name=module_name,
            H=H,
            weights_file_name=weights_file_name,
            L=L,
            VN_index=i,
            value_range=config.value_range,
            WW=WW,
            name=name,
            config=config,
        ).generate()
        with open(file_name, "w") as f:
            f.write(text)
        return i  # Return the task ID so progress can be reported.
    except Exception as e:
        print(f"Generating {i} failed with an error: {e}")
        return None
def run(name, config: CFG):
    """Generate the per-index wrapper modules for every index, in parallel.

    Loads the pickled array ``config.run_weights`` from
    ``config.mapped_weights_dir``, swaps its first two axes and reads the
    resulting shape as ``(VN, L, H)``. One ``process_task`` call is submitted
    per first-axis index, receiving that index's slice.

    Args:
        name: Prefix used for the output sub-directory and file names.
        config: Run configuration (directories, worker count).
    """
    weights_file = os.path.join(config.mapped_weights_dir, config.run_weights)
    weights_file_name = os.path.splitext(os.path.basename(weights_file))[0]
    print(f"Processing {weights_file_name}")
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted
    # weight files.
    with open(weights_file, "rb") as f:
        matrixs = pickle.load(f)
    matrixs = np.transpose(matrixs, (1, 0, 2))
    VN, L, H = matrixs.shape
    with ProcessPoolExecutor(max_workers=config.num_workers) as executor:
        # Remember which task each future belongs to so that a failure can be
        # attributed to the right index.
        futures = {
            executor.submit(
                process_task, i, name, weights_file_name, matrixs[i], H, L, config
            ): i
            for i in range(VN)
        }
        for future in tqdm(as_completed(futures), total=VN):
            try:
                future.result()
            except Exception as e:
                print(f"Generating {futures[future]} failed with an error: {e}")
    file_dir = os.path.join(config.output_dir, name, weights_file_name)
    print(f"Generated {name} at {file_dir}")
# %%
import sys
import numpy as np
from pyrilog import (
VerilogGenerator,
ModuleBlock,
add_parameter,
add_input,
add_output,
add_assign,
add_wire,
add_instance,
)
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
from hllm.config import CFG
from tqdm import tqdm
import pickle
from hllm.utils import calculate_WW
# %%
def generate_module(
    module_name,
    H=16,
    L=5,
    VN=512,
    value_range=[-1, 1],
    weights_file_name=None,
    config: CFG = None,
):
    """Build the top-level module that instantiates every group wrapper.

    The module declares parameters ``H``, ``L`` and ``VN``, the shared
    control and data inputs, and one ``WT_<k>_out_S`` / ``WT_<k>_out_C``
    output pair of height ``VN`` per value. ``int(VN / config.group_number)``
    instances of ``Mid_wrapper_tp_<weights_file_name>_gp_<g>`` are created;
    group ``g`` is connected to the output slice
    ``[g*GN+GN-1 : g*GN]`` with ``GN = config.group_number``.

    Returns:
        The populated ``ModuleBlock``.
    """
    sel_bits = int(np.ceil(np.log2(L)))
    with ModuleBlock(module_name) as module:
        group_size = config.group_number
        group_count = int(VN / group_size)
        value_ids = range(len(value_range))

        # Parameters
        for param, value in (("H", H), ("L", L), ("VN", VN)):
            add_parameter(param, value)

        # Inputs and outputs
        for port in ("clk", "tree_rstn", "valid", "CST_LOW"):
            add_input(port)
        add_input("LM_sel", sel_bits)
        add_input("SW_in", "H")
        for k in value_ids:
            add_output(name=f"WT_{k}_out_S", height="VN")
            add_output(name=f"WT_{k}_out_C", height="VN")

        # One Mid_wrapper instance per group
        for g in range(group_count):
            connections = {
                "clk": "clk",
                "tree_rstn": "tree_rstn",
                "valid": "valid",
                "CST_LOW": "CST_LOW",
                "SW_in": "SW_in",
                "LM_sel": "LM_sel",
            }
            for k in value_ids:
                connections[f"WT_{k}_out_S"] = (
                    f"WT_{k}_out_S[{g*group_size+group_size-1}:{g*group_size}]"
                )
                connections[f"WT_{k}_out_C"] = (
                    f"WT_{k}_out_C[{g*group_size+group_size-1}:{g*group_size}]"
                )
            add_instance(
                f"Mid_wrapper_tp_{weights_file_name}_gp_{g}",
                f"Mid_wrapper_{g}",
                None,
                connections,
            )
    return module
# %%
def process_task(i, name, weights_file_name, H, L, VN, config: CFG):
    """Write the top-level module to ``<config.output_dir>/<name>/``.

    The file and the module are both named ``<name>_tp_<weights_file_name>``
    (the file with an ``.sv`` suffix). ``i`` is used only as the task ID.

    Returns:
        ``i`` on success (for progress reporting), or ``None`` when any step
        raised. Errors are printed rather than propagated.
    """
    try:
        file_dir = os.path.join(config.output_dir, name)
        os.makedirs(file_dir, exist_ok=True)
        file_name = os.path.join(file_dir, f"{name}_tp_{weights_file_name}.sv")
        module_name = f"{name}_tp_{weights_file_name}"
        # Render before opening the output, so a failed generation does not
        # leave a truncated file behind.
        text = generate_module(
            module_name,
            H=H,
            L=L,
            VN=VN,
            value_range=config.value_range,
            weights_file_name=weights_file_name,
            config=config,
        ).generate()
        with open(file_name, "w") as f:
            f.write(text)
        return i  # Return the task ID so progress can be reported.
    except Exception as e:
        print(f"Generating {i} failed with an error: {e}")
        return None
def run(name: str, config: CFG):
    """Generate the single top-level module in a worker process.

    Loads the pickled array ``config.run_weights`` from
    ``config.weights_dir``, swaps its first two axes and reads the resulting
    shape as ``(VN, L, H)``, then submits exactly one ``process_task`` call.

    Args:
        name: Prefix used for the output sub-directory and file name.
        config: Run configuration (directories, worker count).
    """
    weights_file = os.path.join(config.weights_dir, config.run_weights)
    weights_file_name = os.path.splitext(os.path.basename(weights_file))[0]
    print(f"Processing {weights_file}")
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted
    # weight files.
    with open(weights_file, "rb") as f:
        matrixs = pickle.load(f)
    matrixs = np.transpose(matrixs, (1, 0, 2))
    VN, L, H = matrixs.shape
    task_count = 1  # There is only one top-level module to emit.
    with ProcessPoolExecutor(max_workers=config.num_workers) as executor:
        # Remember which task each future belongs to so that a failure can be
        # attributed correctly.
        futures = {
            executor.submit(
                process_task,
                i,
                name,
                weights_file_name,
                H,
                L,
                VN,
                config,
            ): i
            for i in range(task_count)
        }
        for future in tqdm(as_completed(futures), total=task_count):
            try:
                future.result()
            except Exception as e:
                print(f"Generating {futures[future]} failed with an error: {e}")
    # process_task writes directly into <output_dir>/<name>, so report that
    # directory.
    file_dir = os.path.join(config.output_dir, name)
    print("Files generated in", file_dir)
# %%
import sys
import numpy as np
from pyrilog import (
ModuleBlock,
add_parameter,
add_input,
add_output,
add_assign,
add_body,
add_newline,
add_instance,
add_reg,
add_wire,
AlwaysBlock,
IfBlock,
ElseBlock,
)
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
from hllm.config import CFG
from tqdm import tqdm
import pickle
import json
# %%
def generate_module(
    module_name="mux_wrapper",
    H=16,
    L=5,
    value_range=[-1, 1],
    CUR_VN=0,
    weights_file_name="",
    config: CFG = None,
    name="",
):
    """Build the module grouping one serial Wallace tree per value.

    For each value index ``k`` the tree width is ``max(color) + 1``, where
    ``color`` comes from the matching info JSON under ``config.output_dir``.
    That width becomes parameter ``WW_<k>``, sizes input ``WT_<k>_in`` and
    selects the instantiated module ``SerialWallaceTree<width>Input``.

    ``H``, ``L`` and ``name`` are accepted but not read here.

    Returns:
        The populated ``ModuleBlock``.
    """
    with ModuleBlock(f"{module_name}") as module:
        ww = []
        for i in range(len(value_range)):
            color_file = os.path.join(
                config.output_dir,
                "info",
                weights_file_name,
                f"info_tp_{weights_file_name}_vc_{CUR_VN}_value_{i}.json",
            )
            # A context manager closes the JSON file deterministically.
            with open(color_file) as f:
                color = json.load(f)["color"]
            ww.append(max(color) + 1)
        for i, width in enumerate(ww):
            add_parameter(f"WW_{i}", width)
        add_input("clk")
        add_input("tree_rstn")
        add_input("valid")
        for i in range(len(value_range)):
            add_input(f"WT_{i}_in", f"WW_{i}")
            add_output(
                f"WT_{i}_out_S",
            )
            add_output(
                f"WT_{i}_out_C",
            )
        # One Wallace tree instance per value
        for i in range(len(value_range)):
            wallace_name = f"SerialWallaceTree{ww[i]}Input"
            wallace_port = {
                "clk": "clk",
                "rstn": "tree_rstn",
                "valid": "valid",
                "addends": f"WT_{i}_in",
                "out_S": f"WT_{i}_out_S",
                "out_Cout": f"WT_{i}_out_C",
            }
            add_instance(wallace_name, f"serial_wallace_tree_{i}", {}, wallace_port)
    return module
# %%
def process_task(i, name, weights_file_name, H, L, config: CFG):
    """Write the Wallace-tree group module for index ``i``.

    The module is written to
    ``<config.output_dir>/<name>/<weights_file_name>/<name>_tp_<weights_file_name>_vc_<i>.sv``.

    Returns:
        ``i`` on success (the task ID, for progress reporting), or ``None``
        when any step raised. Errors are printed rather than propagated.
    """
    try:
        file_dir = os.path.join(config.output_dir, name, weights_file_name)
        os.makedirs(file_dir, exist_ok=True)
        file_name = os.path.join(
            file_dir,
            f"{name}_tp_{weights_file_name}_vc_{i}.sv",
        )
        # Render before opening the output, so a failed generation does not
        # leave a truncated file behind.
        text = generate_module(
            module_name=f"{name}_tp_{weights_file_name}_vc_{i}",
            H=H,
            L=L,
            value_range=config.value_range,
            CUR_VN=i,
            weights_file_name=weights_file_name,
            config=config,
            name=name,
        ).generate()
        with open(file_name, "w") as f:
            f.write(text)
        return i  # Return the task ID so progress can be reported.
    except Exception as e:
        print(f"Generating {i} failed with an error: {e}")
        return None
def run(name: str, config: CFG):
    """Generate the Wallace-tree group modules for every index, in parallel.

    Ensures ``config.output_dir`` exists, loads the pickled array
    ``config.run_weights`` from ``config.mapped_weights_dir`` and reads its
    shape as ``(VN, L, H)``. One ``process_task`` call is submitted per
    first-axis index.

    Args:
        name: Prefix used for the output sub-directory and file names.
        config: Run configuration (directories, worker count).
    """
    os.makedirs(config.output_dir, exist_ok=True)
    weights_file = os.path.join(config.mapped_weights_dir, config.run_weights)
    weights_file_name = os.path.splitext(os.path.basename(weights_file))[0]
    print(f"Processing {weights_file_name}")
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted
    # weight files.
    with open(weights_file, "rb") as f:
        print(f"Loading {weights_file_name}")
        matrixs = pickle.load(f)
    # NOTE(review): the array is used without swapping its first two axes
    # before the shape is unpacked - confirm that is intended for this stage.
    VN, L, H = matrixs.shape
    with ProcessPoolExecutor(max_workers=config.num_workers) as executor:
        # Remember which task each future belongs to so that a failure can be
        # attributed to the right index.
        futures = {
            executor.submit(process_task, i, name, weights_file_name, H, L, config): i
            for i in range(VN)
        }
        for future in tqdm(as_completed(futures), total=VN):
            try:
                future.result()
            except Exception as e:
                print(f"Generating {futures[future]} failed with an error: {e}")
    file_dir = os.path.join(config.output_dir, name, weights_file_name)
    print(f"Generated {name} at {file_dir}")
# %%
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <set>
#include <stdexcept>
#include <vector>
using namespace std;
namespace py = pybind11;
// Greedy graph colouring over an adjacency matrix.
//
// Nodes are visited in index order. A node whose `layer` entry is empty keeps
// colour -1; every other node receives the smallest non-negative colour not
// already used by an adjacent, coloured node.
//
// Throws std::invalid_argument when `adjMatrix` is not square or when `layer`
// has fewer entries than there are nodes.
std::vector<int> greedyGraphColoring(const std::vector<std::vector<int>> &adjMatrix,
                                     const std::vector<std::vector<int>> &layer)
{
    using Index = std::vector<int>::size_type;
    const Index n = adjMatrix.size();

    // Validate the adjacency matrix.
    for (const auto &row : adjMatrix)
    {
        if (row.size() != n)
        {
            throw std::invalid_argument("邻接矩阵必须是方阵");
        }
    }
    // layer[node] is read for every node, so it must cover all of them.
    if (layer.size() < n)
    {
        throw std::invalid_argument("layer must contain one entry per node");
    }

    std::vector<int> colors(n, -1); // -1 means "not coloured"

    for (Index node = 0; node < n; ++node)
    {
        // Nodes with an empty layer list stay uncoloured.
        if (layer[node].empty())
        {
            continue;
        }

        // Collect the colours already taken by coloured neighbours.
        std::set<int> usedColors;
        for (Index neighbor = 0; neighbor < n; ++neighbor)
        {
            if (adjMatrix[node][neighbor] && colors[neighbor] != -1)
            {
                usedColors.insert(colors[neighbor]);
            }
        }

        // Pick the smallest colour that is still free.
        int color = 0;
        while (usedColors.count(color) != 0)
        {
            ++color;
        }
        colors[node] = color;
    }
    return colors;
}
// Returns true when `node1` and `node2` share at least one element.
//
// A std::set is used for the membership test so that element values are not
// limited to a fixed range (a fixed-size bucket array would be indexed out of
// bounds by large or negative values).
bool hasIntersection(const std::vector<int> &node1, const std::vector<int> &node2)
{
    const std::set<int> seen(node1.begin(), node1.end());
    for (int elem : node2)
    {
        if (seen.count(elem) != 0)
        {
            return true;
        }
    }
    return false;
}
tuple<vector<vector<vector<int>>>, vector<vector<vector<int>>>>
generateColorGraph(int L, int W, const vector<double> &value_range,
py::array_t<int> &matrix)
{
auto buf = matrix.request();
int *ptr = static_cast<int *>(buf.ptr);
// 转置矩阵
vector<vector<int>> transposed(W, vector<int>(L));
for (int i = 0; i < L; i++)
{
for (int j = 0; j < W; j++)
{
transposed[j][i] = ptr[i * W + j];
}
}
// 初始化node和graph
vector<vector<vector<int>>> node(value_range.size(), vector<vector<int>>(W));
vector<vector<vector<int>>> graph(value_range.size(),
vector<vector<int>>(W, vector<int>(W, 0)));
// 构建node
for (int i = 0; i < W; i++)
{
for (int j = 0; j < L; j++)
{
int val = transposed[i][j];
if (val != -1)
{
node[val][i].push_back(j);
}
}
}
// 构建graph
for (size_t i = 0; i < value_range.size(); i++)
{
for (int j = 0; j < W; j++)
{
for (int k = 0; k < W; k++)
{
if (!node[i][j].empty() && !node[i][k].empty())
{
graph[i][j][k] = hasIntersection(node[i][j], node[i][k]) ? 1 : 0;
}
}
}
}
return make_tuple(node, graph);
}
// Python bindings: exposes the two functions above as the extension module
// `turbo_optimize_hn`.
PYBIND11_MODULE(turbo_optimize_hn, m)
{
    // Module docstring shown on the Python side.
    m.doc() = "图着色贪心算法模块";
    // greedy_coloring(adj_matrix, layer) -> list of colours (-1 = uncoloured)
    m.def("greedy_coloring", &greedyGraphColoring, "基于贪心算法的图着色实现",
          py::arg("adj_matrix"), py::arg("layer"));
    // generate_color_graph(L, W, value_range, matrix) -> (node, graph)
    m.def("generate_color_graph", &generateColorGraph, "生成颜色图", py::arg("L"),
          py::arg("W"), py::arg("value_range"), py::arg("matrix"));
}
\ No newline at end of file
# %%
import sys
import numpy as np
from pyrilog import ModuleBlock, add_parameter, add_input, add_output, add_assign
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
from hllm.config import CFG
from tqdm import tqdm
import pickle
from hllm.utils import calculate_WW, find_index
# %%
def generate_module(
    matrix,
    module_name="HN",
    H=16,
    L=5,
    value_range=[-1, 1],
    WW=[8, 8],
):
    """Build the module that routes ``HN_in`` bits to per-value output buses.

    For each row ``r`` of ``matrix``, every weight whose magnitude is at
    least ``1e-3`` is matched against ``value_range`` with ``find_index``.
    Input bit ``j`` is then assigned to the next free slot of
    ``HN_out_<index>[r]``. Weights that match no value are reported and
    skipped. Remaining slots up to ``WW[index]`` are tied to ``CST_LOW``.

    Returns:
        The populated ``ModuleBlock``.
    """
    value_count = len(value_range)
    with ModuleBlock(f"{module_name}") as module:
        add_parameter("H", H)
        add_parameter("L", L)
        for v in range(value_count):
            add_parameter(f"WW_{v}", WW[v])
        add_input("HN_in", "H")
        add_input("CST_LOW")
        for v in range(value_count):
            add_output(f"HN_out_{v}", f"WW_{v}", "L")

        # Internal wiring
        for row_idx, row in enumerate(matrix):
            used_slots = [0] * value_count
            for in_idx, weight in enumerate(row):
                # Skip zero weights
                if not abs(weight) < 1e-3:
                    try:
                        index = find_index(value_range, weight)
                    except ValueError:
                        print(f"weight {weight} not found")
                        continue
                    add_assign(
                        f"HN_out_{index}",
                        [row_idx, used_slots[index]],
                        "HN_in",
                        [in_idx],
                    )
                    used_slots[index] += 1
            # Tie every unused slot of this row to the constant-low input.
            for v, first_free in enumerate(used_slots):
                for slot in range(first_free, WW[v]):
                    add_assign(f"HN_out_{v}", [row_idx, slot], "CST_LOW", [])
    return module
# %%
def process_task(i, name, weights_file_name, matrix, H, L, config: CFG):
    """Write the HN module for index ``i``.

    ``WW`` is computed from ``matrix`` with ``calculate_WW``. The module is
    written to
    ``<config.output_dir>/<name>/<weights_file_name>/<name>_tp_<weights_file_name>_vc_<i>.sv``.

    Returns:
        ``i`` on success (the task ID, for progress reporting), or ``None``
        when any step raised. Errors are printed rather than propagated.
    """
    try:
        WW = calculate_WW(matrix, config.value_range)
        file_dir = os.path.join(config.output_dir, name, weights_file_name)
        os.makedirs(file_dir, exist_ok=True)
        file_name = os.path.join(file_dir, f"{name}_tp_{weights_file_name}_vc_{i}.sv")
        module_name = f"{name}_tp_{weights_file_name}_vc_{i}"
        # Render before opening the output, so a failed generation does not
        # leave a truncated file behind.
        text = generate_module(
            matrix,
            module_name=module_name,
            H=H,
            L=L,
            value_range=config.value_range,
            WW=WW,
        ).generate()
        with open(file_name, "w") as f:
            f.write(text)
        return i  # Return the task ID so progress can be reported.
    except Exception as e:
        print(f"Generating {i} failed with an error: {e}")
        return None
def run(name: str, config: CFG):
    """Generate the HN modules for every index of the weight array, in parallel.

    Loads the pickled array ``config.run_weights`` from
    ``config.weights_dir``, swaps its first two axes and reads the resulting
    shape as ``(VN, L, H)``. One ``process_task`` call is submitted per
    first-axis index, receiving that index's slice.

    Args:
        name: Prefix used for the output sub-directory and file names.
        config: Run configuration (directories, worker count).
    """
    weights_file = os.path.join(config.weights_dir, config.run_weights)
    weights_file_name = os.path.splitext(os.path.basename(weights_file))[0]
    print(f"Processing {weights_file}")
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted
    # weight files.
    with open(weights_file, "rb") as f:
        matrixs = pickle.load(f)
    matrixs = np.transpose(matrixs, (1, 0, 2))
    VN, L, H = matrixs.shape
    with ProcessPoolExecutor(max_workers=config.num_workers) as executor:
        # Remember which task each future belongs to so that a failure can be
        # attributed to the right index.
        futures = {
            executor.submit(
                process_task, i, name, weights_file_name, matrixs[i], H, L, config
            ): i
            for i in range(VN)
        }
        for future in tqdm(as_completed(futures), total=VN):
            try:
                future.result()
            except Exception as e:
                print(f"Generating {futures[future]} failed with an error: {e}")
    file_dir = os.path.join(config.output_dir, name, weights_file_name)
    print("Files generated in", file_dir)
# %%
import sys
import numpy as np
from pyrilog import (
VerilogGenerator,
ModuleBlock,
GenerateBlock,
ForBlock,
add_parameter,
add_input,
add_output,
add_genvar,
add_assign,
add_wire,
add_body,
add_instance,
add_newline,
)
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
from hllm.config import CFG
from tqdm import tqdm
import pickle
from hllm.utils import calculate_WW
# %%
def generate_module(
    module_name,
    L,
    value_range,
    WW,
):
    """Build the layer-mux module.

    For each value index the module masks every ``LM_in_<k>[i]`` row with the
    matching ``LM_sel[i]`` bit, transposes the masked array, and OR-reduces
    each transposed row into one bit of ``LM_out_<k>``.

    Args:
        module_name: Name of the emitted module.
        L: Value of the ``L`` parameter (also the width of ``LM_sel``).
        value_range: One input/output pair is emitted per entry.
        WW: Per-value widths, emitted as parameters ``WW_<k>``.

    Returns:
        The populated ``ModuleBlock``.
    """
    with ModuleBlock(module_name) as module:
        # Parameters
        add_parameter("L", L)
        for i in range(len(value_range)):
            add_parameter(f"WW_{i}", WW[i])
        # Inputs and outputs
        add_input("LM_sel", "L")
        for i in range(len(value_range)):
            add_input(f"LM_in_{i}", f"WW_{i}", "L")
            add_output(f"LM_out_{i}", f"WW_{i}")
        # Internal wires: the masked inputs and their transpose
        for i in range(len(value_range)):
            add_wire(name=f"LM_in_{i}_masked", width=f"WW_{i}", height="L")
            add_wire(name=f"LM_in_{i}_masked_T", width="L", height=f"WW_{i}")
        add_newline()
        # LM_select_loop: AND each input row with its replicated select bit
        add_genvar("i")
        with GenerateBlock():
            with ForBlock("i=0", "i<L", "i=i+1", "LM_select_loop"):
                for j in range(len(value_range)):
                    add_body(
                        f"assign LM_in_{j}_masked[i]=LM_in_{j}[i] & {{WW_{j}{{LM_sel[i]}}}};",
                    )
        add_newline()
        # LM_transpose_loop_out: masked_T[j][k] = masked[k][j]
        add_genvar("j")
        add_genvar("k")
        with GenerateBlock():
            with ForBlock("k=0", "k<L", "k=k+1", "LM_transpose_loop_out"):
                for i in range(len(value_range)):
                    with ForBlock(
                        "j=0", f"j<WW_{i}", "j=j+1", f"LM_transpose_loop_in_{i}"
                    ):
                        add_assign(
                            f"LM_in_{i}_masked_T",
                            ["j", "k"],
                            f"LM_in_{i}_masked",
                            ["k", "j"],
                        )
        add_newline()
        # LM_reduce_or_loop: OR-reduce each transposed row into one output bit
        add_genvar("m")
        with GenerateBlock():
            for i in range(len(value_range)):
                with ForBlock("m=0", f"m<WW_{i}", "m=m+1", f"LM_reduce_or_loop_{i}"):
                    add_body(f"assign LM_out_{i}[m] = |(LM_in_{i}_masked_T[m]);")
    return module
# %%
def process_task(i, name, weights_file_name, matrix, H, L, config: CFG):
    """Write the layer-mux module for index ``i``.

    ``WW`` is computed from ``matrix`` with ``calculate_WW``. The module is
    written to
    ``<config.output_dir>/<name>/<weights_file_name>/<name>_tp_<weights_file_name>_vc_<i>.sv``.

    Returns:
        ``i`` on success (the task ID, for progress reporting), or ``None``
        when any step raised. Errors are printed rather than propagated.
    """
    try:
        WW = calculate_WW(matrix, config.value_range)
        file_dir = os.path.join(config.output_dir, name, weights_file_name)
        os.makedirs(file_dir, exist_ok=True)
        file_name = os.path.join(file_dir, f"{name}_tp_{weights_file_name}_vc_{i}.sv")
        module_name = f"{name}_tp_{weights_file_name}_vc_{i}"
        # Render before opening the output, so a failed generation does not
        # leave a truncated file behind.
        text = generate_module(
            module_name=module_name,
            L=L,
            value_range=config.value_range,
            WW=WW,
        ).generate()
        with open(file_name, "w") as f:
            f.write(text)
        return i  # Return the task ID so progress can be reported.
    except Exception as e:
        print(f"Generating {i} failed with an error: {e}")
        return None
def run(name: str, config: CFG):
    """Generate the layer-mux modules for every index, in parallel.

    Loads the pickled array ``config.run_weights`` from
    ``config.weights_dir``, swaps its first two axes and reads the resulting
    shape as ``(VN, L, H)``. One ``process_task`` call is submitted per
    first-axis index, receiving that index's slice.

    Args:
        name: Prefix used for the output sub-directory and file names.
        config: Run configuration (directories, worker count).
    """
    weights_file = os.path.join(config.weights_dir, config.run_weights)
    weights_file_name = os.path.splitext(os.path.basename(weights_file))[0]
    print(f"Processing {weights_file}")
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted
    # weight files.
    with open(weights_file, "rb") as f:
        matrixs = pickle.load(f)
    matrixs = np.transpose(matrixs, (1, 0, 2))
    VN, L, H = matrixs.shape
    with ProcessPoolExecutor(max_workers=config.num_workers) as executor:
        # Remember which task each future belongs to so that a failure can be
        # attributed to the right index.
        futures = {
            executor.submit(
                process_task, i, name, weights_file_name, matrixs[i], H, L, config
            ): i
            for i in range(VN)
        }
        for future in tqdm(as_completed(futures), total=VN):
            try:
                future.result()
            except Exception as e:
                print(f"Generating {futures[future]} failed with an error: {e}")
    file_dir = os.path.join(config.output_dir, name, weights_file_name)
    print("Files generated in", file_dir)
# %%
import sys
import numpy as np
from pyrilog import (
VerilogGenerator,
ModuleBlock,
add_parameter,
add_input,
add_output,
add_assign,
add_wire,
add_instance,
)
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
from hllm.config import CFG
from tqdm import tqdm
import pickle
from hllm.log import TCL, TCL_dependency
from hllm.utils import calculate_WW
# %%
def generate_module(
    cur_GP=0,
    module_name="",
    H=16,
    L=5,
    VN=512,
    value_range=[-1, 1],
    weights_file_name=None,
    config: CFG = None,
    ww_list=None,
):
    """Build the wrapper for group ``cur_GP`` plus its TCL dependency list.

    The module instantiates ``config.group_number`` copies of
    ``Sub_wrapper_tp_<weights_file_name>_vc_<n>``, where ``n`` runs over the
    indices belonging to this group. Instance ``i`` drives element ``i`` of
    every ``WT_<k>_out_S`` / ``WT_<k>_out_C`` output. For each instance the
    source files it needs are registered on the ``TCL`` object: the HN,
    WT_group, Layer_mux, Sub_wrapper, Mid_wrapper and FSM files, plus one
    serial Wallace tree per entry of ``ww_list[n]``.

    Returns:
        ``(module, tcl)`` - the populated ``ModuleBlock`` and ``TCL`` object.
    """
    tcl = TCL(config, weights_file_name)
    tcl.set_vlist(f"VLIST_tp_{weights_file_name}_gp_{cur_GP}")
    with ModuleBlock(module_name) as module:
        GN = config.group_number
        GP = int(VN / GN)  # Not used below; kept so the same division is evaluated.
        value_ids = range(len(value_range))

        # Parameters
        add_parameter("H", H)
        add_parameter("L", L)
        add_parameter("VN", GN)

        # Inputs and outputs
        for port in ("clk", "tree_rstn", "valid", "CST_LOW"):
            add_input(port)
        add_input("LM_sel", "L")
        add_input("SW_in", "H")
        for k in value_ids:
            add_output(name=f"WT_{k}_out_S", height="VN")
            add_output(name=f"WT_{k}_out_C", height="VN")

        # One Sub_wrapper instance (and its dependencies) per group member
        for i in range(GN):
            vc = cur_GP * GN + i
            connections = {
                "clk": "clk",
                "tree_rstn": "tree_rstn",
                "valid": "valid",
                "CST_LOW": "CST_LOW",
                "SW_in": "SW_in",
                "LM_sel": "LM_sel",
            }
            for k in value_ids:
                connections[f"WT_{k}_out_S"] = f"WT_{k}_out_S[{i}]"
                connections[f"WT_{k}_out_C"] = f"WT_{k}_out_C[{i}]"
            add_instance(
                f"Sub_wrapper_tp_{weights_file_name}_vc_{vc}",
                f"Sub_wrapper_{vc}",
                None,
                connections,
            )
            tcl.add_dependency(f"HN", f"HN_tp_{weights_file_name}_vc_{vc}.sv")
            tcl.add_dependency(
                f"WT_group", f"WT_group_tp_{weights_file_name}_vc_{vc}.sv"
            )
            tcl.add_dependency(
                f"Layer_mux", f"Layer_mux_tp_{weights_file_name}_vc_{vc}.sv"
            )
            tcl.add_dependency(
                f"Sub_wrapper", f"Sub_wrapper_tp_{weights_file_name}_vc_{vc}.sv"
            )
            tcl.add_dependency(
                f"Mid_wrapper", f"Mid_wrapper_tp_{weights_file_name}_gp_{cur_GP}.sv"
            )
            tcl.add_dependency(f"FSM", f"FSM_tp_{weights_file_name}_gp_{cur_GP}.sv")
            for line in ww_list[vc]:
                tcl.add_dependency(
                    f"SerialWallaceTree",
                    f"SerialWallaceTree{line}Input.v",
                    use_weights=False,
                )
    return module, tcl
# %%
def process_task(i, name, weights_file_name, ww_list, H, L, VN, config: CFG):
    """Emit the SystemVerilog module and the TCL script for group ``i``.

    Both files are written to
    ``<config.output_dir>/<name>/<weights_file_name>/``; the directory is
    created when missing.

    Returns:
        ``i`` on success (the task ID, used by the caller to report
        progress), or ``None`` when any step raised. Errors are printed
        rather than propagated.
    """
    try:
        out_dir = os.path.join(config.output_dir, name, weights_file_name)
        os.makedirs(out_dir, exist_ok=True)
        sv_path = os.path.join(out_dir, f"{name}_tp_{weights_file_name}_gp_{i}.sv")
        tcl_path = os.path.join(
            out_dir, f"{name}_tp_{weights_file_name}_gp_{i}.tcl"
        )
        module, tcl = generate_module(
            i,
            module_name=f"{name}_tp_{weights_file_name}_gp_{i}",
            H=H,
            L=L,
            VN=VN,
            value_range=config.value_range,
            weights_file_name=weights_file_name,
            config=config,
            ww_list=ww_list,
        )
        # Write the module first, then the TCL script.
        for path, artefact in ((sv_path, module), (tcl_path, tcl)):
            with open(path, "w") as handle:
                handle.write(artefact.generate())
    except Exception as e:
        print(
            f"Generating {i} failed with an error at line {sys.exc_info()[2].tb_lineno}: {e}"
        )
        return None
    # Return the task ID so the caller can report progress.
    return i
def run(name: str, config: CFG):
    """Generate one group-level module (and TCL script) per group, in parallel.

    Loads the pickled weight array ``config.run_weights`` from
    ``config.weights_dir``, swaps its first two axes and reads the resulting
    shape as ``(VN, L, H)``. For every first-axis index the integers stored
    one per line in ``WW_tp_<weights_file_name>_vc_<i>.txt`` are collected and
    passed to each ``process_task`` call;
    ``int(VN / config.group_number)`` tasks are submitted to a process pool.

    Args:
        name: Prefix used for the output sub-directory and file names.
        config: Run configuration (directories, worker count, group size).
    """
    weights_file = os.path.join(config.weights_dir, config.run_weights)
    weights_file_name = os.path.splitext(os.path.basename(weights_file))[0]
    print(f"Processing {weights_file}")
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted
    # weight files.
    with open(weights_file, "rb") as f:
        matrixs = pickle.load(f)
    matrixs = np.transpose(matrixs, (1, 0, 2))
    VN, L, H = matrixs.shape
    GP = int(VN / config.group_number)

    ww_list = []
    for vc in range(VN):
        ww_file = os.path.join(
            config.output_dir,
            "WW",
            weights_file_name,
            f"WW_tp_{weights_file_name}_vc_{vc}.txt",
        )
        with open(ww_file, "r") as f:
            # Blank lines (e.g. a trailing newline) carry no width; skip them
            # instead of failing in int().
            ww_list.append([int(line.strip()) for line in f if line.strip()])

    with ProcessPoolExecutor(max_workers=config.num_workers) as executor:
        # Remember which task each future belongs to so that a failure can be
        # attributed to the right group.
        futures = {
            executor.submit(
                process_task, i, name, weights_file_name, ww_list, H, L, VN, config
            ): i
            for i in range(GP)
        }
        for future in tqdm(as_completed(futures), total=GP):
            try:
                future.result()
            except Exception as e:
                print(f"Generating {futures[future]} failed with an error: {e}")
    file_dir = os.path.join(config.output_dir, name, weights_file_name)
    print("Files generated in", file_dir)
# %%
import numpy as np
from pyrilog import (
ModuleBlock,
add_parameter,
add_input,
add_output,
add_assign,
add_wire,
add_instance,
)
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
from hllm.config import CFG
from tqdm import tqdm
import pickle
from hllm.utils import calculate_WW
# %%
def generate_module(
    matrix,
    module_name,
    H=16,
    L=5,
    value_range=[-1, 1],
    WW=[8, 8],
    weights_file_name=None,
    config: CFG = None,
    VN_index=1,
):
    """Build the per-index wrapper chaining the HN, Layer_mux and WT_group.

    The module declares parameters ``H``, ``L`` and one ``WW_<k>`` per value,
    then instantiates the three stage modules named
    ``<stage>_tp_<weights_file_name>_vc_<VN_index>``. ``HN`` drives the wires
    ``HN_out_<k>``, ``Layer_mux`` reduces them to ``LM_out_<k>``, and
    ``WT_group`` turns those into the ``WT_<k>_out_S`` / ``WT_<k>_out_C``
    outputs.

    ``matrix`` and ``config`` are accepted but not read here.

    Returns:
        The populated ``ModuleBlock``.
    """
    suffix = f"_tp_{weights_file_name}_vc_{VN_index}"
    value_ids = range(len(value_range))
    # Every stage forwards the WW_<k> parameters unchanged.
    ww_passthrough = {f"WW_{k}": f"WW_{k}" for k in value_ids}

    with ModuleBlock(module_name) as module:
        # Parameters
        add_parameter("H", H)
        add_parameter("L", L)
        for k in value_ids:
            add_parameter(f"WW_{k}", WW[k])

        # Inputs and outputs
        for port in ("clk", "tree_rstn", "valid", "CST_LOW"):
            add_input(port)
        add_input("LM_sel", "L")
        add_input("SW_in", "H")
        for k in value_ids:
            add_output(name=f"WT_{k}_out_S")
            add_output(name=f"WT_{k}_out_C")

        # Internal wires
        for k in value_ids:
            add_wire(name=f"HN_out_{k}", width=f"WW_{k}", height="L")
            add_wire(name=f"LM_out_{k}", width=f"WW_{k}")

        # Instantiate HN
        hn_params = {"H": "H", "L": "L", **ww_passthrough}
        hn_ports = {"HN_in": "SW_in", "CST_LOW": "CST_LOW"}
        hn_ports.update({f"HN_out_{k}": f"HN_out_{k}" for k in value_ids})
        add_instance("HN" + suffix, "HN", hn_params, hn_ports)

        # Instantiate Layer_mux
        lm_params = {"L": L, **ww_passthrough}
        lm_ports = {"LM_sel": "LM_sel"}
        for k in value_ids:
            lm_ports[f"LM_in_{k}"] = f"HN_out_{k}"
            lm_ports[f"LM_out_{k}"] = f"LM_out_{k}"
        add_instance("Layer_mux" + suffix, "Layer_mux", lm_params, lm_ports)

        # Instantiate WT_group
        wt_params = dict(ww_passthrough)
        wt_ports = {"clk": "clk", "tree_rstn": "tree_rstn", "valid": "valid"}
        for k in value_ids:
            wt_ports[f"WT_{k}_in"] = f"LM_out_{k}"
            wt_ports[f"WT_{k}_out_S"] = f"WT_{k}_out_S"
            wt_ports[f"WT_{k}_out_C"] = f"WT_{k}_out_C"
        add_instance("WT_group" + suffix, "WT_group", wt_params, wt_ports)
    return module
# %%
def process_task(i, name, weights_file_name, matrix, H, L, config):
    """Write the per-index wrapper module for index ``i``.

    ``WW`` is computed from ``matrix`` with ``calculate_WW``. The module is
    written to
    ``<config.output_dir>/<name>/<weights_file_name>/<name>_tp_<weights_file_name>_vc_<i>.sv``.

    Returns:
        ``i`` on success (the task ID, for progress reporting), or ``None``
        when any step raised. Errors are printed rather than propagated.
    """
    try:
        WW = calculate_WW(matrix, config.value_range)
        file_dir = os.path.join(config.output_dir, name, weights_file_name)
        os.makedirs(file_dir, exist_ok=True)
        file_name = os.path.join(file_dir, f"{name}_tp_{weights_file_name}_vc_{i}.sv")
        module_name = f"{name}_tp_{weights_file_name}_vc_{i}"
        # Render before opening the output, so a failed generation does not
        # leave a truncated file behind.
        text = generate_module(
            matrix,
            module_name=module_name,
            H=H,
            L=L,
            value_range=config.value_range,
            WW=WW,
            weights_file_name=weights_file_name,
            config=config,
            VN_index=i,
        ).generate()
        with open(file_name, "w") as f:
            f.write(text)
        return i  # Return the task ID so progress can be reported.
    except Exception as e:
        print(f"Generating {i} failed with an error: {e}")
        return None
def run(name: str, config: CFG):
    """Generate the per-index wrapper modules for every index, in parallel.

    Loads the pickled array ``config.run_weights`` from
    ``config.weights_dir``, swaps its first two axes and reads the resulting
    shape as ``(VN, L, H)``. One ``process_task`` call is submitted per
    first-axis index, receiving that index's slice.

    Args:
        name: Prefix used for the output sub-directory and file names.
        config: Run configuration (directories, worker count).
    """
    weights_file = os.path.join(config.weights_dir, config.run_weights)
    weights_file_name = os.path.splitext(os.path.basename(weights_file))[0]
    print(f"Processing {weights_file}")
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted
    # weight files.
    with open(weights_file, "rb") as f:
        matrixs = pickle.load(f)
    matrixs = np.transpose(matrixs, (1, 0, 2))
    VN, L, H = matrixs.shape
    with ProcessPoolExecutor(max_workers=config.num_workers) as executor:
        # Remember which task each future belongs to so that a failure can be
        # attributed to the right index.
        futures = {
            executor.submit(
                process_task, i, name, weights_file_name, matrixs[i], H, L, config
            ): i
            for i in range(VN)
        }
        for future in tqdm(as_completed(futures), total=VN):
            try:
                future.result()
            except Exception as e:
                print(f"Generating {futures[future]} failed with an error: {e}")
    # process_task writes into the per-weights sub-directory, so report that
    # directory.
    file_dir = os.path.join(config.output_dir, name, weights_file_name)
    print("Files generated in", file_dir)
import argparse
import sys
import math
import os
from tqdm import tqdm
from hllm.config import CFG
def gen_fulladder():
    """Return the Verilog source of the one-bit ``FullAdder`` module.

    The module has inputs ``A``, ``B`` and ``Cin`` and outputs ``S`` (the XOR
    of the three inputs) and ``Cout`` (their majority).
    """
    return """module FullAdder(
input A, // First input bit
input B, // Second input bit
input Cin, // Carry input bit
output S, // Sum output bit
output Cout // Carry output bit
);
assign S = A ^ B ^ Cin;
assign Cout = (A & B) | (B & Cin) | (A & Cin);
endmodule
"""
def gen_wallace_tree_config(num_addends):
    """Plan the reduction stages needed to bring ``num_addends`` down to two.

    While more than two addends remain, a stage groups them in threes: each
    full group uses one full adder (three inputs become two outputs) and the
    leftover addends pass through unchanged.

    Returns:
        Three parallel lists with one entry per stage: the number of full
        adders, the number of pass-through addends, and the number of inputs
        to the stage. All three are empty when ``num_addends`` is 2 or less.
    """
    stages = []
    pending = num_addends
    while pending > 2:
        adders, leftover = divmod(pending, 3)
        stages.append((adders, leftover, pending))
        pending = 2 * adders + leftover
    full_adder_list = [stage[0] for stage in stages]
    remainder_list = [stage[1] for stage in stages]
    total_input_list = [stage[2] for stage in stages]
    return full_adder_list, remainder_list, total_input_list
def gen_wallacetree(num_addends, full_adder_list, remainder_list, total_input_list):
    """Build the Verilog text of ``WallaceTree<num_addends>Input``.

    Args:
        num_addends: Width of the ``addends`` input; also part of the module
            name.
        full_adder_list: Number of ``FullAdder`` instances per level.
        remainder_list: Number of pass-through inputs per level.
        total_input_list: Number of inputs entering each level.

    Returns:
        The module source as a string, terminated by ``endmodule`` and a
        blank line.
    """
    final_level = len(full_adder_list) - 1

    # Every level but the last exposes its carries as a Cout port and the
    # following level receives them through a matching Cin port; the last
    # level drives final_Cout / final_S directly.
    carry_ports = ""
    for level, adders in enumerate(full_adder_list):
        if level == final_level:
            continue
        carry_ports += f" output [{adders} - 1 : 0] L{level}_Cout,\n"
        carry_ports += f" input [{adders} - 1 : 0] L{level+1}_Cin,\n"

    code = f"""module WallaceTree{num_addends}Input(
input [{num_addends} - 1 : 0] addends,
{carry_ports}
output final_Cout,
output final_S
);
"""

    levels = zip(full_adder_list, remainder_list, total_input_list)
    for level, (adders, leftover, width) in enumerate(levels):
        code += f" wire [{width} - 1 : 0] L{level}_all_inputs;\n"

        # Level 0 consumes the module input; later levels concatenate the
        # previous sums, the incoming carries and any previous leftovers.
        if level == 0:
            source = "addends"
        elif remainder_list[level - 1] == 0:
            source = f"{{L{level-1}_S, L{level}_Cin}}"
        else:
            source = f"{{L{level-1}_S, L{level}_Cin, L{level-1}_remainder}}"
        code += f" assign L{level}_all_inputs = {source};\n"

        if leftover != 0:
            code += f" wire [{leftover} - 1 : 0] L{level}_remainder;\n"
            code += f" assign L{level}_remainder = L{level}_all_inputs[{width} - 1 : {width} - {leftover}];\n"

        if level == final_level:
            carry_net, sum_net = "final_Cout", "final_S"
        else:
            # Only non-final levels need an internal sum wire.
            code += f" wire [{adders} - 1 : 0] L{level}_S;\n"
            carry_net, sum_net = f"L{level}_Cout", f"L{level}_S"

        code += f"""\
FullAdder L{level}_adders [{adders} - 1 : 0](
.A(L{level}_all_inputs[{adders} * 3 - 1 : {adders} * 2]),
.B(L{level}_all_inputs[{adders} * 2 - 1 : {adders}]),
.Cin(L{level}_all_inputs[{adders} - 1 : 0]),
.Cout({carry_net}),
.S({sum_net})
);
"""
    code += "endmodule\n\n"
    return code
def gen_serialwallacetree(num_addends, full_adder_list):
    """Build the Verilog text of ``SerialWallaceTree<num_addends>Input``.

    The emitted module instantiates ``WallaceTree<num_addends>Input`` and
    places a register between each level's ``Cout`` port and the next level's
    ``Cin`` port. The outputs and the registered carries are ANDed with
    ``valid``.

    Args:
        num_addends: Width of the ``addends`` input; also part of the module
            and instance names.
        full_adder_list: Number of full adders per level; every level except
            the last gets a carry register of that width.

    Returns:
        The module source as a string, terminated by ``endmodule`` and a
        blank line.
    """
    declarations = ""
    carry_connections = ""
    reset_code = ""
    reg_assign_code = ""
    # One pass over all non-final levels collects the four per-level snippets.
    for level, adders in enumerate(full_adder_list[:-1]):
        reg_name = f"L{level}_Cout_L{level+1}_Cin_reg"
        declarations += f" wire [{adders} - 1 : 0] L{level}_Cout;\n"
        declarations += f" wire [{adders} - 1 : 0] L{level+1}_Cin;\n"
        declarations += f" reg [{adders} - 1 : 0] {reg_name};\n"
        declarations += f" assign L{level+1}_Cin = {reg_name};\n\n"
        carry_connections += f" .L{level}_Cout(L{level}_Cout),\n"
        carry_connections += f" .L{level+1}_Cin(L{level+1}_Cin),\n"
        reset_code += f" {reg_name} <= {adders}'b0;\n"
        reg_assign_code += f" {reg_name} <= L{level}_Cout&{{{adders}{{valid}}}};\n"

    header = f"""module SerialWallaceTree{num_addends}Input(
input clk,
input rstn,
input valid,
input [{num_addends} - 1 : 0] addends,
output out_S,
output out_Cout
);
"""
    output_gating = (
        " wire final_S, final_Cout;\n"
        " assign out_S = final_S & valid;\n"
        " assign out_Cout = final_Cout & valid;\n"
    )
    tree_instance = f"""\
WallaceTree{num_addends}Input u_WallaceTree{num_addends}Input(
.addends(addends),
{carry_connections}
.final_S(final_S),
.final_Cout(final_Cout)
);
"""
    carry_registers = f"""\
always @ (posedge clk or negedge rstn) begin
if (!rstn) begin
{reset_code}
end
else begin
{reg_assign_code}
end
end
"""
    return (
        header
        + declarations
        + output_gating
        + tree_instance
        + carry_registers
        + "endmodule\n\n"
    )
def run(name: str, config: CFG, max_addends: int = 3999):
    """Write ``SerialWallaceTree<N>Input.v`` for every N from 1 to ``max_addends``.

    Each file holds the combinational ``WallaceTree<N>Input`` module followed
    by its ``SerialWallaceTree<N>Input`` wrapper. The shared ``FullAdder``
    module is emitted once, at the top of the N == 1 file.

    Args:
        name: Sub-directory of ``config.output_dir`` that receives the files.
        config: Run configuration; only ``output_dir`` is read here.
        max_addends: Largest addend count to generate (inclusive). The default
            reproduces the previously hard-coded ``range(1, 4000)``.
    """
    # Loop-invariant work: the directory and the FullAdder text never change.
    file_dir = os.path.join(config.output_dir, name)
    os.makedirs(file_dir, exist_ok=True)
    full_adder_code = gen_fulladder()

    for i in tqdm(range(1, max_addends + 1)):
        # Plan the reduction levels for this addend count.
        full_adder_list, remainder_list, total_input_list = gen_wallace_tree_config(i)
        wallace_tree_code = gen_wallacetree(
            i, full_adder_list, remainder_list, total_input_list
        )
        serial_wallace_tree_code = gen_serialwallacetree(i, full_adder_list)

        output_code = wallace_tree_code + serial_wallace_tree_code
        if i == 1:
            # Only the first file carries the FullAdder definition.
            output_code = full_adder_code + output_code

        output_filename = os.path.join(file_dir, f"SerialWallaceTree{i}Input.v")
        with open(output_filename, "w") as file:
            file.write(output_code)
    print("Files generated in", file_dir)
# %%
import sys
import numpy as np
from pyrilog import (
VerilogGenerator,
ModuleBlock,
add_parameter,
add_input,
add_output,
add_assign,
add_wire,
add_instance,
)
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
from hllm.config import CFG
from tqdm import tqdm
import pickle
from hllm.utils import calculate_WW
# %%
def generate_module(
    module_name,
    H=16,
    L=5,
    VN=512,
    value_range=[-1, 1],
    weights_file_name=None,
    config: CFG = None,
):
    """Describe the top-level wrapper module that instantiates the Mid wrappers.

    The module gets parameters ``H``, ``L`` and ``VN``, the shared control and
    data inputs, and one ``WT_<j>_out_S`` / ``WT_<j>_out_C`` output pair of
    height ``VN`` per entry of ``value_range``. It then instantiates
    ``int(VN / config.group_number)`` modules named
    ``Mid_wrapper_tp_<weights_file_name>_gp_<i>``, each wired to a slice of
    ``config.group_number`` elements of every output.

    Returns:
        The ``ModuleBlock`` object describing the module.
    """
    value_count = len(value_range)
    with ModuleBlock(module_name) as module:
        group_size = config.group_number
        group_count = int(VN / group_size)

        # Parameters
        for param_name, param_value in (("H", H), ("L", L), ("VN", VN)):
            add_parameter(param_name, param_value)

        # Inputs and outputs
        for scalar_input in ("clk", "tree_rstn", "valid", "CST_LOW"):
            add_input(scalar_input)
        add_input("LM_sel", "L")
        add_input("SW_in", "H")
        for j in range(value_count):
            add_output(name=f"WT_{j}_out_S", height="VN")
            add_output(name=f"WT_{j}_out_C", height="VN")

        # Internal wiring: one Mid wrapper instance per group
        for group in range(group_count):
            low = group * group_size
            high = low + group_size - 1
            # Shared signals are connected by name to every instance.
            port_map = {
                signal: signal
                for signal in ("clk", "tree_rstn", "valid", "CST_LOW", "SW_in", "LM_sel")
            }
            for j in range(value_count):
                port_map[f"WT_{j}_out_S"] = f"WT_{j}_out_S[{high}:{low}]"
                port_map[f"WT_{j}_out_C"] = f"WT_{j}_out_C[{high}:{low}]"
            add_instance(
                f"Mid_wrapper_tp_{weights_file_name}_gp_{group}",
                f"Mid_wrapper_{group}",
                None,
                port_map,
            )
    return module
# %%
def process_task(i, name, weights_file_name, H, L, VN, config: CFG):
    """Generate the wrapper module and write it to ``<name>_tp_<weights>.sv``.

    The file is created inside ``config.output_dir/name``. Any exception is
    reported on stdout and swallowed.

    Returns:
        ``i`` on success (used by the caller for progress reporting), or
        ``None`` when generation failed.
    """
    try:
        target_dir = os.path.join(config.output_dir, name)
        os.makedirs(target_dir, exist_ok=True)
        module_name = f"{name}_tp_{weights_file_name}"
        sv_path = os.path.join(target_dir, f"{name}_tp_{weights_file_name}.sv")
        with open(sv_path, "w") as sv_file:
            wrapper = generate_module(
                module_name,
                H=H,
                L=L,
                VN=VN,
                value_range=config.value_range,
                weights_file_name=weights_file_name,
                config=config,
            )
            sv_file.write(wrapper.generate())
    except Exception as e:
        print(f"Generating {i} failed with an error: {e}")
        return None
    # Return the task id so the caller can show progress.
    return i
def run(name: str, config: CFG):
    """Generate the single top-level wrapper file for the configured weights.

    The pickled weight array is loaded only to obtain its dimensions: after
    swapping the first two axes its shape is unpacked as ``(VN, L, H)`` and
    passed to one ``process_task`` call executed in a process pool.

    Args:
        name: Module family name, forwarded to ``process_task``.
        config: Run configuration; ``weights_dir``, ``run_weights``,
            ``num_workers`` and ``output_dir`` are read here.
    """
    weights_file = os.path.join(config.weights_dir, config.run_weights)
    weights_file_name = os.path.splitext(os.path.basename(weights_file))[0]
    print(f"Processing {weights_file}")
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(weights_file, "rb") as f:
        matrixs = pickle.load(f)
    matrixs = np.transpose(matrixs, (1, 0, 2))
    VN, L, H = matrixs.shape
    task_count = 1  # a single wrapper module is generated per weights file
    with ProcessPoolExecutor(max_workers=config.num_workers) as executor:
        # Remember which task index each future belongs to, so that a failure
        # can be attributed without relying on a value the future never
        # returned.
        futures = {
            executor.submit(
                process_task,
                i,
                name,
                weights_file_name,
                H,
                L,
                VN,
                config,
            ): i
            for i in range(task_count)
        }
        for future in tqdm(as_completed(futures), total=task_count):
            try:
                future.result()
            except Exception as e:
                print(f"Generating {futures[future]} failed with an error: {e}")
    # process_task writes into <output_dir>/<name>, so report that directory.
    file_dir = os.path.join(config.output_dir, name)
    print("Files generated in", file_dir)
# %%
import sys
import numpy as np
from pyrilog import (
VerilogGenerator,
ModuleBlock,
GenerateBlock,
ForBlock,
add_parameter,
add_input,
add_output,
add_genvar,
add_assign,
add_wire,
add_body,
add_instance,
add_newline,
)
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
from tqdm import tqdm
from hllm.config import CFG
import pickle
from hllm.utils import calculate_WW, find_index
def generate_module(
    matrix,
    module_name="",
    H=16,
    L=5,
    value_range=[-1, 1],
    WW=[8, 8],
):
    """Describe a module holding one serial Wallace tree per weight value.

    For every index ``i`` of ``value_range`` the module gets a parameter
    ``WW_<i>`` (set to ``WW[i]``), an input ``WT_<i>_in`` of that width, the
    outputs ``WT_<i>_out_S`` / ``WT_<i>_out_C`` and one instance of
    ``SerialWallaceTree<WW[i]>Input``. ``matrix``, ``H`` and ``L`` are accepted
    but not used by this function.

    Returns:
        The ``ModuleBlock`` object describing the module.
    """
    value_indices = range(len(value_range))
    with ModuleBlock(f"{module_name}") as module:
        # Parameters
        for idx in value_indices:
            add_parameter(f"WW_{idx}", WW[idx])

        # Inputs and outputs
        for control in ("clk", "tree_rstn", "valid"):
            add_input(control)
        for idx in value_indices:
            add_input(f"WT_{idx}_in", f"WW_{idx}")
            add_output(f"WT_{idx}_out_S")
            add_output(f"WT_{idx}_out_C")

        # Internal wiring: the Wallace tree instances
        for idx in value_indices:
            tree_ports = {
                "clk": "clk",
                "rstn": "tree_rstn",
                "valid": "valid",
                "addends": f"WT_{idx}_in",
                "out_S": f"WT_{idx}_out_S",
                "out_Cout": f"WT_{idx}_out_C",
            }
            add_instance(
                f"SerialWallaceTree{WW[idx]}Input",
                f"serial_wallace_tree_{idx}",
                {},
                tree_ports,
            )
    return module
def process_task(i, name, weights_file_name, matrix, H, L, config: CFG):
    """Generate the Wallace-tree group module for slice ``i`` and write it out.

    The widths come from ``calculate_WW(matrix, config.value_range)`` and the
    result is written to
    ``config.output_dir/name/weights_file_name/<name>_tp_<weights>_vc_<i>.sv``.
    Any exception is reported on stdout and swallowed.

    Returns:
        ``i`` on success (used by the caller for progress reporting), or
        ``None`` when generation failed.
    """
    try:
        widths = calculate_WW(matrix, config.value_range)
        target_dir = os.path.join(config.output_dir, name, weights_file_name)
        os.makedirs(target_dir, exist_ok=True)
        sv_path = os.path.join(target_dir, f"{name}_tp_{weights_file_name}_vc_{i}.sv")
        with open(sv_path, "w") as sv_file:
            group_module = generate_module(
                matrix,
                module_name=f"{name}_tp_{weights_file_name}_vc_{i}",
                H=H,
                L=L,
                value_range=config.value_range,
                WW=widths,
            )
            sv_file.write(group_module.generate())
    except Exception as e:
        print(f"Generating {i} failed with an error: {e}")
        return None
    # Return the task id so the caller can show progress.
    return i
def run(name: str, config: CFG):
    """Generate one Wallace-tree group file per slice of the configured weights.

    The pickled array named by ``config.run_weights`` is loaded from
    ``config.weights_dir``, its first two axes are swapped, and one
    ``process_task`` call per leading-axis slice is submitted to a process
    pool with ``config.num_workers`` workers.

    Args:
        name: Module family name, forwarded to ``process_task`` and used to
            build the directory reported at the end.
        config: Run configuration; ``weights_dir``, ``run_weights``,
            ``num_workers`` and ``output_dir`` are read here.
    """
    weights_file = os.path.join(config.weights_dir, config.run_weights)
    weights_file_name = os.path.splitext(os.path.basename(weights_file))[0]
    print(f"Processing {weights_file}")
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(weights_file, "rb") as f:
        matrixs = pickle.load(f)
    matrixs = np.transpose(matrixs, (1, 0, 2))
    VN, L, H = matrixs.shape
    with ProcessPoolExecutor(max_workers=config.num_workers) as executor:
        # Remember which task index each future belongs to, so that a failure
        # can be attributed without relying on a value the future never
        # returned.
        futures = {
            executor.submit(
                process_task, i, name, weights_file_name, matrixs[i], H, L, config
            ): i
            for i in range(VN)
        }
        for future in tqdm(as_completed(futures), total=VN):
            try:
                future.result()
            except Exception as e:
                print(f"Generating {futures[future]} failed with an error: {e}")
    file_dir = os.path.join(config.output_dir, name, weights_file_name)
    print("Files generated in", file_dir)
import sys
import numpy as np
from pyrilog import (
VerilogGenerator,
ModuleBlock,
add_parameter,
add_input,
add_output,
add_assign,
add_wire,
add_instance,
)
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
from hllm.config import CFG
from tqdm import tqdm
import pickle
from hllm.log import TCL
from hllm.utils import calculate_WW
def process_task(i, name, weights_file_name, matrix, H, L, config: CFG = None):
    """Write the ``calculate_WW`` widths of slice ``i`` to a text file.

    One value per line is written to
    ``config.output_dir/name/weights_file_name/<name>_tp_<weights>_vc_<i>.txt``.
    ``H`` and ``L`` are accepted but not used. Any exception is reported on
    stdout and swallowed.

    Returns:
        ``i`` on success (used by the caller for progress reporting), or
        ``None`` when the task failed.
    """
    try:
        widths = calculate_WW(matrix, config.value_range)
        target_dir = os.path.join(config.output_dir, name, weights_file_name)
        os.makedirs(target_dir, exist_ok=True)
        txt_path = os.path.join(target_dir, f"{name}_tp_{weights_file_name}_vc_{i}.txt")
        with open(txt_path, "w") as txt_file:
            txt_file.writelines(f"{width}\n" for width in widths)
    except Exception as e:
        print(f"Generating {i} failed with an error: {e}")
        return None
    # Return the task id so the caller can show progress.
    return i
def run(name: str, config: CFG):
    """Write one width file per slice of the configured weight array.

    The pickled array named by ``config.run_weights`` is loaded from
    ``config.weights_dir``, its first two axes are swapped, and one
    ``process_task`` call per leading-axis slice is submitted to a process
    pool with ``config.num_workers`` workers.

    Args:
        name: Output family name, forwarded to ``process_task`` and used to
            build the directory reported at the end.
        config: Run configuration; ``weights_dir``, ``run_weights``,
            ``num_workers`` and ``output_dir`` are read here.
    """
    weights_file = os.path.join(config.weights_dir, config.run_weights)
    weights_file_name = os.path.splitext(os.path.basename(weights_file))[0]
    print(f"Processing {weights_file}")
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(weights_file, "rb") as f:
        matrixs = pickle.load(f)
    matrixs = np.transpose(matrixs, (1, 0, 2))
    VN, L, H = matrixs.shape
    with ProcessPoolExecutor(max_workers=config.num_workers) as executor:
        # Remember which task index each future belongs to, so that a failure
        # can be attributed without relying on a value the future never
        # returned.
        futures = {
            executor.submit(
                process_task, i, name, weights_file_name, matrixs[i], H, L, config
            ): i
            for i in range(VN)
        }
        for future in tqdm(as_completed(futures), total=VN):
            try:
                future.result()
            except Exception as e:
                print(f"Generating {futures[future]} failed with an error: {e}")
    file_dir = os.path.join(config.output_dir, name, weights_file_name)
    print("Files generated in", file_dir)
import numpy as np
import os
import pickle
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm
from hllm.config import CFG
def calculate_WW(matrix: np.array, value_range):
    """Return, per target value, its largest per-row occurrence count.

    An element counts as an occurrence of a target when it lies within 0.01
    of it. The result has one entry per element of ``value_range``, in the
    same order.
    """
    tolerance = 0.01
    return [
        max(
            sum(1 for element in row if abs(element - target) <= tolerance)
            for row in matrix
        )
        for target in value_range
    ]
def find_index(arr, target, epsilon=1e-3):
    """Return the index of the element of ``arr`` closest to ``target``.

    Raises:
        ValueError: If even the closest element differs from ``target`` by
            ``epsilon`` or more.
    """
    distances = np.abs(np.array(arr) - target)
    closest = np.min(distances)
    if not (closest < epsilon):
        # Nothing lies within the allowed tolerance.
        raise ValueError("No match found")
    # First position holding the minimal distance.
    return np.where(distances == closest)[0][0]
import sys
import numpy as np
import pickle
import os
from tqdm import tqdm
from hllm.config import CFG
def to_8bit_binary(val):
    """Format ``val`` as a zero-padded binary string of at least eight digits.

    Negative values are offset by 256 first, which gives the 8-bit
    two's-complement pattern for values from -128 to -1.
    """
    unsigned = (1 << 8) + val if val < 0 else val
    return f"{unsigned:08b}"
def run(config: CFG):
    """Create a random activation vector for the configured weights file.

    A ``(1, length)`` array of integers drawn from ``[-128, 128)`` is
    generated, where ``length`` is the size of the weights' third axis. It is
    stored in ``config.output_dir/<weights name>/`` as ``activation.pkl``
    (pickle), ``activation.txt`` (decimal, one value per line) and
    ``activation-bin.txt`` (``to_8bit_binary`` of each value, one per line).
    Nothing is generated when ``activation.pkl`` already exists and
    ``config.verify_generate_activation_on_exist`` is false.
    """
    weights_file = os.path.join(config.weights_dir, config.run_weights)
    weights_file_name = os.path.splitext(os.path.basename(weights_file))[0]
    file_dir = os.path.join(config.output_dir, weights_file_name)
    pkl_path, txt_path, bin_path = (
        os.path.join(file_dir, leaf)
        for leaf in ("activation.pkl", "activation.txt", "activation-bin.txt")
    )

    # Keep an existing activation unless regeneration was requested.
    if os.path.exists(pkl_path) and not config.verify_generate_activation_on_exist:
        print(f"Activation file {pkl_path} already exists")
        return

    print(f"Generating activation for {weights_file_name} in {file_dir}")
    os.makedirs(file_dir, exist_ok=True)
    with open(weights_file, mode="rb") as weights_handle:
        weights = pickle.load(weights_handle)
    length = weights.shape[2]
    activation = np.random.randint(-128, 128, (1, length))

    with open(pkl_path, "wb") as pkl_handle:
        pickle.dump(activation, pkl_handle)

    values = activation[0]
    with open(txt_path, "w") as txt_handle:
        txt_handle.write("".join(f"{val}\n" for val in values))
        txt_handle.write("\n")
    with open(bin_path, "w") as bin_handle:
        bin_handle.write("".join(f"{to_8bit_binary(val)}\n" for val in values))
        bin_handle.write("\n")
import sys
import numpy as np
import pickle
import os
from tqdm import tqdm
from hllm.config import CFG
def to_8bit_binary(val):
    """Return ``val`` as binary text padded with zeros to eight digits.

    A negative ``val`` is first shifted up by 256, so values from -128 to -1
    come out as their 8-bit two's-complement bit pattern.
    """
    if val >= 0:
        return f"{val:08b}"
    wrapped = (1 << 8) + val
    return f"{wrapped:08b}"
def get_bit(num, i):
    """Return bit ``i`` of ``num`` (bit 0 is the least significant one).

    A negative ``i`` yields 0.
    """
    return 0 if i < 0 else (num >> i) & 1
def run(config: CFG):
    """Compute per-bit reference products of the activation and each layer.

    Loads the pickled weights and the ``activation.pkl`` found in
    ``config.output_dir/<weights name>/``. For every layer of the weights and
    every bit position 0..7, ``get_bit`` is applied to the activation, the
    result is multiplied with the transposed layer, and the first row of the
    product is written as one space-separated line of ``result.txt``.
    """
    weights_file = os.path.join(config.weights_dir, config.run_weights)
    weights_file_name = os.path.splitext(os.path.basename(weights_file))[0]
    file_dir = os.path.join(config.output_dir, weights_file_name)

    print("开始进行激活测试")
    print(f"读取权重文件{weights_file_name}")
    with open(weights_file, "rb") as weights_handle:
        weights = pickle.load(weights_handle)

    activation_file = os.path.join(file_dir, "activation.pkl")
    with open(activation_file, "rb") as activation_handle:
        activation = pickle.load(activation_handle)

    results_txt = os.path.join(file_dir, "result.txt")
    with open(results_txt, "w") as out:
        for layer in weights:
            for bit_index in range(8):
                # presumably get_bit works element-wise on the array - TODO confirm
                bit_plane = get_bit(activation, bit_index)
                product = np.matmul(bit_plane, layer.T)
                out.write("".join(f"{val} " for val in product[0]))
                out.write("\n")
    print(f"结果写入{results_txt}")
from hllm.config import CFG
def run_origin(config: CFG):
    """Run every generator of the ``hllm.origin`` flow, in a fixed order.

    ``config.output_dir`` is overwritten with ``"outputs-qwen/origin"`` before
    the generators run. The generator modules are imported lazily, inside the
    function.
    """
    import hllm.origin.generate_layer_mux as generate_layer_mux
    import hllm.origin.generate_hn as generate_hn
    import hllm.origin.generate_mid_wrapper as generate_mid_wrapper
    import hllm.origin.generate_fsm as generate_fsm
    import hllm.origin.generate_sub_wrapper as generate_sub_wrapper
    import hllm.origin.generate_wallace as generate_wallace
    import hllm.origin.generate_wrappers as generate_wrappers
    import hllm.origin.generate_wt_group as generate_wt_group
    import hllm.origin.generate_ww as generate_ww

    config.output_dir = "outputs-qwen/origin"
    # (generator module, name passed to its run()) in execution order.
    pipeline = (
        (generate_ww, "WW"),
        (generate_layer_mux, "Layer_mux"),
        (generate_hn, "HN"),
        (generate_mid_wrapper, "Mid_wrapper"),
        (generate_fsm, "FSM"),
        (generate_sub_wrapper, "Sub_wrapper"),
        (generate_wallace, "SerialWallaceTree"),
        (generate_wrappers, "Wrappers"),
        (generate_wt_group, "WT_group"),
    )
    for generator, stage_name in pipeline:
        generator.run(name=stage_name, config=config)
def run_optimized(config: CFG):
    """Run every generator of the optimized flow, in a fixed order.

    ``config.output_dir`` is overwritten with ``"outputs-qwen/optimized"``
    before the generators run. All generators come from ``hllm.optimized``
    except the Wallace tree generator, which is taken from ``hllm.origin``.
    """
    import hllm.origin.generate_wallace as generate_wallace
    import hllm.optimized.generate_info as generate_info
    import hllm.optimized.generate_mux_wrapper as generate_mux_wrapper
    import hllm.optimized.generate_mux as generate_mux
    import hllm.optimized.generate_sub_wrapper as generate_sub_wrapper
    import hllm.optimized.generate_wt_group as generate_wt_group
    import hllm.optimized.generate_mid_wrapper as generate_mid_wrapper
    import hllm.optimized.generate_wrappers as generate_wrappers
    import hllm.optimized.generate_layer_mux as generate_layer_mux
    import hllm.optimized.generate_fsm as generate_fsm

    config.output_dir = "outputs-qwen/optimized"
    # (generator module, name passed to its run()) in execution order.
    pipeline = (
        (generate_info, "info"),
        (generate_mux_wrapper, "Mux_wrapper"),
        (generate_mux, "Mux"),
        (generate_sub_wrapper, "Sub_wrapper"),
        (generate_wt_group, "WT_group"),
        (generate_mid_wrapper, "Mid_wrapper"),
        (generate_wallace, "SerialWallaceTree"),
        (generate_wrappers, "Wrappers"),
        (generate_layer_mux, "Layer_mux"),
        (generate_fsm, "FSM"),
    )
    for generator, stage_name in pipeline:
        generator.run(name=stage_name, config=config)
def run_weights_preprocess(config: CFG):
    """Run the two ``hllm.eda`` weight steps: quantization, then mapping."""
    import hllm.eda.generate_quant_weights as generate_quant_weights
    import hllm.eda.mapping_weights as generate_mapping_weights

    for step in (generate_quant_weights, generate_mapping_weights):
        step.run(config=config)
def run_verify(config: CFG):
    """Generate a test activation and then run the activation check.

    ``config.output_dir`` is overwritten with ``"outputs-qwen/verify"`` first.
    """
    import hllm.verify.generate_activation as generate_activation
    import hllm.verify.verify_activation as verify_activation

    config.output_dir = "outputs-qwen/verify"
    for step in (generate_activation, verify_activation):
        step.run(config=config)
def batch_run(config: CFG):
    """Run the origin, optimized and verify flows for each batch weights file.

    For every entry of ``config.run_weights_batch`` the entry is stored in
    ``config.run_weights`` and the three flows are executed in that order.
    """
    flows = (run_origin, run_optimized, run_verify)
    for weights in config.run_weights_batch:
        config.run_weights = weights
        for flow in flows:
            flow(config)
if __name__ == "__main__":
    # Script entry point: build the default configuration and hand it to
    # batch_run.
    config = CFG()
    # Individual flows, currently disabled in favour of batch_run below.
    # run_weights_preprocess(config)
    # run_origin(config)
    # run_optimized(config)
    # run_verify()
    batch_run(config)
\ No newline at end of file
# Build script for the "hllm" package, including one C++ extension module.
from setuptools import setup, find_packages, Extension
# pybind11 is imported here to locate its headers, so it has to be installed
# before this script runs.
import pybind11
# C++ extension compiled from hllm/optimized/turbo_optimize_hn.cpp.
ext_modules = [
    Extension(
        "hllm.optimized.turbo_optimize_hn",  # NOTE: the module path here must match the package layout
        ["hllm/optimized/turbo_optimize_hn.cpp"],
        include_dirs=[pybind11.get_include()],
        language="c++",
        extra_compile_args=["-std=c++11", "-fPIC", "-O3"],
        extra_link_args=["-static-libstdc++"],
    ),
]
setup(
    name="hllm",
    version="0.1.0",
    packages=find_packages(),
    install_requires=[
        "pybind11>=2.6.0",
    ],
    ext_modules=ext_modules,
)
This diff is collapsed. Click to expand it.
// Mid_wrappers_tp_k: hand-written template that instantiates
// Sub_wrapper_tp_k_vc_<n> modules. All instances share the clock, reset,
// valid, CST_LOW, LM_sel and SW_in inputs; the scalar WT_<v>_out_S/C outputs
// of instance <n> drive element <n> of the WT_v<v>_out_S/C arrays.
// Only instances 0 and 1 are written out here.
module Mid_wrappers_tp_k #(
    parameter H = 16, // these values do not matter (placeholders)
    parameter L = 2,
    parameter VN = 16
) (
    input clk,
    input tree_rstn,
    input valid,
    input CST_LOW,
    input [L - 1 : 0] LM_sel,
    input [H - 1 : 0] SW_in,
    // One sum (S) and one carry (C) output array per value index v0..v13.
    output WT_v0_out_S[VN - 1 : 0],
    output WT_v0_out_C[VN - 1 : 0],
    output WT_v1_out_S[VN - 1 : 0],
    output WT_v1_out_C[VN - 1 : 0],
    output WT_v2_out_S[VN - 1 : 0],
    output WT_v2_out_C[VN - 1 : 0],
    output WT_v3_out_S[VN - 1 : 0],
    output WT_v3_out_C[VN - 1 : 0],
    output WT_v4_out_S[VN - 1 : 0],
    output WT_v4_out_C[VN - 1 : 0],
    output WT_v5_out_S[VN - 1 : 0],
    output WT_v5_out_C[VN - 1 : 0],
    output WT_v6_out_S[VN - 1 : 0],
    output WT_v6_out_C[VN - 1 : 0],
    output WT_v7_out_S[VN - 1 : 0],
    output WT_v7_out_C[VN - 1 : 0],
    output WT_v8_out_S[VN - 1 : 0],
    output WT_v8_out_C[VN - 1 : 0],
    output WT_v9_out_S[VN - 1 : 0],
    output WT_v9_out_C[VN - 1 : 0],
    output WT_v10_out_S[VN - 1 : 0],
    output WT_v10_out_C[VN - 1 : 0],
    output WT_v11_out_S[VN - 1 : 0],
    output WT_v11_out_C[VN - 1 : 0],
    output WT_v12_out_S[VN - 1 : 0],
    output WT_v12_out_C[VN - 1 : 0],
    output WT_v13_out_S[VN - 1 : 0],
    output WT_v13_out_C[VN - 1 : 0]
);
    Sub_wrapper_tp_k_vc_0 sub_wrapper_0 // iterate this name by hand, together with the indices below
    (
        .clk(clk),
        .tree_rstn(tree_rstn),
        .valid(valid),
        .CST_LOW(CST_LOW),
        .LM_sel(LM_sel),
        .SW_in(SW_in),
        .WT_0_out_S(WT_v0_out_S[0]),
        .WT_0_out_C(WT_v0_out_C[0]),
        .WT_1_out_S(WT_v1_out_S[0]),
        .WT_1_out_C(WT_v1_out_C[0]),
        .WT_2_out_S(WT_v2_out_S[0]),
        .WT_2_out_C(WT_v2_out_C[0]),
        .WT_3_out_S(WT_v3_out_S[0]),
        .WT_3_out_C(WT_v3_out_C[0]),
        .WT_4_out_S(WT_v4_out_S[0]),
        .WT_4_out_C(WT_v4_out_C[0]),
        .WT_5_out_S(WT_v5_out_S[0]),
        .WT_5_out_C(WT_v5_out_C[0]),
        .WT_6_out_S(WT_v6_out_S[0]),
        .WT_6_out_C(WT_v6_out_C[0]),
        .WT_7_out_S(WT_v7_out_S[0]),
        .WT_7_out_C(WT_v7_out_C[0]),
        .WT_8_out_S(WT_v8_out_S[0]),
        .WT_8_out_C(WT_v8_out_C[0]),
        .WT_9_out_S(WT_v9_out_S[0]),
        .WT_9_out_C(WT_v9_out_C[0]),
        .WT_10_out_S(WT_v10_out_S[0]),
        .WT_10_out_C(WT_v10_out_C[0]),
        .WT_11_out_S(WT_v11_out_S[0]),
        .WT_11_out_C(WT_v11_out_C[0]),
        .WT_12_out_S(WT_v12_out_S[0]),
        .WT_12_out_C(WT_v12_out_C[0]),
        .WT_13_out_S(WT_v13_out_S[0]),
        .WT_13_out_C(WT_v13_out_C[0])
    );
    // Second instance: same wiring, array element 1.
    Sub_wrapper_tp_k_vc_1 sub_wrapper_1 (
        .clk(clk),
        .tree_rstn(tree_rstn),
        .valid(valid),
        .CST_LOW(CST_LOW),
        .LM_sel(LM_sel),
        .SW_in(SW_in),
        .WT_0_out_S(WT_v0_out_S[1]),
        .WT_0_out_C(WT_v0_out_C[1]),
        .WT_1_out_S(WT_v1_out_S[1]),
        .WT_1_out_C(WT_v1_out_C[1]),
        .WT_2_out_S(WT_v2_out_S[1]),
        .WT_2_out_C(WT_v2_out_C[1]),
        .WT_3_out_S(WT_v3_out_S[1]),
        .WT_3_out_C(WT_v3_out_C[1]),
        .WT_4_out_S(WT_v4_out_S[1]),
        .WT_4_out_C(WT_v4_out_C[1]),
        .WT_5_out_S(WT_v5_out_S[1]),
        .WT_5_out_C(WT_v5_out_C[1]),
        .WT_6_out_S(WT_v6_out_S[1]),
        .WT_6_out_C(WT_v6_out_C[1]),
        .WT_7_out_S(WT_v7_out_S[1]),
        .WT_7_out_C(WT_v7_out_C[1]),
        .WT_8_out_S(WT_v8_out_S[1]),
        .WT_8_out_C(WT_v8_out_C[1]),
        .WT_9_out_S(WT_v9_out_S[1]),
        .WT_9_out_C(WT_v9_out_C[1]),
        .WT_10_out_S(WT_v10_out_S[1]),
        .WT_10_out_C(WT_v10_out_C[1]),
        .WT_11_out_S(WT_v11_out_S[1]),
        .WT_11_out_C(WT_v11_out_C[1]),
        .WT_12_out_S(WT_v12_out_S[1]),
        .WT_12_out_C(WT_v12_out_C[1]),
        .WT_13_out_S(WT_v13_out_S[1]),
        .WT_13_out_C(WT_v13_out_C[1])
    );
endmodule
This diff is collapsed. Click to expand it.
// One-bit full adder: S is the XOR of the three inputs, Cout is their
// majority function.
module FullAdder(
    input A, // First input bit
    input B, // Second input bit
    input Cin, // Carry input bit
    output S, // Sum output bit
    output Cout // Carry output bit
);
    assign S = A ^ B ^ Cin;
    assign Cout = (A & B) | (B & Cin) | (A & Cin);
endmodule
// WallaceTree8Input: combinational reduction of 8 input bits through four
// levels (L0..L3) of FullAdder arrays. The carries of levels 0..2 leave the
// module through L<i>_Cout and enter the next level through L<i+1>_Cin; the
// two are not connected inside this module. The last level drives final_S
// and final_Cout.
module WallaceTree8Input(
    input [8 - 1 : 0] addends,
    output [2 - 1 : 0] L0_Cout,
    input [2 - 1 : 0] L1_Cin,
    output [2 - 1 : 0] L1_Cout,
    input [2 - 1 : 0] L2_Cin,
    output [1 - 1 : 0] L2_Cout,
    input [1 - 1 : 0] L3_Cin,
    output final_Cout,
    output final_S
);
    // Level 0: 8 inputs -> 2 full adders, 2 bits passed through as remainder.
    wire [8 - 1 : 0] L0_all_inputs;
    assign L0_all_inputs = addends;
    wire [2 - 1 : 0] L0_remainder;
    assign L0_remainder = L0_all_inputs[8 - 1 : 8 - 2];
    wire [2 - 1 : 0] L0_S;
    FullAdder L0_adders [2 - 1 : 0](
        .A(L0_all_inputs[2 * 3 - 1 : 2 * 2]),
        .B(L0_all_inputs[2 * 2 - 1 : 2]),
        .Cin(L0_all_inputs[2 - 1 : 0]),
        .Cout(L0_Cout),
        .S(L0_S)
    );
    // Level 1: previous sums, incoming carries and previous remainder
    // (6 inputs) -> 2 full adders, no remainder.
    wire [6 - 1 : 0] L1_all_inputs;
    assign L1_all_inputs = {L0_S, L1_Cin, L0_remainder};
    wire [2 - 1 : 0] L1_S;
    FullAdder L1_adders [2 - 1 : 0](
        .A(L1_all_inputs[2 * 3 - 1 : 2 * 2]),
        .B(L1_all_inputs[2 * 2 - 1 : 2]),
        .Cin(L1_all_inputs[2 - 1 : 0]),
        .Cout(L1_Cout),
        .S(L1_S)
    );
    // Level 2: 4 inputs -> 1 full adder, 1 bit passed through as remainder.
    wire [4 - 1 : 0] L2_all_inputs;
    assign L2_all_inputs = {L1_S, L2_Cin};
    wire [1 - 1 : 0] L2_remainder;
    assign L2_remainder = L2_all_inputs[4 - 1 : 4 - 1];
    wire [1 - 1 : 0] L2_S;
    FullAdder L2_adders [1 - 1 : 0](
        .A(L2_all_inputs[1 * 3 - 1 : 1 * 2]),
        .B(L2_all_inputs[1 * 2 - 1 : 1]),
        .Cin(L2_all_inputs[1 - 1 : 0]),
        .Cout(L2_Cout),
        .S(L2_S)
    );
    // Level 3 (final): 3 inputs -> 1 full adder driving the module outputs.
    wire [3 - 1 : 0] L3_all_inputs;
    assign L3_all_inputs = {L2_S, L3_Cin, L2_remainder};
    FullAdder L3_adders [1 - 1 : 0](
        .A(L3_all_inputs[1 * 3 - 1 : 1 * 2]),
        .B(L3_all_inputs[1 * 2 - 1 : 1]),
        .Cin(L3_all_inputs[1 - 1 : 0]),
        .Cout(final_Cout),
        .S(final_S)
    );
endmodule
// SerialWallaceTree8Input: wraps WallaceTree8Input and registers each level's
// carry outputs before feeding them to the next level's carry inputs.
// The registers are cleared synchronously while rstn is low and are updated
// only in cycles where valid is high. out_S and out_Cout are forced to 0
// while valid is low.
module SerialWallaceTree8Input(
    input clk,
    input rstn,
    input valid,
    input [8 - 1 : 0] addends,
    output out_S,
    output out_Cout
);
    // Carry path from level 0 to level 1.
    wire [2 - 1 : 0] L0_Cout;
    wire [2 - 1 : 0] L1_Cin;
    reg [2 - 1 : 0] L0_Cout_L1_Cin_reg;
    assign L1_Cin = L0_Cout_L1_Cin_reg;
    // Carry path from level 1 to level 2.
    wire [2 - 1 : 0] L1_Cout;
    wire [2 - 1 : 0] L2_Cin;
    reg [2 - 1 : 0] L1_Cout_L2_Cin_reg;
    assign L2_Cin = L1_Cout_L2_Cin_reg;
    // Carry path from level 2 to level 3.
    wire [1 - 1 : 0] L2_Cout;
    wire [1 - 1 : 0] L3_Cin;
    reg [1 - 1 : 0] L2_Cout_L3_Cin_reg;
    assign L3_Cin = L2_Cout_L3_Cin_reg;
    // Outputs of the tree, gated by valid.
    wire final_S, final_Cout;
    assign out_S = final_S & valid;
    assign out_Cout = final_Cout & valid;
    WallaceTree8Input u_WallaceTree8Input(
        .addends(addends),
        .L0_Cout(L0_Cout),
        .L1_Cin(L1_Cin),
        .L1_Cout(L1_Cout),
        .L2_Cin(L2_Cin),
        .L2_Cout(L2_Cout),
        .L3_Cin(L3_Cin),
        .final_S(final_S),
        .final_Cout(final_Cout)
    );
    // Carry registers: synchronous active-low reset, load enabled by valid.
    always @ (posedge clk) begin
        if (!rstn) begin
            L0_Cout_L1_Cin_reg <= 2'b0;
            L1_Cout_L2_Cin_reg <= 2'b0;
            L2_Cout_L3_Cin_reg <= 1'b0;
        end
        else if (valid) begin
            L0_Cout_L1_Cin_reg <= L0_Cout;
            L1_Cout_L2_Cin_reg <= L1_Cout;
            L2_Cout_L3_Cin_reg <= L2_Cout;
        end
    end
endmodule
// Wrappers_tp_k: hand-written top-level template that instantiates
// Mid_wrapper_tp_k_gp_<n> modules. All instances share the clock, reset,
// valid, CST_LOW, LM_sel and SW_in inputs; the WT_<v>_out_S/C outputs of
// instance <n> are connected to the WT_v<v>_out_S/C arrays.
// Only instances 0 and 1 are written out here.
module Wrappers_tp_k #(
    parameter H = 16, // these values do not matter (placeholders)
    parameter L = 2,
    parameter VN = 2
) (
    input clk,
    input tree_rstn,
    input valid,
    input CST_LOW,
    input [L - 1 : 0] LM_sel,
    input [H - 1 : 0] SW_in,
    // One sum (S) and one carry (C) output array per value index v0..v13.
    output WT_v0_out_S[VN - 1 : 0],
    output WT_v0_out_C[VN - 1 : 0],
    output WT_v1_out_S[VN - 1 : 0],
    output WT_v1_out_C[VN - 1 : 0],
    output WT_v2_out_S[VN - 1 : 0],
    output WT_v2_out_C[VN - 1 : 0],
    output WT_v3_out_S[VN - 1 : 0],
    output WT_v3_out_C[VN - 1 : 0],
    output WT_v4_out_S[VN - 1 : 0],
    output WT_v4_out_C[VN - 1 : 0],
    output WT_v5_out_S[VN - 1 : 0],
    output WT_v5_out_C[VN - 1 : 0],
    output WT_v6_out_S[VN - 1 : 0],
    output WT_v6_out_C[VN - 1 : 0],
    output WT_v7_out_S[VN - 1 : 0],
    output WT_v7_out_C[VN - 1 : 0],
    output WT_v8_out_S[VN - 1 : 0],
    output WT_v8_out_C[VN - 1 : 0],
    output WT_v9_out_S[VN - 1 : 0],
    output WT_v9_out_C[VN - 1 : 0],
    output WT_v10_out_S[VN - 1 : 0],
    output WT_v10_out_C[VN - 1 : 0],
    output WT_v11_out_S[VN - 1 : 0],
    output WT_v11_out_C[VN - 1 : 0],
    output WT_v12_out_S[VN - 1 : 0],
    output WT_v12_out_C[VN - 1 : 0],
    output WT_v13_out_S[VN - 1 : 0],
    output WT_v13_out_C[VN - 1 : 0]
);
    Mid_wrapper_tp_k_gp_0 mid_wrapper_0 // iterate this name by hand, together with the indices below
    (
        .clk(clk),
        .tree_rstn(tree_rstn),
        .valid(valid),
        .CST_LOW(CST_LOW),
        .LM_sel(LM_sel),
        .SW_in(SW_in),
        // NOTE(review): this is the only connection using a [15:0] slice; every
        // other port uses a single element and VN defaults to 2 - confirm intent.
        .WT_0_out_S(WT_v0_out_S[15:0]),
        .WT_0_out_C(WT_v0_out_C[0]),
        .WT_1_out_S(WT_v1_out_S[0]),
        .WT_1_out_C(WT_v1_out_C[0]),
        .WT_2_out_S(WT_v2_out_S[0]),
        .WT_2_out_C(WT_v2_out_C[0]),
        .WT_3_out_S(WT_v3_out_S[0]),
        .WT_3_out_C(WT_v3_out_C[0]),
        .WT_4_out_S(WT_v4_out_S[0]),
        .WT_4_out_C(WT_v4_out_C[0]),
        .WT_5_out_S(WT_v5_out_S[0]),
        .WT_5_out_C(WT_v5_out_C[0]),
        .WT_6_out_S(WT_v6_out_S[0]),
        .WT_6_out_C(WT_v6_out_C[0]),
        .WT_7_out_S(WT_v7_out_S[0]),
        .WT_7_out_C(WT_v7_out_C[0]),
        .WT_8_out_S(WT_v8_out_S[0]),
        .WT_8_out_C(WT_v8_out_C[0]),
        .WT_9_out_S(WT_v9_out_S[0]),
        .WT_9_out_C(WT_v9_out_C[0]),
        .WT_10_out_S(WT_v10_out_S[0]),
        .WT_10_out_C(WT_v10_out_C[0]),
        .WT_11_out_S(WT_v11_out_S[0]),
        .WT_11_out_C(WT_v11_out_C[0]),
        .WT_12_out_S(WT_v12_out_S[0]),
        .WT_12_out_C(WT_v12_out_C[0]),
        .WT_13_out_S(WT_v13_out_S[0]),
        .WT_13_out_C(WT_v13_out_C[0])
    );
    // Second instance: same wiring, array element 1.
    Mid_wrapper_tp_k_gp_1 mid_wrapper_1 (
        .clk(clk),
        .tree_rstn(tree_rstn),
        .valid(valid),
        .CST_LOW(CST_LOW),
        .LM_sel(LM_sel),
        .SW_in(SW_in),
        .WT_0_out_S(WT_v0_out_S[1]),
        .WT_0_out_C(WT_v0_out_C[1]),
        .WT_1_out_S(WT_v1_out_S[1]),
        .WT_1_out_C(WT_v1_out_C[1]),
        .WT_2_out_S(WT_v2_out_S[1]),
        .WT_2_out_C(WT_v2_out_C[1]),
        .WT_3_out_S(WT_v3_out_S[1]),
        .WT_3_out_C(WT_v3_out_C[1]),
        .WT_4_out_S(WT_v4_out_S[1]),
        .WT_4_out_C(WT_v4_out_C[1]),
        .WT_5_out_S(WT_v5_out_S[1]),
        .WT_5_out_C(WT_v5_out_C[1]),
        .WT_6_out_S(WT_v6_out_S[1]),
        .WT_6_out_C(WT_v6_out_C[1]),
        .WT_7_out_S(WT_v7_out_S[1]),
        .WT_7_out_C(WT_v7_out_C[1]),
        .WT_8_out_S(WT_v8_out_S[1]),
        .WT_8_out_C(WT_v8_out_C[1]),
        .WT_9_out_S(WT_v9_out_S[1]),
        .WT_9_out_C(WT_v9_out_C[1]),
        .WT_10_out_S(WT_v10_out_S[1]),
        .WT_10_out_C(WT_v10_out_C[1]),
        .WT_11_out_S(WT_v11_out_S[1]),
        .WT_11_out_C(WT_v11_out_C[1]),
        .WT_12_out_S(WT_v12_out_S[1]),
        .WT_12_out_C(WT_v12_out_C[1]),
        .WT_13_out_S(WT_v13_out_S[1]),
        .WT_13_out_C(WT_v13_out_C[1])
    );
endmodule
def read_mem_file(filepath):
    """Read a text file of binary numbers, one per line, into a list of ints.

    Underscores inside a line are removed before the conversion.
    """
    mem_vector = []
    with open(filepath, 'r') as handle:
        for raw_line in handle:
            # Drop the digit separators, then parse the line as base 2.
            digits = raw_line.strip().replace('_', '')
            mem_vector.append(int(digits, 2))
    return mem_vector
def read_weight_file(filepath):
    """Read a text file holding one decimal integer per line into a list."""
    weight_vector = []
    with open(filepath, 'r') as handle:
        for raw_line in handle:
            weight_vector.append(int(raw_line.strip()))
    return weight_vector
def read_result_file(filepath):
    """Read the first line of a file as a signed two's-complement binary number.

    The word width is the number of characters on that line (after stripping
    surrounding whitespace).
    """
    with open(filepath, 'r') as handle:
        bits = handle.readline().strip()
    width = len(bits)
    value = int(bits, 2)
    # A set top bit marks a negative number: subtract 2**width.
    if value >= 1 << (width - 1):
        value -= 1 << width
    return value
def vector_multiplication(mem_vector, weight_vector):
    """Return the element-wise products of two sequences.

    The result is as long as the shorter input.
    """
    product_vector = []
    for mem_value, weight in zip(mem_vector, weight_vector):
        product_vector.append(mem_value * weight)
    return product_vector
def main():
    """Compare the dot product of mem.txt and weight.txt with result.txt.

    The three files are read from a hard-coded Vivado project directory. The
    element-wise products, their sum and the value read from result.txt are
    printed, followed by whether the last two are equal.
    """
    project_dir = 'F:/another-D/vivao/vivado_project/project_10_wallace_FSM_MUX'
    mem_vector = read_mem_file(f'{project_dir}/mem.txt')
    weight_vector = read_weight_file(f'{project_dir}/weight.txt')

    # Element-wise products
    product_vector = vector_multiplication(mem_vector, weight_vector)
    print("对位乘法结果:")
    triples = zip(mem_vector, weight_vector, product_vector)
    for i, (mem_value, weight, product) in enumerate(triples):
        print(f"mem[{i}] * weight[{i}] = {mem_value} * {weight} = {product}")

    # Sum of the products
    total_sum = sum(product_vector)
    print(f"\n总和: {total_sum}; 二进制表示: {bin(total_sum)}")

    # Value stored in result.txt, decoded as a signed binary number
    result_decimal = read_result_file(f'{project_dir}/result.txt')
    print(f"\n从result.txt读取的二进制数对应的十进制值为: {result_decimal}\n")

    # Compare the stored value with the computed sum
    verdict = "相同✓\n" if result_decimal == total_sum else "不同×\n"
    print(verdict)
# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    main()
// Stub of an optimized multiplexer: only the port list is defined so far and
// `out` is not driven by any logic in this module.
module optimized_mux_tp_o_vc_1_value_3_cnt_0 (
    input [1536 - 1:0] in, // 1536-bit input signals
    input [5:0] sel, // 6-bit binary selector signal
    output out // Selected output
);
    // TODO: follow the original logic; tie anything left unconnected to 0
endmodule
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment