Commit 31aaa24a by lancer

Add Xoshiro256++ random number generator; define USE_FAST_RANDOM to use it

parent 4ad61d9b
...@@ -1127,7 +1127,11 @@ int64_t BDD_class::BDD_infer(){ ...@@ -1127,7 +1127,11 @@ int64_t BDD_class::BDD_infer(){
// randint64_t = gen(); // randint64_t = gen();
//} //}
//test_input_data[j] = bool((randint64_t >> (zi))%2); //test_input_data[j] = bool((randint64_t >> (zi))%2);
#ifdef USE_FAST_RANDOM
test_input_data[j] = get_bit(test_input_bits, long(zj)*long(parameter_input_bit_width)+j);
#else
test_input_data[j] = test_input_bits[long(zj%parameter_test_ios)*long(parameter_input_bit_width)+j]; test_input_data[j] = test_input_bits[long(zj%parameter_test_ios)*long(parameter_input_bit_width)+j];
#endif
} }
for(int64_t j=0;j<start_depth;j++){ for(int64_t j=0;j<start_depth;j++){
test_input_data[most_influence[j]] = BDD[start_depth][test_bit].mask[most_influence[j]]; test_input_data[most_influence[j]] = BDD[start_depth][test_bit].mask[most_influence[j]];
......
#include"top.h" #include"top.h"
#include"cvt.h" #include"cvt.h"
#include <iostream>
#include <vector>
#include <omp.h>
#include <cstdint>
#include <ctime>
#include <random> // 用于 random_device
#include <chrono> // 用于高精度时间
//电路的parameter Circuit_parameter //电路的parameter Circuit_parameter
int64_t circuit_index = 9999; //电路编号 int64_t circuit_index = 9999; //电路编号
...@@ -92,13 +98,98 @@ node_index* default_start_node_index; ...@@ -92,13 +98,98 @@ node_index* default_start_node_index;
int64_t BSD_execute(int64_t start_node_number, node_index* start_node_index,int64_t variable_order_number, int64_t* variable_order); int64_t BSD_execute(int64_t start_node_number, node_index* start_node_index,int64_t variable_order_number, int64_t* variable_order);
#ifdef USE_FAST_RANDOM
// ==========================================
// 1. High-performance random number generator (Xoshiro256++)
// ==========================================
/// Xoshiro256++ generator: four 64-bit state words advanced by next().
struct Xoshiro256pp {
    uint64_t s[4];  // generator state, fully written by the constructor

    /// Rotates x left by k bits (the generator uses k = 23 and k = 45).
    static inline uint64_t rotl(const uint64_t x, int k) {
        const uint64_t upper = x << k;
        const uint64_t lower = x >> (64 - k);
        return upper | lower;
    }

    /// Expands a single 64-bit seed into the four state words
    /// (SplitMix64 initialization).
    Xoshiro256pp(uint64_t seed) {
        for (uint64_t& word : s) {
            seed += 0x9e3779b97f4a7c15;
            uint64_t mixed = seed;
            mixed ^= mixed >> 30;
            mixed *= 0xbf58476d1ce4e5b9;
            mixed ^= mixed >> 27;
            mixed *= 0x94d049bb133111eb;
            word = mixed ^ (mixed >> 31);
        }
    }

    /// Returns the next 64-bit output and advances the state by one step.
    inline uint64_t next() {
        // The output is derived from the state as it is *before* the update.
        const uint64_t output = rotl(s[0] + s[3], 23) + s[0];
        const uint64_t shifted = s[1] << 17;

        s[2] ^= s[0];
        s[3] ^= s[1];
        s[1] ^= s[2];
        s[0] ^= s[3];

        s[2] ^= shifted;
        s[3] = rotl(s[3], 45);
        return output;
    }

    /// Replaces the state with the XOR of the intermediate states selected by
    /// the set bits of JUMP, stepping the generator once per bit (256 steps).
    void jump() {
        static const uint64_t JUMP[] = { 0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c };

        uint64_t acc[4] = {0, 0, 0, 0};
        for (const uint64_t jump_word : JUMP) {
            for (int bit = 0; bit < 64; ++bit) {
                if ((jump_word >> bit) & 1ULL) {
                    for (int w = 0; w < 4; ++w) acc[w] ^= s[w];
                }
                next();
            }
        }
        for (int w = 0; w < 4; ++w) s[w] = acc[w];
    }
};
// ==========================================
// 传入 uint64_t 数组,而非 bool 数组
// ==========================================
void generate_random_bits(uint64_t* packed_buffer, size_t num_uint64s) {
std::random_device rd;
auto now = std::chrono::high_resolution_clock::now().time_since_epoch().count();
uint64_t seed = (uint64_t(rd()) << 32) | rd();
seed ^= now;
Xoshiro256pp master_rng(seed);
#pragma omp parallel
{
Xoshiro256pp local_rng = master_rng;
int tid = omp_get_thread_num();
for(int k = 0; k < tid; ++k) local_rng.jump();
// 这里的循环极其简单:生成一个随机数,直接存入内存
// 不需要任何位拆分操作,速度达到内存带宽极限
#pragma omp for schedule(static)
for (size_t i = 0; i < num_uint64s; ++i) {
packed_buffer[i] = local_rng.next();
}
}
}
#endif
/// Reads one bit from a buffer of packed 64-bit words.
/// @param buffer packed words; bit 0 of a word is its least significant bit
/// @param index  absolute bit position across the whole buffer
/// @return true when the addressed bit is set
inline bool get_bit(const uint64_t* buffer, size_t index) {
    const size_t word_pos = index >> 6;   // index / 64: which uint64_t holds the bit
    const size_t bit_pos = index & 63u;   // index % 64: offset inside that word
    const uint64_t word = buffer[word_pos];
    return ((word >> bit_pos) & 1ULL) != 0;
}
#ifdef USE_FAST_RANDOM
// Packed test-sample buffer: one bit per (sample, input bit) pair.
size_t test_total_bits = static_cast<size_t>(parameter_test_ios) * parameter_input_bit_width;
// Note: despite the "_bits" suffix this is the number of 64-bit words
// (the bit total rounded up to a whole word); it is used as the array length.
size_t test_uint64_total_bits = (test_total_bits + 63) / 64;
uint64_t* test_input_bits = new uint64_t[test_uint64_total_bits];

// Packed training-sample buffer, laid out the same way.
size_t train_total_bits = static_cast<size_t>(parameter_max_samples) * parameter_input_bit_width;
// Also a count of 64-bit words, not bits.
size_t train_uint64_total_bits = (train_total_bits + 63) / 64;
uint64_t* train_input_bits = new uint64_t[train_uint64_total_bits];
#else
bool * test_input_bits = new bool [long(parameter_test_ios)*long(parameter_input_bit_width)]; bool * test_input_bits = new bool [long(parameter_test_ios)*long(parameter_input_bit_width)];
bool * train_input_bits = new bool [long(parameter_max_samples)*long(parameter_input_bit_width)]; bool * train_input_bits = new bool [long(parameter_max_samples)*long(parameter_input_bit_width)];
#endif
void set_random_train(){ void set_random_train(){
#ifdef USE_FAST_RANDOM
generate_random_bits(train_input_bits, train_uint64_total_bits);
#else
random_device rd; random_device rd;
mt19937 gen(rd()); mt19937 gen(rd());
#pragma omp parallel for #pragma omp parallel for
...@@ -107,6 +198,7 @@ void set_random_train(){ ...@@ -107,6 +198,7 @@ void set_random_train(){
for (int64_t zi=0;zi<30;zi++) for (int64_t zi=0;zi<30;zi++)
train_input_bits[j*30+zi] = bool((int(randint64_t >> (zi)))%2); train_input_bits[j*30+zi] = bool((int(randint64_t >> (zi)))%2);
} }
#endif
}; };
class BSD_features{ class BSD_features{
...@@ -189,6 +281,9 @@ void set_default(){ ...@@ -189,6 +281,9 @@ void set_default(){
lineCount ++; lineCount ++;
} }
} }
#ifdef USE_FAST_RANDOM
generate_random_bits(test_input_bits, test_uint64_total_bits);
#else
random_device rd; random_device rd;
mt19937 gen(rd()); mt19937 gen(rd());
#pragma omp parallel for #pragma omp parallel for
...@@ -197,6 +292,7 @@ void set_default(){ ...@@ -197,6 +292,7 @@ void set_default(){
for (int64_t zi=0;zi<30;zi++) for (int64_t zi=0;zi<30;zi++)
test_input_bits[j*30+zi] = bool((randint64_t >> (zi))%2); test_input_bits[j*30+zi] = bool((randint64_t >> (zi))%2);
} }
#endif
cout<<"Finish default setup"; cout<<"Finish default setup";
//io generator来自真值表,不来自写好的文件 //io generator来自真值表,不来自写好的文件
//char* truth_table_name = new char [100]; //char* truth_table_name = new char [100];
......
...@@ -76,13 +76,21 @@ int64_t BDD_class::next_bit_layer_0(int64_t depth){ ...@@ -76,13 +76,21 @@ int64_t BDD_class::next_bit_layer_0(int64_t depth){
//} //}
for (j=0;j<parameter_input_bit_width;j++){ for (j=0;j<parameter_input_bit_width;j++){
if(depth == 0){ if(depth == 0){
#ifdef USE_FAST_RANDOM
mask_input_data_order[i][j] = get_bit(train_input_bits, (i+zz*BSD_samples_influence)*parameter_input_bit_width+j);
#else
mask_input_data_order[i][j] = train_input_bits[(i+zz*BSD_samples_influence)*parameter_input_bit_width+j]; mask_input_data_order[i][j] = train_input_bits[(i+zz*BSD_samples_influence)*parameter_input_bit_width+j];
#endif
} }
else{ else{
if(has_been_unfold[j]){ if(has_been_unfold[j]){
mask_input_data_order[i][j] = BDD_mask_this[which_node_this_layer].mask[j]; mask_input_data_order[i][j] = BDD_mask_this[which_node_this_layer].mask[j];
}else{ }else{
#ifdef USE_FAST_RANDOM
mask_input_data_order[i][j] = get_bit(train_input_bits, (i+zz*BSD_samples_influence)*parameter_input_bit_width+j);
#else
mask_input_data_order[i][j] = train_input_bits[(i+zz*BSD_samples_influence)*parameter_input_bit_width+j]; mask_input_data_order[i][j] = train_input_bits[(i+zz*BSD_samples_influence)*parameter_input_bit_width+j];
#endif
} }
} }
......
...@@ -22,7 +22,11 @@ int64_t BDD_class::set_random_input_data(bool** mask_input_data){ ...@@ -22,7 +22,11 @@ int64_t BDD_class::set_random_input_data(bool** mask_input_data){
// randint64_t = gen(); // randint64_t = gen();
//} //}
//mask_input_data[i][j] = bool((randint64_t >> (zi))%2); //mask_input_data[i][j] = bool((randint64_t >> (zi))%2);
#ifdef USE_FAST_RANDOM
mask_input_data[i][j] = get_bit(train_input_bits, long(i)*long(parameter_input_bit_width)+j);
#else
mask_input_data[i][j] = train_input_bits[long(i)*long(parameter_input_bit_width)+j]; mask_input_data[i][j] = train_input_bits[long(i)*long(parameter_input_bit_width)+j];
#endif
} }
} }
......
#include "head.h" #include "head.h"
#include "io_generator/c432.h" //io_generator中需要包含对PI_WIDTH,PO_WIDTH的全局定义,如: extern const int64_t PI_WIDTH = 36; #include "io_generator/c2670.h" //io_generator中需要包含对PI_WIDTH,PO_WIDTH的全局定义,如: extern const int64_t PI_WIDTH = 36;
//#include "io_generator/rob_bsd.h" //io_generator中需要包含对PI_WIDTH,PO_WIDTH的全局定义,如: extern const int64_t PI_WIDTH = 36; //#include "io_generator/rob_bsd.h" //io_generator中需要包含对PI_WIDTH,PO_WIDTH的全局定义,如: extern const int64_t PI_WIDTH = 36;
...@@ -17,8 +17,8 @@ int64_t parameter_output_bit_width = PO_WIDTH; ...@@ -17,8 +17,8 @@ int64_t parameter_output_bit_width = PO_WIDTH;
extern const int64_t parameter_search_iterations = 10; //最大设计次数 extern const int64_t parameter_search_iterations = 10; //最大设计次数
extern const int64_t parameter_test_ios = 1000000; //测试要求多少样本 extern const int64_t parameter_test_ios = 100000000; //测试要求多少样本
extern const int64_t parameter_max_samples = 10000; //BSD每一个节点最多进行多少次采样,至少为64 extern const int64_t parameter_max_samples = 100000; //BSD每一个节点最多进行多少次采样,至少为64
extern const double parameter_early_stop_accuracy = 1; //允许的错误率,如果完全不允许,设为1; extern const double parameter_early_stop_accuracy = 1; //允许的错误率,如果完全不允许,设为1;
//没有特殊需要不要设到<1,会慢一些。 //没有特殊需要不要设到<1,会慢一些。
//0.5以下无意义,建议至少设到0.8吧. //0.5以下无意义,建议至少设到0.8吧.
...@@ -27,3 +27,4 @@ extern const int64_t parameter_io_file_lines = 2; //在sample_input.set文件 ...@@ -27,3 +27,4 @@ extern const int64_t parameter_io_file_lines = 2; //在sample_input.set文件
extern const int64_t parameter_num_threads = 64; //线程数 extern const int64_t parameter_num_threads = 64; //线程数
#define USE_FAST_RANDOM //使用高性能随机数生成器 (Xoshiro256++)生成test_input_bits和train_input_bits,尤其当parameter_test_ios达到一亿的时候,建议启用
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment