Commit 75c7d05f by lvzhengyang

Enlarge the matrix size

parent b693f659
#include "test.h"
#include "device_launch_parameters.h"
#include "cuda_runtime.h"
#include <ctime>
#include "chrono"
using namespace std::chrono;
template <typename T>
void printMat(const T &mat,
......@@ -29,9 +33,9 @@ int main(void) {
std::cout << "Max Threads Batch Per MultiProcessor: | " << devProp.maxThreadsPerMultiProcessor / 32 << std::endl;
std::cout << "---------------------------------------------------------" << std::endl;
int m = 4;
int k = 4;
int n = 4;
int m = 1000;
int k = 600;
int n = 1000;
int nBytes_a = m * k * sizeof(float);
int nBytes_b = k * n * sizeof(float);
int nBytes_c = m * n * sizeof(float);
......@@ -46,14 +50,14 @@ int main(void) {
mat_c = (float *)malloc(nBytes_c);
for (int i = 0; i < m * k; i++) {
mat_a[i] = i;
mat_a[i] = 10;
}
printMat(mat_a, m, k);
// printMat(mat_a, m, k);
for (int i = 0; i < k * n; i++) {
mat_b[i] = i;
mat_b[i] = 10;
}
printMat(mat_b, k, n);
// printMat(mat_b, k, n);
for (int i = 0; i < m * n; i++) {
mat_c[i] = 0.0;
......@@ -64,6 +68,8 @@ int main(void) {
cudaMalloc((void **)&d_b, nBytes_b);
cudaMalloc((void **)&d_c, nBytes_c);
auto start = system_clock::now();
cudaMemcpy((void *)d_a, (void *)mat_a, nBytes_a, cudaMemcpyHostToDevice);
cudaMemcpy((void *)d_b, (void *)mat_b, nBytes_b, cudaMemcpyHostToDevice);
cudaMemcpy((void *)d_c, (void *)mat_c, nBytes_c, cudaMemcpyHostToDevice);
......@@ -78,15 +84,24 @@ int main(void) {
if (cudaStatus != cudaSuccess) {
std::cout << "cudaError: " << cudaGetErrorString(cudaStatus) << std::endl;
}
cudaMemcpy((void *)mat_c, (void *)d_c, nBytes_c, cudaMemcpyDeviceToHost);
cudaDeviceSynchronize();
printMat(mat_c, m, n);
auto end = system_clock::now();
auto duration = duration_cast<microseconds>(end - start);
std::cout << "GPU time: "
<< double(duration.count()) * microseconds::period::num / microseconds::period::den << " seconds" << std::endl;
// printMat(mat_c, m, n);
/*
float maxError = 0.0;
for (int i = 0; i < m * n; i++) {
maxError = fmax(maxError, fabs(*(mat_c + i) - 100.0));
}
std::cout << "Max Error: " << maxError << std::endl;
*/
cudaFree(d_a);
cudaFree(d_b);
......@@ -96,7 +111,7 @@ int main(void) {
free(mat_b);
free(mat_c);
test<<<1, 1>>>();
std::cout << "Hello CUDA!" << std::endl;
// test<<<1, 1>>>();
// std::cout << "Hello CUDA!" << std::endl;
return 0;
}
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment