Commit c1a78705 authored by 刘劭荣(20软)'s avatar 刘劭荣(20软)

增加CUDA 均衡化

parent 70f4830b
......@@ -93,8 +93,23 @@ void freeAuxiliaryVariablesInGPUMemory()
cudaFree(d_object2D);
}
// Rescales every pixel of object2D in place from the intensity interval
// [range[0], range[1]] onto [0, 255] (linear contrast stretch).
//
// Parameters:
//   object2D - image buffer, indexed linearly; one element per pixel.
//   range    - two ints: range[0] is the lower bound, range[1] the upper bound.
//
// Launch layout: one thread per pixel using 1D indexing
// (blockIdx.x * blockDim.x + threadIdx.x). Threads whose index is past the
// pixel count (DRRImageSize[0] * DRRImageSize[1]) do nothing.
//
// If range[1] == range[0] the stretch is undefined (division by zero), so the
// pixel is left unchanged instead of being overwritten with NaN/Inf.
__global__ void hist(float* object2D, int* range) {
    // Image dimensions come from DRRImageSize, which is defined elsewhere in this file.
    int total_dx = DRRImageSize[0];
    int total_dz = DRRImageSize[1];
    // 64-bit arithmetic: `long` is only 32 bits on some toolchains, and the
    // int product total_dx * total_dz could overflow for very large images.
    const long long pixelCount = static_cast<long long>(total_dx) * total_dz;

    // Every thread calculates its own id number
    const long long idx = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
    // Threads beyond the last pixel have nothing to do
    if (idx >= pixelCount)
        return;

    const int lower = range[0];
    const int upper = range[1];
    // Degenerate interval: skip the rescale rather than divide by zero.
    if (upper == lower)
        return;

    const float span = static_cast<float>(upper - lower);
    object2D[idx] = (object2D[idx] - lower) / span * 255.0f;
}
__global__ void drrCUDA(float* object2D, cudaTextureObject_t tex_object3D)
__global__ void drrCUDA(float* object2D, cudaTextureObject_t tex_object3D, int* hist)
{
float stepInX[3] = { d_DRR_Parameters[0],d_DRR_Parameters[1],d_DRR_Parameters[2] };
......@@ -353,18 +368,37 @@ __global__ void drrCUDA(float* object2D, cudaTextureObject_t tex_object3D)
}
} //end of the while-loop
float pixval = d12;
float pixval = 255.0-d12;
if (pixval < 0)
pixval = 0;
pixval = 255.;
if (pixval > 255)
pixval = 255;
pixval = 0.;
int tmp = (int)pixval;
atomicAdd(hist + tmp, 1);
//Assign the calculated value for the pixel to its corresponding position in the output array
object2D[idx] = pixval;
}
// Finds the lowest and highest non-empty bins of a 256-bin histogram.
//
// Parameters:
//   hist  - 256 bin counts.
//   range - output, two ints: range[0] receives the index of the first bin
//           whose count is non-zero, range[1] the index of the last one.
//           Both stay 0 when every bin is zero.
//
// There is no thread-index guard: every thread that runs this kernel performs
// the whole scan and writes the same two results.
__global__ void cal_hist(int* hist, int* range) {
    // Defaults used when no bin is populated.
    range[0] = 0;
    range[1] = 0;

    // Walk upward to the first populated bin.
    int low = 0;
    while (low < 256 && hist[low] == 0) {
        ++low;
    }
    if (low < 256) {
        range[0] = low;
    }

    // Walk downward to the last populated bin.
    int high = 255;
    while (high >= 0 && hist[high] == 0) {
        --high;
    }
    if (high >= 0) {
        range[1] = high;
    }
}
float* calculateDRRwithCUDA(CUDAParamerters CUDA_Parameters, DRRParameters DRR_Parameters)
{
......@@ -376,9 +410,23 @@ float* calculateDRRwithCUDA(CUDAParamerters CUDA_Parameters, DRRParameters DRR_P
//------------------------------------------------------------
//Launching the threads
drrCUDA << < num_Blocks, num_Threads >> > (d_object2D, tex_object3D);
//------------------------------------------------------------
int cpu_hist[256] = { 0 };
int* gpu_hist;
cudaMalloc((int**)&gpu_hist, 256 * sizeof(int));
cudaMemcpy(gpu_hist, cpu_hist, 256 * sizeof(int), cudaMemcpyHostToDevice);
drrCUDA << < num_Blocks, num_Threads >> > (d_object2D, tex_object3D, gpu_hist);
cudaMemcpy(cpu_hist, gpu_hist, 256 * sizeof(int), cudaMemcpyDeviceToHost);
int* gpu_range;
cudaMalloc((int**)&gpu_range, 2 * sizeof(int));
cal_hist << <1, 1 >> > (gpu_hist, gpu_range);
hist << <num_Blocks, num_Threads >> > (d_object2D, gpu_range);
//------------------------------------------------------------
cudaFree(gpu_hist);
cudaFree(gpu_range);
//Copying the result from the calculations from device to host
long int vectorSize = (int)DRR_Parameters.size[0] * (int)DRR_Parameters.size[1];
float* h_object2D = (float*)malloc(sizeof(float)*vectorSize);
......
......@@ -236,13 +236,16 @@ float* drr::cudaDRR(float rx, float ry, float rz, int dx, int dy, float threshol
image3d.SizeCT[2] = sizeCT[2];
CUDAParamerters cudaPara;
cudaPara.numThreads = 1024;
cudaPara.numBlocks = (int)ceil((float)output_size[0] * output_size[1] / 1024);
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, 0);
cudaPara.numThreads = prop.maxThreadsPerBlock;
cudaPara.numBlocks = (int)ceil((float)output_size[0] * output_size[1] / cudaPara.numThreads);
loadOuputVariablesInGPUMemory((int)output_size[0],(int)output_size[1]);
float* object2d = calculateDRRwithCUDA(cudaPara, para);
freeAuxiliaryVariablesInGPUMemory();
return object2d;
......
......@@ -6,6 +6,11 @@
#include <vector>
#include <ctime>
#include <omp.h>
#include "device_functions.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "itkImage.h"
#include "itkImageFileReader.h"
#include "itkImageFileWriter.h"
......
......@@ -18,7 +18,6 @@ void socketManager::get_CUDAdrr(const char * path)
{
if (rd != NULL) {
// 析构 释放GPU内存
rd->~drr();
rd = NULL;
}
rd = new drr(path);
......
This diff is collapsed.
This diff is collapsed.
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\cmath
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\concurrencysal.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\crtdefs.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\cstdlib
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\limits.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\sal.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\use_ansi.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\vadefs.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\vcruntime.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\vcruntime_new.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\vcruntime_new_debug.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\vcruntime_string.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\xkeycheck.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\xtgmath.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\xtr1common
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\yvals.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\yvals_core.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_malloc.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_math.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_memcpy_s.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_memory.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_search.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_stdio_config.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_wstdio.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_wstdlib.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_wstring.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_wtime.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\crtdbg.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\errno.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\math.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\stddef.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\stdio.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\stdlib.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\string.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\time.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\builtin_types.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\channel_descriptor.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\common_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\device_double_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\device_double_functions.hpp
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\device_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\device_functions.hpp
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\host_config.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\host_defines.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\math_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\math_functions.hpp
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\sm_70_rt.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\sm_80_rt.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\cuda_device_runtime_api.h
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\include\cuda_runtime.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\cuda_runtime_api.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\cuda_surface_types.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\cuda_texture_types.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\device_atomic_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\device_launch_parameters.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\device_types.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\driver_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\driver_types.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\library_types.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_20_atomic_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_20_intrinsics.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_30_intrinsics.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_32_atomic_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_32_intrinsics.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_35_atomic_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_35_intrinsics.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_60_atomic_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_61_intrinsics.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\surface_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\surface_indirect_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\surface_types.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\texture_fetch_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\texture_indirect_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\texture_types.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\vector_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\vector_functions.hpp
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\vector_types.h
D:\QT\cuda-learning\cuda-learning\cuda-drr-socket\kernel.cu
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\cmath
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\concurrencysal.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\crtdefs.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\cstdlib
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\limits.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\sal.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\use_ansi.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\vadefs.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\vcruntime.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\vcruntime_new.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\vcruntime_new_debug.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\vcruntime_string.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\xkeycheck.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\xtgmath.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\xtr1common
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\yvals.h
C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Tools\MSVC\14.16.27023\include\yvals_core.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_malloc.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_math.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_memcpy_s.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_memory.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_search.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_wstdlib.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_wstring.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\corecrt_wtime.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\crtdbg.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\errno.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\math.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\stddef.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\stdlib.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\string.h
C:\Program Files (x86)\Windows Kits\10\Include\10.0.17134.0\ucrt\time.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\builtin_types.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\channel_descriptor.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\common_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\device_double_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\device_double_functions.hpp
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\device_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\device_functions.hpp
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\host_config.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\host_defines.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\math_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\math_functions.hpp
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\sm_70_rt.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\crt\sm_80_rt.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\cuda_device_runtime_api.h
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\include\cuda_runtime.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\cuda_runtime_api.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\cuda_surface_types.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\cuda_texture_types.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\device_atomic_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\device_launch_parameters.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\device_types.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\driver_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\driver_types.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\library_types.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_20_atomic_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_20_intrinsics.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_30_intrinsics.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_32_atomic_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_32_intrinsics.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_35_atomic_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_35_intrinsics.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_60_atomic_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\sm_61_intrinsics.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\surface_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\surface_indirect_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\surface_types.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\texture_fetch_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\texture_indirect_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\texture_types.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\vector_functions.h
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\vector_functions.hpp
c:\program files\nvidia gpu computing toolkit\cuda\v11.0\include\vector_types.h
D:\QT\cuda-learning\cuda-learning\cuda-drr-socket\cudaEqualization.cu
d:\qt\cuda-learning\cuda-learning\cuda-drr-socket\cudaEqualization.cuh
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment