CUDA 二维数组操作 30

__global__voidkenrel_doublearray(float**_GPU_IN,float**_GPU_RET){//计算线程和数组的映射关系intx=b... __global__ void kenrel_doublearray(float** _GPU_IN , float** _GPU_RET)
{
//计算线程和数组的映射关系
int x = blockIdx.x*blockDim.x + threadIdx.x ;
int y = blockIdx.y*blockDim.y + threadIdx.y ;
_GPU_IN[y][x] = _GPU_IN[y][x]*4;
}
//DoubleArray()
int main()
{
// 方法2.整体拷贝
float **CPU_ORIGN, **CPU_RET; // host端原数据、拷贝回数据
float **GPU, **GPU_RET; // device端数据
int width = 5, height = 3; // 数组的宽度和高度
size_t size = sizeof(float)*width; // 数据的宽度in bytes
size_t pitch;
size_t pitch2;
int row, col;
// 申请内存空间，并初始化
CPU_ORIGN = new float*[height];
CPU_RET = new float*[height];
for(row = 0; row < height; ++row)
{
CPU_ORIGN[row] = new float[width];
CPU_RET[row] = new float[width];
// 初始化数据
for(col = 0; col < width; ++col)
{
CPU_ORIGN[row][col] = (float)(row + col);
CPU_RET[row][col] = 0.0f;
}
}
cudaMallocPitch((void**)&GPU, &pitch, size, height);
cudaMemset2D(GPU, pitch, 0, size, height);
cudaMallocPitch((void**)&GPU_RET, &pitch2, size, height);
cudaMemset2D(GPU_RET, pitch2, 0, size, height);
// 将host端原数据拷贝到device端
cudaMemcpy2D(GPU, pitch, CPU_ORIGN, size, size, height, cudaMemcpyHostToDevice);
cudaMemcpy2D(GPU_RET, pitch2, CPU_ORIGN, size, size, height, cudaMemcpyHostToDevice);
dim3 _block(1,1);
dim3 _grid(width , height);
kenrel_doublearray<<<_grid , _block>>>(GPU , GPU_RET);
// 将device端数据拷贝到host端返回数据
cudaMemcpy2D(CPU_RET, size, GPU_RET, pitch2, size, height, cudaMemcpyDeviceToHost);
}
}
最后数据从设备端拷贝到主机端失败展开

 我来答

1个回答

#热议# 在购买新能源车时，要注意哪些？

Abstract_cai
2013-11-06

知道答主

回答量：19

采纳率：0%

帮助的人：10.1万

我也去答题访问个人页

关注

展开全部

还是你的意思是你的最后一行代码执行时会报错？还是数值不对？
内核函数最后操作_GPU_IN[y][x] = _GPU_IN[y][x]*4; 整个没有用到_GPU_RET。

我不明白你程序的目的。

追问

应该是_GPU_RET[x] = _GPU_IN[y][x]*4,执行到最后一行就直接跳出了，
g:/CUDA编程/Project3/selfStrcut/selfStruct.cu(200) : cudaSafeCall() Runtime API error 30: unknown error.
线程 'Win32 线程' (0x148) 已退出，返回值为 -1 (0xffffffff)。

已赞过 已踩过<

评论收起

推荐律师服务：若未解决您的问题，请您详细描述您的问题，通过百度律临进行免费专业咨询

CUDA 二维数组操作 30

其他类似问题

为你推荐：