在cuda的julia集中ptr[offset函数*4+0]什么意思啊

点击联系发帖人 时间：2015-03-16 01:11

offset

GPU实现julia集曲线 - CSDN博客
Julia集是满足某个复数计算函数的所有点构成的边界。对于函数参数的所有取值，生成的边界将形成一种不规则的碎片形状，这是数学中最有趣和最漂亮的形状之一。
生成Julia集的算法非常简单：通过一个简单的迭代等式对复平面中的点求值，如果在计算某个点时，迭代等式的计算结果是发散的，那么这个点就不属于Julia集合；相反，如果在迭代等式中计算得到的一系列值都位于某个边界范围之内，那么这个点就属于Julia集合。迭代等式为：
$$Z_{n+1} = Z_n^2 + C$$
基于GPU的Julia集的算法如下：
#include&stdlib.h&
#include&stdio.h&
#include&string.h&
#include&math.h&
#include&cutil.h&
#include&cpu_bitmap.h&// 该头文件打不开
#define DIM 128
//基于GPU的Julia集
/* The __device__ qualifier marks code that runs on the GPU rather than on the
 * host; such functions may only be called from other __device__ functions or
 * from __global__ functions. */

// Minimal complex number (r + i*j) used by the Julia-set iteration.
struct cuComplex
{
    float r;  // real part
    float i;  // imaginary part

    // Must be callable from device code: julia(), operator* and operator+
    // all construct cuComplex values on the GPU. __host__ is kept so existing
    // host-side construction continues to work.
    __host__ __device__ cuComplex(float a, float b) : r(a), i(b) {}

    // Squared magnitude: r*r + i*i.
    __host__ __device__ float magnitude2(void)
    {
        return r * r + i * i;
    }

    // Complex multiplication: (r + i*j)(a.r + a.i*j).
    // The imaginary part is i*a.r + r*a.i (the earlier i*a.i + r*a.i was wrong).
    __host__ __device__ cuComplex operator*(const cuComplex& a)
    {
        return cuComplex(r * a.r - i * a.i, i * a.r + r * a.i);
    }

    // Complex addition. The real part is r + a.r; the earlier `r = a.r`
    // overwrote this object's real part instead of adding.
    __host__ __device__ cuComplex operator+(const cuComplex& a)
    {
        return cuComplex(r + a.r, i + a.i);
    }
};
// Converts pixel (x, y) into a point of the complex plane and iterates
// Z = Z*Z + C on it.
// Returns 1 when the value stays bounded for all 200 iterations (the point is
// treated as part of the Julia set), 0 as soon as it diverges.
__device__ int julia(int x, int y)
{
    const float scale = 1.5f;  // zoom factor
    // Centre the image on the origin and normalise to [-scale, scale].
    float jx = scale * (float)(DIM / 2 - x) / (DIM / 2);
    float jy = scale * (float)(DIM / 2 - y) / (DIM / 2);

    cuComplex c(-0.8f, 0.156f);  // constant C of the iteration
    cuComplex a(jx, jy);         // initial value of Z

    for (int i = 0; i < 200; i++)
    {
        a = a * a + c;
        // A squared magnitude above 1000 is treated as divergence.
        if (a.magnitude2() > 1000)
            return 0;
    }
    return 1;
}
/* Step 1: kernel is declared as a __global__ function. Each dimension of the
 * grid equals the corresponding dimension of the image, so every pixel between
 * (0, 0) and (DIM-1, DIM-1) gets its own thread block.
 * Step 2: compute the linear offset into the output buffer ptr. It is derived
 * from the built-in variable gridDim, which is constant across all blocks and
 * holds the size of the grid. In this example gridDim is (DIM, DIM), so the
 * row index times the grid width plus the column index gives a unique index
 * into ptr, ranging from 0 to DIM*DIM-1.
 * Finally, julia() decides whether the point belongs to the Julia set. */
__global__ void kernel(unsigned char *ptr)
{
    // Map blockIdx to a pixel position (one block per pixel).
    int x = blockIdx.x;
    int y = blockIdx.y;
    int offset = x + y * gridDim.x;

    // Evaluate the Julia test at this position (1 = in the set, 0 = not).
    int juliaValue = julia(x, y);

    // Each pixel occupies 4 consecutive bytes, hence offset*4 + channel.
    // Byte 0 is 255 for points in the set and 0 otherwise; bytes 1 and 2 are
    // 0 and byte 3 is 255 (presumably R, G, B, A -- confirm against CPUBitmap).
    ptr[offset * 4 + 0] = 255 * juliaValue;
    ptr[offset * 4 + 1] = 0;
    ptr[offset * 4 + 2] = 0;
    ptr[offset * 4 + 3] = 255;
}
// Renders the Julia set on the GPU into a DIM x DIM bitmap and displays it.
int main(void)
{
    CPUBitmap bitmap(DIM, DIM);  // host-side bitmap object
    unsigned char *dev_bitmap;   // device-side copy of the image data

    // Allocate the device buffer.
    HANDLE_ERROR(cudaMalloc((void**)&dev_bitmap, bitmap.image_size()));

    // Two-dimensional grid with one single-thread block per pixel.
    dim3 grid(DIM, DIM);
    kernel<<<grid, 1>>>(dev_bitmap);
    // A kernel launch returns no status; pick up launch errors explicitly.
    HANDLE_ERROR(cudaGetLastError());

    // Copy the result back to the host (device -> host).
    HANDLE_ERROR(cudaMemcpy(bitmap.get_ptr(), dev_bitmap, bitmap.image_size(),
                            cudaMemcpyDeviceToHost));

    // Release the device buffer before displaying: judging by its name,
    // display_and_exit() may never return (TODO confirm), which would make a
    // cudaFree placed after it unreachable.
    HANDLE_ERROR(cudaFree(dev_bitmap));
    bitmap.display_and_exit();
    return 0;
}
程序写完了，但在编译时，由于找不到cpu_bitmap头文件，所以本程序没能运行。
#include <cuda_runtime.h>
#include &osg/Image&
const int DIM = 1024;
// Minimal complex number (r + i*j) used by the Julia-set iteration.
// Members are __host__ __device__ so the arithmetic can also be checked on
// the CPU; device callers are unaffected.
typedef struct cuComplex
{
    float r;  // real part
    float i;  // imaginary part

    __host__ __device__ cuComplex(float a, float b) : r(a), i(b) {}

    // Squared magnitude: r*r + i*i.
    __host__ __device__ float magnitude2(void)
    {
        return r * r + i * i;
    }

    // Complex multiplication.
    __host__ __device__ cuComplex operator * (const cuComplex & a)
    {
        return cuComplex(r * a.r - i * a.i, i * a.r + r * a.i);
    }

    // Complex addition.
    __host__ __device__ cuComplex operator + (const cuComplex & a)
    {
        return cuComplex(r + a.r, i + a.i);
    }
} cuComplex;
// Converts pixel (x, y) into a point of the complex plane and iterates
// Z = Z*Z + C on it.
// Returns 1 when the value stays bounded for all 200 iterations, 0 as soon
// as its squared magnitude exceeds 1024.
__device__ int julia(int x, int y)
{
    const float scale = 1.5f;
    // Centre the image on the origin and normalise to [-scale, scale].
    float jx = scale * (float)(DIM / 2 - x) / (DIM / 2);
    float jy = scale * (float)(DIM / 2 - y) / (DIM / 2);

    cuComplex c(-0.8f, 0.156f);  // constant C of the iteration
    cuComplex a(jx, jy);         // initial value of Z

    for (int i = 0; i < 200; ++i)
    {
        a = a * a + c;
        if (a.magnitude2() > 1024)
            return 0;  // diverged
    }
    return 1;
}
// Writes one pixel per thread block into ptr.
// Expected launch: grid of (width, height) blocks with 1 thread per block;
// ptr must hold gridDim.x * gridDim.y * 4 bytes.
__global__ void kernel(unsigned char * ptr)
{
    // One block per pixel: the block index is the pixel coordinate.
    int x = blockIdx.x;
    int y = blockIdx.y;
    int offset = x + y * gridDim.x;  // row-major pixel index

    int juliaValue = julia(x, y);    // 1 = in the set, 0 = not

    // 4 bytes per pixel, hence offset * 4 + channel. Byte 0 carries the
    // result (255 or 0); bytes 1 and 2 are 0; byte 3 is always 255.
    ptr[offset * 4 + 0] = 255 * juliaValue;
    ptr[offset * 4 + 1] = 0;
    ptr[offset * 4 + 2] = 0;
    ptr[offset * 4 + 3] = 255;
}
// Renders the Julia set on the GPU and copies the DIM x DIM x 4-byte result
// into image->data(). The caller must already have allocated the image with
// at least DIM * DIM * 4 bytes of storage.
extern "C" void SetUp(osg::Image * image)
{
    unsigned char * dev_bitmap = NULL;

    // Without a device buffer there is nothing to render into; leave the
    // image untouched rather than launching on an invalid pointer.
    if (cudaMalloc((void **)&dev_bitmap, DIM * DIM * 4) != cudaSuccess)
        return;

    // One single-thread block per pixel.
    dim3 grid(DIM, DIM);
    kernel<<<grid, 1>>>(dev_bitmap);

    // Blocking copy: waits for the kernel to finish before transferring.
    cudaMemcpy(image->data(), dev_bitmap, DIM * DIM * 4, cudaMemcpyDeviceToHost);
    cudaFree(dev_bitmap);
}
#include &osgViewer/Viewer&
#include &osg/Texture2D&
#include &osg/Image&
#include &osgDB/WriteFile&
#include &osgViewer/ViewerEventHandlers&
#pragma comment(lib, "osgViewerd.lib")
#pragma comment(lib, "osgDBd.lib")
#pragma comment(lib, "osgd.lib")
#pragma comment(lib, "osgGAd.lib")
const int DIM = 1024;
// Builds a Geode holding a single textured quad in the XZ plane
// (corners at +/-1), with texture coordinates spanning [0, 1] and one
// overall normal pointing along +Y.
osg::ref_ptr<osg::Geode> CreateQuad()
{
    osg::ref_ptr<osg::Geode> geode = new osg::Geode;
    osg::ref_ptr<osg::Geometry> geometry = new osg::Geometry;
    osg::ref_ptr<osg::Vec3Array> vArray = new osg::Vec3Array;
    osg::ref_ptr<osg::Vec2Array> tArray = new osg::Vec2Array;
    osg::ref_ptr<osg::Vec3Array> nArray = new osg::Vec3Array;

    // Quad corners, in the same order as the texture coordinates below.
    vArray->push_back(osg::Vec3(-1.0, 0.0, -1.0));
    vArray->push_back(osg::Vec3(1.0, 0.0, -1.0));
    vArray->push_back(osg::Vec3(1.0, 0.0, 1.0));
    vArray->push_back(osg::Vec3(-1.0, 0.0, 1.0));

    tArray->push_back(osg::Vec2(0.0, 0.0));
    tArray->push_back(osg::Vec2(1.0, 0.0));
    tArray->push_back(osg::Vec2(1.0, 1.0));
    tArray->push_back(osg::Vec2(0.0, 1.0));

    // A single normal shared by the whole quad (BIND_OVERALL).
    nArray->push_back(osg::Vec3(0.0, 1.0, 0.0));

    geometry->setVertexArray(vArray.get());
    geometry->setTexCoordArray(0, tArray.get());
    geometry->setNormalArray(nArray.get());
    geometry->setNormalBinding(osg::Geometry::BIND_OVERALL);
    geometry->addPrimitiveSet(new osg::DrawArrays(osg::PrimitiveSet::QUADS, 0, vArray->size()));

    geode->addDrawable(geometry.get());
    return geode;
}
// Creates a StateSet that binds `image` as a 2D texture on texture unit 0.
osg::ref_ptr<osg::StateSet> CreateStateSet(osg::Image * image)
{
    osg::ref_ptr<osg::StateSet> stateSet = new osg::StateSet;
    osg::ref_ptr<osg::Texture2D> texture = new osg::Texture2D();
    texture->setImage(image);
    stateSet->setTextureAttributeAndModes(0, texture.get(), osg::StateAttribute::ON);
    return stateSet;
}
extern "C" void SetUp(osg::Image * image);
int main()
osg::ref_ptr&osgViewer::Viewer& viewer = new osgViewer::V
osg::ref_ptr&osg::Image& image = new osg::I
image-&allocateImage(DIM, DIM, 1, GL_RGBA, GL_UNSIGNED_BYTE);
SetUp(image.get());
osg::ref_ptr&osg::StateSet& stateSet = CreateStateSet(image.get());
osg::ref_ptr&osg::Geode& quad = CreateQuad();
quad-&setStateSet(stateSet.get());
viewer-&setSceneData(quad.get());
viewer-&getCamera()-&getOrCreateStateSet()-&setMode(GL_LIGHTING, osg::StateAttribute::OFF);
viewer-&addEventHandler(new osgViewer::StatsHandler);
viewer-&setUpViewInWindow(35, 35, 1024, 800);
viewer-&run();
阅读(...) 评论()CUDA高性能GPU计算Julia集代码 - 下载频道
- CSDN.NET
&&&&CUDA高性能GPU计算Julia集代码
CUDA高性能GPU计算Julia集代码
CUDA高性能GPU计算Julia集代码，可运行CUDA 7.0编译的
若举报审核通过，可奖励20下载分
被举报人：
joey_berlin
举报的资源分：
请选择类型
资源无法下载
资源无法使用
标题与实际内容不符
含有危害国家安全内容
含有反动色情等内容
含广告内容
版权问题，侵犯个人或公司的版权
*详细原因：
您可能还需要
开发技术下载排行《GPU高性能编程CUDA实战中文》中第四章的julia实验
时间： 18:58:22
&&&& 阅读：205
&&&& 评论：
&&&& 收藏：0
标签： 在整个过程中出现了各种问题，我先将我调试好的整个项目打包提供。
2 * Copyright
NVIDIA Corporation.
All rights reserved.
4 * NVIDIA Corporation and its licensors retain all intellectual property and
5 * proprietary rights in and to this software and related documentation.
6 * Any use, reproduction, disclosure, or distribution of this software
7 * and related documentation without an express license agreement from
8 * NVIDIA Corporation is strictly prohibited.
10 * Please refer to the applicable NVIDIA end user license agreement (EULA)
11 * associated with this source code for terms and conditions that govern
12 * your use of this NVIDIA software.
16 #include &GL\glut.h&
17 #include "cuda.h"
18 #include "cuda_runtime.h"
19 #include "device_launch_parameters.h"
20 #include "../common/book.h"
21 #include "../common/cpu_bitmap.h"
23 #define DIM 1000
// Minimal complex number (r + i*j) used by the Julia-set iteration.
// Members are __host__ __device__ so the arithmetic can also be checked on
// the CPU; device callers are unaffected.
struct cuComplex {
    float r;  // real part
    float i;  // imaginary part

    // Needs a device execution space: julia() and the operators below
    // construct cuComplex values in device code.
    __host__ __device__ cuComplex(float a, float b) : r(a), i(b) {}

    // Squared magnitude: r*r + i*i.
    __host__ __device__ float magnitude2(void) {
        return r * r + i * i;
    }

    // Complex multiplication.
    __host__ __device__ cuComplex operator*(const cuComplex& a) {
        return cuComplex(r*a.r - i*a.i, i*a.r + r*a.i);
    }

    // Complex addition.
    __host__ __device__ cuComplex operator+(const cuComplex& a) {
        return cuComplex(r + a.r, i + a.i);
    }
};
// Converts pixel (x, y) into a point of the complex plane and iterates
// Z = Z*Z + C on it.
// Returns 1 when the value stays bounded for all 200 iterations, 0 as soon
// as its squared magnitude exceeds 1000.
__device__ int julia(int x, int y) {
    const float scale = 1.5f;
    // Centre the image on the origin and normalise to [-scale, scale].
    float jx = scale * (float)(DIM / 2 - x) / (DIM / 2);
    float jy = scale * (float)(DIM / 2 - y) / (DIM / 2);

    cuComplex c(-0.8f, 0.156f);  // constant C of the iteration
    cuComplex a(jx, jy);         // initial value of Z

    int i = 0;
    for (i = 0; i < 200; i++) {
        a = a * a + c;
        if (a.magnitude2() > 1000)
            return 0;  // diverged
    }

    return 1;
}
// Writes one pixel per thread block into ptr.
// Expected launch: grid of (width, height) blocks with 1 thread per block;
// ptr must hold gridDim.x * gridDim.y * 4 bytes.
__global__ void kernel(unsigned char *ptr) {
    // map from blockIdx to pixel position
    int x = blockIdx.x;
    int y = blockIdx.y;
    int offset = x + y * gridDim.x;

    // now calculate the value at that position
    int juliaValue = julia(x, y);

    // 4 bytes per pixel, hence offset * 4 + channel. Byte 0 carries the
    // result (255 or 0); bytes 1 and 2 are 0; byte 3 is always 255.
    ptr[offset * 4 + 0] = 255 * juliaValue;
    ptr[offset * 4 + 1] = 0;
    ptr[offset * 4 + 2] = 0;
    ptr[offset * 4 + 3] = 255;
}
// globals needed by the update routine
struct DataBlock {
    unsigned char *dev_bitmap;  // device buffer that holds the rendered image
};
// Renders the Julia set on the GPU into a DIM x DIM bitmap and displays it.
int main(void) {
    DataBlock data;
    CPUBitmap bitmap(DIM, DIM, &data);
    unsigned char *dev_bitmap;

    HANDLE_ERROR(cudaMalloc((void**)&dev_bitmap, bitmap.image_size()));
    data.dev_bitmap = dev_bitmap;

    // One single-thread block per pixel.
    dim3 grid(DIM, DIM);
    kernel<<<grid, 1>>>(dev_bitmap);
    // A kernel launch returns no status; pick up launch errors explicitly.
    HANDLE_ERROR(cudaGetLastError());

    HANDLE_ERROR(cudaMemcpy(bitmap.get_ptr(), dev_bitmap,
                            bitmap.image_size(),
                            cudaMemcpyDeviceToHost));

    // Free the device buffer before handing control to the display routine.
    HANDLE_ERROR(cudaFree(dev_bitmap));

    bitmap.display_and_exit();
}
期间出现的问题：
calling a host function("cuComplex::cuComplex") from a __device__/__global__ function("julia") is not allowed
calling a host function("cuComplex::cuComplex") from a __device__/__global__ function("julia") is not allowed
calling a host function("cuComplex::cuComplex") from a __device__/__global__ function("cuComplex::operator *") is not allowed
calling a host function("cuComplex::cuComplex") from a __device__/__global__ function("cuComplex::operator +") is not allowed
这个原因是在原著中提供的代码有问题，原著中结构体中的代码为
cuComplex(float a, float b) : r(a), i(b)
将其修改如下即可：
__device__ cuComplex(float a, float b) : r(a), i(b)
error LNK2019: 无法解析的外部符号 ___glutInitWithExit@12，该符号在函数 _glutInit_ATEXIT_HACK@8 中被引用
1>GEARS.obj : error LNK2019: 无法解析的外部符号 ___gl……
出现这个错误的原因是我的OpenGL头文件没有正确引入：
#include <GL\glut.h>
其中glut.h文件要放在下面的路径下：
C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\include\GL
如果GL文件夹不在，要手动创建，结构如下图所示：
为了运行示例代码，需要抽取可运行的部分，同时为了减少手动修改的麻烦，也要注意各个文件目录的层次关系，我的截图如下：
千辛万苦走下来就为了下面这张图：
确实挺好看的。赞一个！
&标签：&&&&&&&&&&&&&&&&&&&&&&&&&&&
哥们你QQ多少？我在第四章遇到点小问题卡的走不动了&我QQ。。。。
&&&& &&&&&&
&& && && &&
版权所有鲁ICP备号-4
打开技术之扣，分享程序人生！3225人阅读
最近在学习OpenGL，过程中需要使用CUDA进行并行计算。因此，需要解决OpenGL与CUDA的交互问题。学习记录如下：
Step1. 共享数据区
想到交互，不难想到通信，数据共享等词语。这里使用的是共享数据的方式来完成OpenGL与CUDA的交互。而OpenGL与CUDA都有着自己独特的类型定义。因此，对于共享的数据区，我们需要给它起两个不同的名字，分别为OpenGL以及CUDA服务
GLuint bufferObj;                // OpenGL name (handle) of the shared buffer
cudaGraphicsResource *resource;  // CUDA-side handle of the same buffer
Step2.将显卡设备与OpenGL关联（已废除）
注：在CUDA5.0版本以及以后的版本，不需要此操作。参考NVIDIA官方文档如下：
cudaGLSetGLDevice(int device)
This function is deprecated as of CUDA 5.0. This function is deprecated and should no longer be used. It is no longer necessary to associate a CUDA device with an
OpenGL context in order to achieve maximum interoperability performance.
具体的设置代码为：
// Pick a device of compute capability >= 1.0 and associate it with OpenGL.
// NOTE: cudaGLSetGLDevice is deprecated since CUDA 5.0; this step is only
// needed on older toolkits.
cudaDeviceProp prop;
int dev;
memset(&prop, 0, sizeof(cudaDeviceProp));
prop.major = 1;
prop.minor = 0;
cudaChooseDevice(&dev, &prop);  // closest match to the requested properties
cudaGLSetGLDevice(dev);
Step3. 初始化OpenGL
#define DIM 512
// glutInit takes a pointer to argc (int *argcp, char **argv).
// NOTE(review): assumes argc here is main's int argc -- confirm.
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA);
glutInitWindowSize(DIM, DIM);
glutCreateWindow("bitmap");
// Must come after the window (and thus the GL context) exists; without it
// glGenBuffers and the other extension entry points are not loaded.
glewInit();
这里需要注意的是：
需要使用OpenGL扩展库glew32.dll。若您没有glew32扩展库，请先安装（其安装方式与glut、freeglut等相同）。（重要）需要在OpenGL初始化代码最后加上glewInit()，否则后面执行到glGenBuffers时会报运行时错误：0xC0000005: Access violation。使用glew库需要 #include <gl/glew.h>，且该include的位置应尽量放在代码最顶端，否则编译报错。具体示例代码，
—————————————————— 华丽的分割线 ————————————————————
到此为止，基本的准备工作就完成了。下面开始实际的工作。
共享数据缓冲区是在CUDA C核函数和 OpenGL渲染操作之间实现互操作的关键部分。为了实现两者之间的数据传递，我们首先需要创建一个缓冲区。
Step4. 使用OpenGL API创建数据缓冲区
const GLubyte*
a = glGetString(GL_EXTENSIONS);
glGenBuffers(1, &bufferObj);//生成一个缓冲区句柄
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj);//将句柄绑定到像素缓冲区（即缓冲区存放的数据类型为：PBO）
glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, DIM*DIM*4, NULL, GL_DYNAMIC_DRAW_ARB);//申请内存空间并设置相关属性以及初始值
这里，我们使用如下枚举值：GL_PIXEL_UNPACK_BUFFER_ARB表示指定缓冲区存储的内容是一个Pixel Buffer Object（PBO）
GL_DYNAMIC_DRAW_ARB表示应用程序将会对缓冲区进行修改
这时，可能你会疑问，前两句代码是干嘛的？
因为，GL_PIXEL_UNPACK_BUFFER_ARB对glew扩展库的版本有要求，所以最好检查一下当前环境是否支持GL_PIXEL_UNPACK_BUFFER_ARB枚举值。
可以看到，我的环境是支持GL_ARB_pixel_buffer_object的，如果您的环境不支持该枚举值，可能需要您更新glew扩展库版本。
Step5. 把缓冲区分享给CUDA
由于我们的目的是要使用CUDA的并行计算能力，所以CUDA必须要有权利访问共享数据缓冲区。
要实现该操作，需要将缓冲区句柄注册为一个图形资源，即Graphics Resource；然后“分享给CUDA”
// Register the OpenGL buffer with CUDA. The third argument is a *register*
// flag (cudaGraphicsRegisterFlags*); cudaGraphicsMapFlagsNone belongs to a
// different enum and only worked because both values are 0.
cudaGraphicsGLRegisterBuffer(&resource, bufferObj, cudaGraphicsRegisterFlagsNone);
代码中的resource即之前定义的：
cudaGraphicsResource *resource;
方法cudaGraphicsGLRegisterBuffer的参数3表示缓冲区属性，它有以下三种可选值：
cudaGraphicsRegisterFlagsNone：Specifies no hints about how this resource will be used. It is therefore assumed that this resource will be read from and written to by CUDA. This is the default value.
cudaGraphicsRegisterFlagsReadOnly：Specifies that CUDA will not write to this resource.（只读）
cudaGraphicsRegisterFlagsWriteDiscard：Specifies that CUDA will not read from this resource and will write over the entire contents of the resource, so none of the data previously stored in the resource will be preserved.（只写）
Step6. 让CUDA映射共享资源，并获取相对于显卡而言的设备指针
uchar4* devPtr;  // device pointer to the mapped buffer
size_t size;     // receives the size of the mapped buffer
// Map the resource so CUDA may access it, then fetch its device pointer.
cudaGraphicsMapResources(1, &resource, NULL);
cudaGraphicsResourceGetMappedPointer((void**)&devPtr, &size, resource);
CUDA官方文档中这样描述：CUDA在访问图形接口（比如OpenGL）的共享资源之前，需要首先对其进行映射（map），然后才可以访问共享数据区；在CUDA访问资源的过程中，OpenGL不能对该数据区进行任何操作，直到CUDA对数据区解除映射（unmap）为止。
Nvidia的原文描述如下：
Map graphics resources for access by CUDA. Maps the `count` graphics resources in `resources` for
access by CUDA.
The resources in `resources` may be accessed by CUDA until they are unmapped. The graphics
API from which `resources` were registered should not access any resources while they
are mapped by CUDA. If an application does so, the results are undefined.
映射完成后，我们需要获得缓冲区对于显卡（设备）而言的指针，即代码中的 devPtr。没有设备指针，我们怎么进行并行计算呢。
Step7. 执行CUDA核函数
// 16x16 threads per block and DIM/16 blocks per axis: the launch covers
// exactly DIM x DIM threads only when DIM is a multiple of 16 (integer division).
dim3 grids(DIM/16, DIM/16);
dim3 threads(16, 16);
kernel_opengl<<<grids, threads>>>(devPtr);
一个简单的核函数kernel_opengl的定义如下：
// Fills ptr with a green pattern, one uchar4 pixel per thread.
// Expected launch: 2D grid of 2D blocks; the row stride is taken from the
// launch width (blockDim.x * gridDim.x), so ptr must hold at least
// (blockDim.x * gridDim.x) * (blockDim.y * gridDim.y) pixels.
__global__ void kernel_opengl(uchar4* ptr){
    // Global pixel coordinates of this thread.
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;

    // Threads outside the DIM x DIM image (possible if the grid is rounded
    // up) must not write.
    if (x >= DIM || y >= DIM)
        return;

    int offset = x + y * blockDim.x * gridDim.x;

    // Coordinates normalised to [-0.5, 0.5).
    float fx = x/(float)DIM - 0.5f;
    float fy = y/(float)DIM - 0.5f;
    // Single-precision math functions keep the whole expression in float.
    unsigned char green = 128 + 127 * sinf(fabsf(fx*100) - fabsf(fy*100));

    ptr[offset].x = 0;
    ptr[offset].y = green;
    ptr[offset].z = 0;
    ptr[offset].w = 255;
}
此时，执行完核函数，CUDA的使命也就完成了。它的产出就是：缓冲区的数据已经被更新了~~
Step8. 解除CUDA对共享缓冲区的映射
// Unmap so that OpenGL may access the buffer again.
cudaGraphicsUnmapResources(1, &resource, NULL);
如果不解除映射，那么OpenGL将没有权限访问共享数据区，因此也就没有办法完成图像的渲染显示了。
Step9. 调用OpenGL API显示
// Register the keyboard and display callbacks, then enter the GLUT event loop.
glutKeyboardFunc(key_func);
glutDisplayFunc(draw_func);
glutMainLoop();
其中，显示回调函数为：
// GLUT display callback: draws a DIM x DIM RGBA image and swaps the buffers.
static void draw_func(void){
// With a buffer bound to GL_PIXEL_UNPACK_BUFFER_ARB, the last argument is an
// offset into that buffer rather than a host pointer; 0 draws from its start.
glDrawPixels(DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0);
glutSwapBuffers();
}
乍一看，可能感觉会比较奇怪。因为draw_func里面没有使用到缓冲区句柄bufferObj，那么数据如何会显示呢？
因为，之前的代码：
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj);
该调用将共享缓冲区指定为一个像素源，OpenGL驱动程序随后会在所有对glDrawPixels()的调用中使用这个像素源。这也就说明了我们为什么使用glDrawPixels来绘制图形了。
通过查看glDrawPixels的文档：
void glDrawPixels(
GLsizei width,
GLsizei height,
GLenum format,
GLenum type,
const GLvoid *pixels
);
不难发现其最后一个参数为一个缓冲区指针。如果没有任何缓冲区被指定为GL_PIXEL_UNPACK_BUFFER_ARB源，那么OpenGL将从这个参数指定的缓冲区进行数据复制并显示。但在本例中，我们已经将共享数据缓冲区指定为GL_PIXEL_UNPACK_BUFFER_ARB。此时，该参数的含义将变为：已绑定缓冲区内的偏移量。由于我们要绘制整个缓冲区，因此这个偏移量就是0。
—————————————————————— 华丽的分割线 ————————————————
最终，运行程序，得到指定的结果。
最后，给出
感谢您的阅读~~
* 以上用户言论只代表其个人观点，不代表CSDN网站的观点或立场
访问：284691次
积分：2326
积分：2326
排名：第7821名
原创：23篇
评论：232条
(2)(1)(1)(9)(4)(7)(2)}

杰西卡呢吗信息网

在cuda的julia集中ptr[offset函数*4+0]什么意思啊

我要回帖

更多关于 offset 的文章

更多推荐