初试PyOpenGL四 (Python+OpenGL)GPU粒子系统与基本碰撞

　　这篇相当于是对前三篇的总结，基本效果如下：

　　《初试PyOpenGL一 (Python+OpenGL)》讲解了PyOpenGL环境搭建，以及网格、球体、第一与第三人称摄像机的实现。《初试PyOpenGL二 (Python+OpenGL)基本地形生成与高度检测》用高程图生成地形，并以球体做第三人称漫游。《初试PyOpenGL三 (Python+OpenGL)GPGPU基本运算与乒乓技术》实现了基本的GPGPU运算。

　　我认为比较完善的GPU粒子系统应该是这样的：粒子的初始化可以放在CPU里，但相关的数据运算要放在GPU里，并且运算后的数据也应该留在显存里，而不是回到内存。第三篇的做法不能满足这个要求：它的数据存放在纹理中，要放入VBO就必须先读回内存，再上传到显存。这样虽然运算放在了GPU上，但数据要经过显存-内存-显存的过程，产生不必要的消耗；并且因为数据存放在纹理的像素里，运算被限定在片段着色器中。这两个限制导致第三篇里的内容不适合用来实现GPU粒子系统，而更适合用来实现一些需要CPU与GPU结合处理的运算。

　　在这里，我们采用OpenGL里的Transform Feedback。和第三篇采用FBO结合浮点纹理不同，Transform Feedback简单来说，就是传入一个VBO，经过GPU运算后，把结果放入另一个VBO中。注意两点：第一，操作都是针对VBO，也就是针对显存，故不需要经过CPU与内存；第二，在Transform Feedback里，一个缓存不能同时作为输入和输出。

　　首先用一个简单的例子介绍Transform Feedback的基本应用。需要指出的是，GLSL3.0与GLSL4.0的Transform Feedback写法有些区别，我手上分别有支持3.0与4.0的显卡，但是为了更好的兼容性，选择3.0的写法，相应的着色器代码和Python代码如下：

 # GLSL vertex shader for the minimal transform-feedback demo.
 # Input : one float per vertex (`inValue`).
 # Output: two floats per vertex, `outValue` (= inValue + 3.0) and `out2`
 #         (constant 1.0); the host code captures both as feedback varyings.
 # The `#version` directive must carry a version number or the shader will
 # not compile. 130 (GLSL for OpenGL 3.0) is used because the shader relies
 # on `in`/`out` qualifiers and the article targets the 3.0-style API.
 tf_v = """

         #version 130

         in float inValue;

         out float outValue;

         out float out2;

         void main() {

             outValue = inValue+3.0;

             out2 = 1.0;

         }"""

简单变换反馈的着色器

         # Build the vertex-shader-only program used by the transform-feedback
         # demo and remember the location of its single input attribute.
         tfprog = this.tfProgram = ShaderProgram(glCreateProgram())

         tfvshader = shaders.compileShader(tf_v, GL_VERTEX_SHADER)
         glAttachShader(tfprog, tfvshader)

         # The varyings to capture have to be registered BEFORE linking.
         # c_char_p only accepts byte strings on Python 3; on Python 2 a b''
         # literal is an ordinary str, so this form works on both.
         LP_LP_c_char = POINTER(POINTER(c_char))
         varyings = (b'outValue', b'out2')
         ptrs = (c_char_p * len(varyings))(*varyings)
         # Debug trace; formatted so the output is the same on Python 2 and 3.
         print("%s %s" % (ptrs, len(ptrs)))
         c_array = cast(ptrs, LP_LP_c_char)
         glTransformFeedbackVaryings(tfprog, len(ptrs), c_array, GL_INTERLEAVED_ATTRIBS)

         glLinkProgram(tfprog)

         # Stored on the program object; transformFeedback reads `pro.invalue`.
         tfprog.invalue = glGetAttribLocation(tfprog, "inValue")

着色器基本参数设置

 class transformFeedback(common):
     """Minimal transform-feedback round trip used to check the GL setup.

     Constructing an instance sends five floats through the given program
     with rasterization disabled, captures the vertex shader outputs into a
     second buffer and prints that buffer twice: once read back with
     glGetBufferSubData ("tf") and once through glMapBuffer ("tfv").
     """

     def __init__(this, pro):
         """Run the demo once.

         Args:
             pro: program object to use. Its ``invalue`` attribute must hold
                 the location of the float input attribute. The buffer
                 sizes below assume the program captures two floats per
                 input vertex.
         """
         data = [1.0, 2.0, 3.0, 4.0, 5.0]
         count = len(data)
         float_size = 4
         out_floats = count * 2           # two captured floats per vertex
         out_bytes = out_floats * float_size

         this.vbo = vbo.VBO(ny.array(data, 'f'))

         glUseProgram(pro)
         loc = pro.invalue

         # Output buffer: storage only, filled by the GPU.
         this.tbo = glGenBuffers(1)
         glBindBuffer(GL_ARRAY_BUFFER, this.tbo)
         glBufferData(GL_ARRAY_BUFFER, out_bytes, None, GL_STATIC_DRAW)

         # Input buffer.
         this.vbo.bind()
         glEnableVertexAttribArray(loc)
         # in pyopengl, the last two params of glVertexAttribPointer must not
         # be 0,0 (note kept from the original author)
         glVertexAttribPointer(loc, 1, GL_FLOAT, False, float_size, this.vbo)

         # Run the vertex shader only and capture its outputs into this.tbo.
         glEnable(GL_RASTERIZER_DISCARD)
         glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, this.tbo)
         glBeginTransformFeedback(GL_POINTS)
         glDrawArrays(GL_POINTS, 0, count)
         glEndTransformFeedback()
         glDisable(GL_RASTERIZER_DISCARD)
         glDisableVertexAttribArray(loc)
         glFlush()

         # Readback 1: copy the buffer contents into a ctypes array.
         glBindBuffer(GL_ARRAY_BUFFER, this.tbo)
         readback = (ctypes.c_float * out_floats)()
         point = ctypes.cast(readback, ctypes.POINTER(ctypes.c_float))
         glGetBufferSubData(GL_ARRAY_BUFFER, 0, out_bytes, point)
         values = ny.ctypeslib.as_array(point, (out_floats,))
         print("tf %s" % (values,))

         # Readback 2: map the buffer. The numpy array is only a view over
         # the mapped memory, so it is printed before the buffer is unmapped,
         # and the buffer is unmapped on the same target it was mapped on.
         bf = glMapBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, GL_READ_WRITE)
         pointv = ctypes.cast(bf, ctypes.POINTER(ctypes.c_float))
         mapped = ny.ctypeslib.as_array(pointv, (out_floats,))
         print("tfv %s" % (mapped,))
         glUnmapBuffer(GL_TRANSFORM_FEEDBACK_BUFFER)

Transform Feedback基本流程

　　着色器里的代码很简单：传入一个float数据，返回两个float数据。上面我们传入一个数组[1.0, 2.0, 3.0, 4.0, 5.0]，经过着色器里的简单运算，分别返回每个数据加3后的值与一个固定值1.0。然后在transformFeedback里，为了验证结果正确与否，需要读取VBO里的数据。在这里，PyOpenGL可以使用glGetBufferSubData与glMapBuffer来得到VBO里的数据。需要注意的是Python与C之间指针和数据的转换：引入ctypes，声明ctypes类型的数组，把它转换成对应的指针，填充这个数组后，再把指针转换成numpy里的数组。得到的数据如下：

初试PyOpenGL四 (Python+OpenGL)GPU粒子系统与基本碰撞

　　可以看到，传出的数据是4，1，5，1，6，1，7，1，8，1，对比传入的是1.0, 2.0, 3.0, 4.0, 5.0。验证正确。

　　下面我们在上面例子的基础上实现我们的粒子系统，这里先贴出相关的Python代码。

 class particleSystem(object):
     """Particle system that ping-pongs two VBOs through transform feedback.

     Every particle is seven consecutive floats: position (x, y, z),
     velocity (x, y, z) and a time value. Each call to render() runs the
     update program over one buffer, captures the result in the other and
     draws the captured positions as points; the two buffers swap roles on
     the next call.
     """

     FLOATS_PER_PARTICLE = 7
     FLOAT_SIZE = 4
     STRIDE = FLOATS_PER_PARTICLE * FLOAT_SIZE   # bytes between particles
     LIFETIME = 40.0   # value sent as the `live` uniform; also the upper
                       # bound of the initial time value in init1()

     def __init__(this, len=1):
         """Create ``len`` particles and the two VBOs holding them.

         Args:
             len: number of particles. The name shadows the builtin but is
                 kept because callers may pass it by keyword.
         """
         this.length = len
         this.cparticles = [0.0] * this.FLOATS_PER_PARTICLE * len
         this.nparticles = [0.0] * this.FLOATS_PER_PARTICLE * len
         this.index = 0            # frame counter; its parity picks the input VBO
         this.center = 0.0, 0.0    # not read anywhere in this class
         this.currenttime = 0.0    # timestamp of the previous render(); 0.0 = none yet
         this.height = 2.0         # not read anywhere in this class
         this.init1()
         this.createVAO()

     def init1(this):
         """Fill ``cparticles`` with the initial state of every particle.

         Layout per particle: pos(x, y, z), vel(x, y, z), time.
         """
         particles = []
         for _ in range(this.length):
             z = random.uniform(0, 5)
             vx = random.random()
             age = random.uniform(1.0, this.LIFETIME)
             particles.extend((0.0, 3.0, z, vx, 0.0, 0.0, age))
         this.cparticles = particles

     def createVAO(this):
         """Wrap the two particle arrays in VBO objects (float32)."""
         this.currvbo = vbo.VBO(ny.array(this.cparticles, 'f'))
         this.nextvbo = vbo.VBO(ny.array(this.nparticles, 'f'))

     @staticmethod
     def _attribLocations(program):
         """Return the (pos, vel, time) attribute locations to use.

         Locations stored on ``program`` (attributes ``pos``, ``vel`` and
         ``time``) win; anything missing falls back to 0, 1, 2.
         """
         defaults = (("pos", 0), ("vel", 1), ("time", 2))
         if program is None:
             return tuple(loc for _, loc in defaults)
         return tuple(getattr(program, name, loc) for name, loc in defaults)

     def _frameSpan(this, now):
         """Seconds elapsed since the previous frame; 0.0 on the first one."""
         if this.currenttime == 0.0:
             return 0.0
         return now - this.currenttime

     def render(this, program):
         """Advance the simulation by one frame on the GPU and draw it.

         Args:
             program: update program; its ``span`` and ``live`` attributes
                 must hold the locations of the matching uniforms.
         """
         # One timestamp per frame, so the time spent inside render() still
         # counts towards the next frame's span.
         now = time.time()
         span = this._frameSpan(now)

         if this.index % 2 == 0:
             invbo, outvbo = this.currvbo, this.nextvbo
         else:
             invbo, outvbo = this.nextvbo, this.currvbo

         # gpu compute.
         print(span)
         glUseProgram(program)
         glUniform1f(program.span, span)
         glUniform1f(program.live, this.LIFETIME)
         this.update(invbo, outvbo, program)
         glUseProgram(0)

         # draw particle: only the first three floats (the position) of each
         # record are used as the vertex.
         # NOTE(review): the vertex-array client state is not enabled here;
         # presumably the caller has enabled it - confirm.
         glColor(0.5, 0.8, 0.9)
         glPointSize(3.0)
         outvbo.bind()
         glVertexPointer(3, GL_FLOAT, this.STRIDE, outvbo)
         glDrawArrays(GL_POINTS, 0, this.length)
         outvbo.unbind()

         this.index = this.index + 1
         this.currenttime = now

     def update(this, fvbo, svbo, program=None):
         """Run one transform-feedback pass: fvbo -> shader (GPU) -> svbo.

         The program to use must already be current.

         Args:
             fvbo: VBO read as the vertex input.
             svbo: VBO receiving the captured outputs; must not be fvbo.
             program: optional program object carrying the ``pos``, ``vel``
                 and ``time`` attribute locations. Without it the locations
                 0, 1 and 2 are assumed, which only holds if the linker
                 happened to assign them in declaration order.
         """
         posLoc, velLoc, timeLoc = this._attribLocations(program)
         locations = (posLoc, velLoc, timeLoc)
         stride = this.STRIDE

         # Both buffers are bound once, as the original author's note asks.
         # NOTE(review): presumably binding svbo makes sure its GL buffer
         # exists before it is used as the feedback target - confirm.
         svbo.bind()
         fvbo.bind()

         for loc in locations:
             glEnableVertexAttribArray(loc)
         # Byte offsets inside one record: pos at 0, vel at 12, time at 24.
         glVertexAttribPointer(posLoc, 3, GL_FLOAT, False, stride, fvbo)
         glVertexAttribPointer(velLoc, 3, GL_FLOAT, False, stride, fvbo + 12)
         glVertexAttribPointer(timeLoc, 1, GL_FLOAT, False, stride, fvbo + 24)

         glEnable(GL_RASTERIZER_DISCARD)
         glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, svbo)
         glBeginTransformFeedback(GL_POINTS)
         glDrawArrays(GL_POINTS, 0, this.length)
         glEndTransformFeedback()
         glDisable(GL_RASTERIZER_DISCARD)

         for loc in locations:
             glDisableVertexAttribArray(loc)
         fvbo.unbind()

粒子系统乒乓

　　结合前面的例子和第三篇中的乒乓技术来看：在这里我们为每个粒子定义七个数据，前三个表示它的位置，接着三个表示它的速度，最后一个表示它已经存活的时间。update就是把数据从一个缓存经过GPU运算放入另一个缓存的过程。例如第一帧，我们传入fvbo，数据输出到svbo；在第二帧里，数据就从svbo经过GPU输出到fvbo；第三、第四帧分别如第一、第二帧。这样就能实现如第三篇中的乒乓技术。然后在显示函数render里，我们用当前输出缓存里的数据简单地绘制出来。本文只是介绍用法，要实现如雪花、雨滴、瀑布等特效，需要对粒子的初始化、着色器代码做相应更改并添加纹理，但是基本处理流程还是如上。

　　下面是着色器代码，实现粒子与球的碰撞，也有与地面的交互。代码如下：

 # GLSL vertex shader that advances one particle per vertex; its outputs are
 # captured through transform feedback.
 # Inputs  : pos, vel, time - the seven floats of one particle record.
 # Uniforms: span (seconds since the previous frame), live (time value at
 #           which a particle is reset), plane + planeSacle (texture whose
 #           red channel is read as the ground height, and the x/z extent
 #           used to turn a position into a texture coordinate), sphere.
 # Outputs : outpos, outvel, outtime.
 # Per particle: move by vel*span; halve and reflect the velocity when the
 # new position is within sphere.y of the sphere point raised by the ground
 # height; apply a constant downward acceleration; clamp to the ground and
 # drop the vertical velocity when below it; reset once outtime >= live.
 # The `#version` directive must carry a version number or the shader will
 # not compile. 130 (GLSL for OpenGL 3.0) is used because the shader mixes
 # `in`/`out` qualifiers with texture2D, which that version still accepts.
 particle_v = """

         #version 130

         in vec3 pos;

         in vec3 vel;

         in float time;

         uniform float span;

         uniform vec2 planeSacle;

         uniform sampler2D plane;

         uniform vec3 sphere;

         uniform float live;

         out vec3 outpos;

         out vec3 outvel;

         out float outtime;

         void main() {

             outpos = pos + vel*span;

             vec2 uv = vec2(pos.xz/planeSacle + vec2(0.5,0.5));

             uv.y = 1.0 - uv.y;

             float hight = texture2D(plane, uv).r;

             vec3 tvel = vel;

             //sphere collision

             float radius = sphere.y;

             vec3 sphereh = sphere + vec3(0.0,hight,0.0);

             if(distance(outpos,sphereh) <= radius)

             {

                 tvel = reflect(vel,normalize(outpos-sphereh))/2.0;

             }

             tvel = tvel + vec3(0.0,-0.5,0.0)*span;  

             //ground collision

             if(hight > outpos.y)

             {

                 outpos.y = hight;

                 tvel = vec3(max(vel.x-span*1.1,0.0),0.0,max(vel.z - span*1.1,0.0));

             }

             //update particle live

             outtime = time + span;

             if(outtime>=live)

             {

                 outpos = vec3(0.0,3.0,hight*5.0);

                 outtime = 0.0;

                 tvel = vec3(hight,0.0,0.0);

             }

             outvel = tvel;

         }"""

粒子系统着色器代码

　　整个过程比较简单，只考虑了一些基本的碰撞。比如球的速度本来也应该影响碰撞后粒子的方向，但这里只考虑粒子碰到球后的反射方向；粒子与地面碰撞后不会反弹，而是慢慢停止向前移动。

　　最后一些相关着色器的参数设置代码。　　

         # Build the vertex-shader-only program that updates the particles
         # and cache its attribute / uniform locations on the program object.
         particleprog = this.particleProgram = ShaderProgram(glCreateProgram())

         particleshader = shaders.compileShader(particle_v, GL_VERTEX_SHADER)
         glAttachShader(particleprog, particleshader)

         # The varyings to capture have to be registered BEFORE linking; they
         # are written interleaved, in this order, into the feedback buffer.
         # c_char_p only accepts byte strings on Python 3; on Python 2 a b''
         # literal is an ordinary str, so this form works on both.
         LP_LP_c_char = POINTER(POINTER(c_char))
         varyings = (b'outpos', b'outvel', b'outtime')
         ptrs = (c_char_p * len(varyings))(*varyings)
         c_array = cast(ptrs, LP_LP_c_char)
         glTransformFeedbackVaryings(particleprog, len(ptrs), c_array, GL_INTERLEAVED_ATTRIBS)

         glLinkProgram(particleprog)

         # Each location is stored under the GLSL name it belongs to
         # ("planeSacle" is spelled exactly as in the shader).
         for name in ("pos", "vel", "time"):
             setattr(particleprog, name, glGetAttribLocation(particleprog, name))
         for name in ("span", "live", "plane", "planeSacle", "sphere"):
             setattr(particleprog, name, glGetUniformLocation(particleprog, name))

粒子系统参数设置

　　在本文中，试着用了5000万个粒子，发现初始化很慢，花了十几秒，但是帧数和5000个粒子时基本没有差别，从这里可以看出GPU并行处理的强大之处。

　　完整代码：PythonGPU粒子系统.zip　操作方式EDSF前后左右移动，WR分别向上与向下，鼠标右键加移动鼠标控制方向，V切换第一人称与第三人称。UP与DOWN切换前面操作的移动幅度。