/* * * Example code to demonstrate the benefits of PBOs for fast * transfers to the GPU. * * (c) 2006 Dominik Göddeke, University of Dortmund * dominik.goeddeke@math.uni-dortmund.de * * For details, please refer to the tutorial available at * www.mathematik.uni-dortmund.de/~goeddeke/gpgpu/tutorial3.html * */ #include #include #include #include #include #include #include #include /********************************************************* * macros, problem size definition etc. * *********************************************************/ // PBO macro (see spec for details) #define BUFFER_OFFSET(i) ((char *)NULL + (i)) // array size N is texsize*texsize static const int texSize = 1025; static const int N = texSize*texSize; // hard-coded for shader + Cg, do not change this static const int numArraysPerChunk = 9; // input data size, default setting results in approx 1.6 GB of phys mem. static const int numChunks = 20; // number of iterations per chunk to emulate kernel workload static const int numKernelSteps = 1; // number of iterations the test is repeated, to reduce timing noise. static const int numIterations = 10; // uncomment the following to check for GL and Cg errors // Note that enabling this creates an (artificial) sync point (by glGetError()) //#define DEBUGDEBUG // // Enable this #define to enable PBOs for glTexSubImage() and glReadPixels(). // Otherwise, PBOs are not used. // #define USE_PBO // my own memcheck macro, only works on linux. Use with care, this will // slow down the application significantly. 
#define MEMCHECK() system("top -b -n1 | grep test | awk '{print $5\" \"$6}'")

/****************************************************************************/

//
// forward declaration of some functions
//
void cgErrorCallback(void);
void checkErrors(const char *label);
void copy(double* in, float* out);
void copy(double* in, void* out);
void copy(void* in, double* out);

/*****************************************************/

//
// fragment program: out = accum + vec1 + vec2 + ... + vec9
//
// The Cg source is assembled from adjacent string literals; it declares one
// accumulator sampler plus nine input samplers and returns the sum of all
// ten texture fetches at the fragment's texture coordinate.
//
const char* kernelSource =
    " float main( "
    " uniform samplerRECT accum, "
    " uniform samplerRECT vec1, "
    " uniform samplerRECT vec2, "
    " uniform samplerRECT vec3, "
    " uniform samplerRECT vec4, "
    " uniform samplerRECT vec5, "
    " uniform samplerRECT vec6, "
    " uniform samplerRECT vec7, "
    " uniform samplerRECT vec8, "
    " uniform samplerRECT vec9, "
    " in float4 coords:TEXCOORD0) : COLOR { "
    " return texRECT(accum,coords.xy) + "
    " texRECT(vec1,coords.xy) + "
    " texRECT(vec2,coords.xy) + "
    " texRECT(vec3,coords.xy) + "
    " texRECT(vec4,coords.xy) + "
    " texRECT(vec5,coords.xy) + "
    " texRECT(vec6,coords.xy) + "
    " texRECT(vec7,coords.xy) + "
    " texRECT(vec8,coords.xy) + "
    " texRECT(vec9,coords.xy); } ";

#ifdef DEBUGDEBUG

//
// Cg error callback
//
// Fetches the most recent Cg error, prints its message if there is one,
// and terminates the program with exit code -1.
//
void cgErrorCallback(void) {
    CGerror lastError = cgGetError();
    if (lastError) {
        // Print through "%s": the message must never act as the format
        // string, otherwise a '%' inside it is undefined behaviour.
        printf("%s", cgGetErrorString(lastError));
    }
    // Only the message is guarded by the error check; the exit itself
    // is unconditional.
    exit(-1);
}

//
// GL error checking
//
// Queries glGetError() once. If an error is pending, prints
// "<label>: OpenGL ERROR <description>" and terminates the program with
// exit code -2; otherwise returns without side effects.
//
// label: caller-chosen text identifying the call site in the message.
//
void checkErrors(const char *label) {
    GLenum errCode = glGetError();
    if (errCode == GL_NO_ERROR) {
        return;
    }

    const GLubyte *errStr = gluErrorString(errCode);
    // Guard against a NULL description and keep the description out of the
    // format-string position.
    printf("%s: OpenGL ERROR %s\n",
           label,
           errStr ? (const char *)errStr : "(unknown error code)");
    exit(-2);
}

#endif

//
// Conversion functions from double to float, the versions
// using the void pointers are used with PBOs.
// For details, see below where PBOs are actually used.
// Note that I rely on the compiler to memalign these properly.
// Whoever knows a way to do this operation for variable N // (not compile-time constant) in the most efficient way, // please drop me a note. // void copy(double* in, float* out) { for (int i=0; i