/* * * Example code to demonstrate the benefits of PBOs for fast * transfers to the GPU. * * (c) 2006 Dominik Göddeke, University of Dortmund * dominik.goeddeke@math.uni-dortmund.de * * Initial GLSL port courtesy of Mike Hudson. * * For details, please refer to the tutorial available at * www.mathematik.uni-dortmund.de/~goeddeke/gpgpu/tutorial3.html * */ #include #include #include #include #include #include /********************************************************* * macros, problem size definition etc. * *********************************************************/ // PBO macro (see spec for details) #define BUFFER_OFFSET(i) ((char *)NULL + (i)) // array size N is texsize*texsize static const int texSize = 1025; static const int N = texSize*texSize; // hard-coded for shader + Cg, do not change this static const int numArraysPerChunk = 9; // input data size, default setting results in approx 1.6 GB of phys mem. static const int numChunks = 20; // number of iterations per chunk to emulate kernel workload static const int numKernelSteps = 1; // number of iterations the test is repeated, to reduce timing noise. static const int numIterations = 10; // uncomment the following to check for GL and Cg errors // Note that enabling this creates an (artificial) sync point (by glGetError()) //#define DEBUGDEBUG // // Enable this #define to enable PBOs for glTexSubImage() and glReadPixels(). // Otherwise, PBOs are not used. // #define USE_PBO // my own memcheck macro, only works on linux. Use with care, this will // slow down the application significantly. #define MEMCHECK() system("top -b -n1 | grep test | awk '{print $5\" \"$6}'") /****************************************************************************/ // // forward declaration of some functions // void checkErrors(const char *label); void copy(double* in, float* out); void copy(double* in, void* out); void copy(void* in, double* out); /*****************************************************/ // // fragment program: out = accum + vec // const char* kernelSource = \ " uniform samplerRect array1; " \ " uniform samplerRect array2; " \ " uniform samplerRect array3; " \ " uniform samplerRect array4; " \ " uniform samplerRect array5; " \ " uniform samplerRect array6; " \ " uniform samplerRect array7; " \ " uniform samplerRect array8; " \ " uniform samplerRect array9; " \ " uniform samplerRect accum ; " \ "void main(void) { " \ " gl_FragColor.x = " \ " textureRect(accum, gl_TexCoord[0].st).x + " \ " textureRect(array1, gl_TexCoord[0].st).x + " \ " textureRect(array2, gl_TexCoord[0].st).x + " \ " textureRect(array3, gl_TexCoord[0].st).x + " \ " textureRect(array4, gl_TexCoord[0].st).x + " \ " textureRect(array5, gl_TexCoord[0].st).x + " \ " textureRect(array6, gl_TexCoord[0].st).x + " \ " textureRect(array7, gl_TexCoord[0].st).x + " \ " textureRect(array8, gl_TexCoord[0].st).x + " \ " textureRect(array9, gl_TexCoord[0].st).x; }"; #ifdef DEBUGDEBUG // // error checking for GLSL // void printInfoLogs(GLuint obj, GLuint shader) { int infologLength = 0; int charsWritten = 0; char *infoLog; glGetProgramiv(obj, GL_INFO_LOG_LENGTH, &infologLength); if (infologLength > 1) { infoLog = (char *)malloc(infologLength); glGetProgramInfoLog(obj, infologLength, &charsWritten, infoLog); printf(infoLog); printf("\n"); free(infoLog); } glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &infologLength); if (infologLength > 1) { infoLog = (char *)malloc(infologLength); glGetShaderInfoLog(shader, infologLength, &charsWritten, infoLog); printf(infoLog); printf("\n"); free(infoLog); } } // // GL error checking // void checkErrors(const char *label) { GLenum errCode; const GLubyte *errStr; if ((errCode = glGetError()) != GL_NO_ERROR) { errStr = gluErrorString(errCode); printf("%s: OpenGL ERROR ",label); printf((char*)errStr); printf("\n"); exit(-2); } } #endif // // Conversion functions from double to float, the versions // using the void pointers are used with PBOs. // For details, see below where PBOs are actually used. // Note that I rely on the compiler to memalign these properly. // Whoever knows a way to do this operation for variable N // (not compile-time constant) in the most efficient way, // please drop me a note. // void copy(double* in, float* out) { for (int i=0; i