/* * * Example code to demonstrate the benefits of PBOs for fast * transfers to the GPU. * * (c) 2006 Dominik Göddeke, University of Dortmund * dominik.goeddeke@math.uni-dortmund.de * * For details, please refer to the tutorial available at * www.mathematik.uni-dortmund.de/~goeddeke/gpgpu/tutorial3.html * */ #include #include #include #include #include #include #include #include #include // for memcpy /********************************************************* * macros, problem size definition etc. * *********************************************************/ // PBO macro (see spec for details) #define BUFFER_OFFSET(i) ((char *)NULL + (i)) // array size N is texsize*texsize static const int texSize = 1025; static const int N = texSize*texSize; // hard-coded for shader + Cg, do not change this static const int numArraysPerChunk = 9; // input data size, default setting results in approx 1.6 GB of phys mem. static const int numChunks = 20; // number of iterations per chunk to emulate kernel workload static const int numKernelSteps = 1; // number of iterations the test is repeated, to reduce timing noise. static const int numIterations = 10; // uncomment the following to check for GL and Cg errors // Note that enabling this creates an (artificial) sync point (by glGetError()) //#define DEBUGDEBUG // // Enable this #define to enable PBOs for glTexSubImage() and glReadPixels(). // Otherwise, PBOs are not used. // #define USE_PBO // my own memcheck macro, only works on linux. Use with care, this will // slow down the application significantly. 
#define MEMCHECK() system("top -b -n1 | grep test | awk '{print $5\" \"$6}'")

/****************************************************************************/

//
// forward declaration of some functions
//
// cgErrorCallback() and checkErrors() are only defined when DEBUGDEBUG is
// set (see below); the three copy() overloads are defined further down.
//
void cgErrorCallback(void);
void checkErrors(const char *label);
void copy(float* in, float* out);
void copy(float* in, void* out);
void copy(void* in, float* out);

/*****************************************************/

//
// fragment program: out = accum + vec
//
// Cg source, kept as one string built from adjacent literals. The program
// samples 'accum' and the nine textures vec1..vec9 at coords.xy and returns
// the sum of all ten samples.
//
const char* kernelSource =
    " float main( "\
    " uniform samplerRECT accum, " \
    " uniform samplerRECT vec1, " \
    " uniform samplerRECT vec2, " \
    " uniform samplerRECT vec3, " \
    " uniform samplerRECT vec4, " \
    " uniform samplerRECT vec5, " \
    " uniform samplerRECT vec6, " \
    " uniform samplerRECT vec7, " \
    " uniform samplerRECT vec8, " \
    " uniform samplerRECT vec9, " \
    " in float4 coords:TEXCOORD0) : COLOR { " \
    " return texRECT(accum,coords.xy) + " \
    " texRECT(vec1,coords.xy) + " \
    " texRECT(vec2,coords.xy) + " \
    " texRECT(vec3,coords.xy) + " \
    " texRECT(vec4,coords.xy) + " \
    " texRECT(vec5,coords.xy) + " \
    " texRECT(vec6,coords.xy) + " \
    " texRECT(vec7,coords.xy) + " \
    " texRECT(vec8,coords.xy) + " \
    " texRECT(vec9,coords.xy); } ";

#ifdef DEBUGDEBUG

//
// Cg error callback
//
// Prints the string for the pending Cg error, if there is one, and then
// terminates the process with exit code -1.
//
void cgErrorCallback(void) {
    CGerror lastError = cgGetError();
    if (lastError) {
        // The error text is data, not a format: always print it through "%s"
        // so that a '%' inside the message cannot be interpreted by printf.
        printf("%s", cgGetErrorString(lastError));
    }
    // NOTE(review): exit is unconditional, exactly as in the original
    // (the original 'if' had no braces) -- presumably because this callback
    // is only expected to run once an error has been raised; confirm.
    exit(-1);
}

//
// GL error checking
//
// Queries glGetError() once. If an error is pending, prints
// "<label>: OpenGL ERROR <description>" followed by a newline and terminates
// the process with exit code -2. Returns normally when no error is pending.
//
// label: caller-supplied text identifying the call site in the message.
//
void checkErrors(const char *label) {
    const GLenum errCode = glGetError();
    if (errCode == GL_NO_ERROR) {
        return;
    }

    // gluErrorString() may return NULL for codes it does not recognise;
    // passing NULL to "%s" is undefined, so substitute a fixed text.
    const GLubyte *errStr = gluErrorString(errCode);
    const char *description =
        errStr ? reinterpret_cast<const char *>(errStr) : "(unknown error code)";

    printf("%s: OpenGL ERROR ", label);
    printf("%s", description);   // never use the library string as the format
    printf("\n");
    exit(-2);
}

#endif

//
// Conversion functions from float to float, the versions
// using the void pointers are used with PBOs.
// I rely on the compiler to inline these, they are left in // as separate functions to keep the various versions of the // tutorial consistent // void copy(float* in, float* out) { memcpy(out,in,N*sizeof(float)); } void copy(float* in, void* out) { memcpy(out,in,N*sizeof(float)); } void copy(void* in, float* out) { memcpy(out,in,N*sizeof(float)); } // // this does all the relevant things // int main(int argc, char **argv) { assert (numArraysPerChunk == 9); // because kernel is hard coded // // set up glut to get valid GL context and // get extension entry points // glutInit (&argc, argv); glutCreateWindow("STREAMING TUTORIAL"); glewInit(); // // viewport transform for 1:1 'pixel=texel=data' mapping // glMatrixMode(GL_PROJECTION); glLoadIdentity(); gluOrtho2D(0.0,texSize,0.0,texSize); glMatrixMode(GL_MODELVIEW); glLoadIdentity(); glViewport(0,0,texSize,texSize); // // create FBO and bind it // GLuint fb; glGenFramebuffersEXT(1,&fb); glBindFramebufferEXT(GL_FRAMEBUFFER_EXT,fb); // // create a whole bunch of single precision data // printf("---------------------------------------------\n"); printf("Creating input data, if the following amount \n"); printf("is greater than the physically available RAM,\n"); printf("press CTRL-C immediately to avoid HD paging. \n"); // input arrays double memrequirements = (numChunks*numArraysPerChunk + numChunks)*N*sizeof(float); // single precision textures memrequirements += (numArraysPerChunk + 2) * N * sizeof(float); // convert to megabytes memrequirements /= (1024.0 * 1024.0 * 1024.0); printf("Allocating %.2f GB.\n",memrequirements); #ifdef USE_PBO printf("PBO transfers are enabled.\n"); #else printf("PBO transfers are disabled.\n"); #endif // array to store intial value and final result, initially zeroed float* data = new float[N]; for (int i=0; i