/***** * glrender.cc * John Bowman, Orest Shardt, and Supakorn "Jamie" Rassameemasmuang * Render 3D Bezier paths and surfaces. *****/ #include #include #include #include #include #include #include #include #if !defined(_WIN32) #include #include #endif #include "common.h" #include "locate.h" #include "seconds.h" #include "statistics.h" #include "exithandlers.h" #ifdef HAVE_GL #include "glrender.h" #include "tr.h" #include "shaders.h" #include "GLTextures.h" #include "EXRFiles.h" #include #include "picture.h" #include "bezierpatch.h" #include "bbox3.h" #include "drawimage.h" #include "interact.h" #include "fpu.h" #include "renderBase.h" extern uint32_t CLZ(uint32_t a); const string SHADERS="shaders/GL/"; using settings::locateFile; using utils::stopWatch; using namespace settings; using namespace glm; namespace camp { // Note: getProjViewMat(), getViewMat(), and getNormMat() are defined in // vkrender.cc to avoid multiple definition errors when both renderers are compiled. // IBL texture objects (defined in initIBL()) camp::GLTexture2 iblbrdfTex; camp::GLTexture2 irradianceTex; camp::GLTexture3 reflTexturesTex; // GLFW window globals - kept in camp namespace for type compatibility string Action; using utils::statistics; statistics S; camp::GLTexture2 fromEXR(string const& EXRFile, camp::GLTexturesFmt const& fmt, GLint const& textureNumber) { IEXRFile fil(EXRFile); return camp::GLTexture2 {fil.getData(),fil.size(),textureNumber,fmt}; } camp::GLTexture3 fromEXR3( mem::vector const& EXRFiles, camp::GLTexturesFmt const& fmt, GLint const& textureNumber) { // 3d reflectance textures std::vector data; size_t count=EXRFiles.size(); int wi=0, ht=0; for(string const& EXRFile : EXRFiles) { IEXRFile fil3(EXRFile); std::tie(wi,ht)=fil3.size(); size_t imSize=4*wi*ht; std::copy(fil3.getData(),fil3.getData()+imSize,std::back_inserter(data)); } return camp::GLTexture3 { data.data(), std::tuple(wi,ht,static_cast(count)),textureNumber, fmt }; } void initIBL() { camp::GLTexturesFmt fmt; fmt.internalFmt=GL_RGB16F; string imageDir=locateFile(getSetting("imageDir"))+"/"; string imagePath=imageDir+getSetting("image")+"/"; irradianceTex=fromEXR(imagePath+"diffuse.exr",fmt,1); camp::GLTexturesFmt fmtRefl; fmtRefl.internalFmt=GL_RG16F; iblbrdfTex=fromEXR(imageDir+"refl.exr",fmtRefl,2); camp::GLTexturesFmt fmt3; fmt3.internalFmt=GL_RGB16F; fmt3.wrapS=GL_REPEAT; fmt3.wrapR=GL_CLAMP_TO_EDGE; fmt3.wrapT=GL_CLAMP_TO_EDGE; mem::vector files; mem::string prefix=imagePath+"refl"; for(unsigned int i=0; i <= 10; ++i) { mem::stringstream mss; mss << prefix << i << ".exr"; files.emplace_back(mss.str()); } reflTexturesTex=fromEXR3(files,fmt3,3); } void *glrenderWrapper(void *a); void noShaders() { cerr << "GLSL shaders not found." << endl; exit(-1); } void AsyGLRender::initComputeShaders() { string sum1=locateFile(SHADERS+"sum1.glsl"); string sum2=locateFile(SHADERS+"sum2.glsl"); string sum3=locateFile(SHADERS+"sum3.glsl"); if(sum1.empty() || sum2.empty() || sum3.empty()) noShaders(); std::vector shaders(1); std::vector shaderParams; shaders[0]=ShaderfileModePair(sum1.c_str(),GL_COMPUTE_SHADER); ostringstream s,s2; s << "LOCALSIZE " << localSize << "u" << endl; shaderParams.push_back(s.str().c_str()); s2 << "BLOCKSIZE " << blockSize << "u" << endl; shaderParams.push_back(s2.str().c_str()); GLuint rc=compileAndLinkShader(shaders,shaderParams,true,false,true,false); if(rc == 0) { GPUindexing=false; // Compute shaders are unavailable. if(settings::verbose > 2) cout << "No compute shader support" << endl; } else { // glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT,0,&maxgroups); // maxgroups=min(1024,maxgroups/(GLint) (localSize*blockSize)); sum1Shader=rc; shaders[0]=ShaderfileModePair(sum2.c_str(),GL_COMPUTE_SHADER); sum2Shader=compileAndLinkShader(shaders,shaderParams,true,false, true); shaders[0]=ShaderfileModePair(sum3.c_str(),GL_COMPUTE_SHADER); sum3Shader=compileAndLinkShader(shaders,shaderParams,true,false, true); } } void AsyGLRender::initBlendShader() { string screen=locateFile(SHADERS+"screen.glsl"); string blend=locateFile(SHADERS+"blend.glsl"); if(screen.empty() || blend.empty()) noShaders(); std::vector shaders(2); std::vector shaderParams; ostringstream s; s << "ARRAYSIZE " << maxSize << "u" << endl; shaderParams.push_back(s.str().c_str()); if(GPUindexing) shaderParams.push_back("GPUINDEXING"); if(GPUcompress) shaderParams.push_back("GPUCOMPRESS"); shaders[0]=ShaderfileModePair(screen.c_str(),GL_VERTEX_SHADER); shaders[1]=ShaderfileModePair(blend.c_str(),GL_FRAGMENT_SHADER); blendShader=compileAndLinkShader(shaders,shaderParams,ssbo); } void AsyGLRender::setBuffers() { glGenVertexArrays(1,&vao); // Bind VAO once and leave it bound for all subsequent draw operations glBindVertexArray(vao); materialData.renderCount=0; colorData.renderCount=0; triangleData.renderCount=0; transparentData.renderCount=0; // Create materials uniform buffer glGenBuffers(1, &materialsBuffer); #ifdef HAVE_SSBO glGenBuffers(1, &offsetBuffer); if(GPUindexing) glGenBuffers(1, &globalSumBuffer); glGenBuffers(1, &feedbackBuffer); glGenBuffers(1, &countBuffer); if(GPUcompress) { glGenBuffers(1, &indexBuffer); glGenBuffers(1, &elementsBuffer); } glGenBuffers(1, &fragmentBuffer); glGenBuffers(1, &depthBuffer); glGenBuffers(1, &opaqueBuffer); glGenBuffers(1, &opaqueDepthBuffer); #endif } void AsyGLRender::initShaders() { fpu_trap(false); // Work around FE_INVALID in Mesa/libgallium during shader compilation Nlights = nlights == 0 ? 0 : std::max(Nlights, nlights); nmaterials = materials.size(); string zero=locateFile(SHADERS+"zero.glsl"); string compress=locateFile(SHADERS+"compress.glsl"); string vertex=locateFile(SHADERS+"vertex.glsl"); string count=locateFile(SHADERS+"count.glsl"); string fragment=locateFile(SHADERS+"fragment.glsl"); string screen=locateFile(SHADERS+"screen.glsl"); if(zero.empty() || compress.empty() || vertex.empty() || fragment.empty() || screen.empty() || count.empty()) noShaders(); // Only try compute shaders if GPUindexing is explicitly enabled if(GPUindexing) { initComputeShaders(); } std::vector shaders(2); std::vector shaderParams; if(ibl) { shaderParams.push_back("USE_IBL"); initIBL(); } shaders[0]=ShaderfileModePair(vertex.c_str(),GL_VERTEX_SHADER); #ifdef HAVE_SSBO if(GPUindexing) shaderParams.push_back("GPUINDEXING"); if(GPUcompress) shaderParams.push_back("GPUCOMPRESS"); shaders[1]=ShaderfileModePair(count.c_str(),GL_FRAGMENT_SHADER); countShader=compileAndLinkShader(shaders,shaderParams, true,false,false,true); if(countShader) shaderParams.push_back("HAVE_SSBO"); #else countShader=0; #endif ssbo=countShader; if(!ssbo) { glEnable(GL_BLEND); glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); } interlock=ssbo && getSetting("GPUinterlock"); if(isNVIDIA30xx((const char*)glGetString(GL_RENDERER))) interlock = false; if(!ssbo && settings::verbose > 2) cout << "No SSBO support; order-independent transparency unavailable" << endl; shaders[1]=ShaderfileModePair(fragment.c_str(),GL_FRAGMENT_SHADER); shaderParams.push_back("MATERIAL"); if(orthographic) shaderParams.push_back("ORTHOGRAPHIC"); ostringstream lights,materials,opaque; lights << "Nlights " << Nlights; shaderParams.push_back(lights.str().c_str()); materials << "Nmaterials " << nmaterials; shaderParams.push_back(materials.str().c_str()); shaderParams.push_back("WIDTH"); pixelShader=compileAndLinkShader(shaders,shaderParams,ssbo); shaderParams.pop_back(); shaderParams.push_back("NORMAL"); if(interlock) shaderParams.push_back("HAVE_INTERLOCK"); materialShader[0]=compileAndLinkShader(shaders,shaderParams, ssbo,interlock,false,true); if(interlock && !materialShader[0]) { shaderParams.pop_back(); interlock=false; materialShader[0]=compileAndLinkShader(shaders,shaderParams,ssbo); if(settings::verbose > 2) cout << "No fragment shader interlock support" << endl; } shaderParams.push_back("OPAQUE"); materialShader[1]=compileAndLinkShader(shaders,shaderParams,ssbo); shaderParams.pop_back(); shaderParams.push_back("COLOR"); colorShader[0]=compileAndLinkShader(shaders,shaderParams,ssbo, interlock); shaderParams.push_back("OPAQUE"); colorShader[1]=compileAndLinkShader(shaders,shaderParams,ssbo); shaderParams.pop_back(); shaderParams.push_back("GENERAL"); if(mode != DRAWMODE_NORMAL) shaderParams.push_back("WIREFRAME"); generalShader[0]=compileAndLinkShader(shaders,shaderParams,ssbo, interlock); shaderParams.push_back("OPAQUE"); generalShader[1]=compileAndLinkShader(shaders,shaderParams,ssbo); shaderParams.pop_back(); shaderParams.push_back("TRANSPARENT"); transparentShader=compileAndLinkShader(shaders,shaderParams,ssbo, interlock); shaderParams.clear(); if(ssbo) { if(GPUindexing) shaderParams.push_back("GPUINDEXING"); shaders[0]=ShaderfileModePair(screen.c_str(),GL_VERTEX_SHADER); shaders[1]=ShaderfileModePair(compress.c_str(),GL_FRAGMENT_SHADER); compressShader=compileAndLinkShader(shaders,shaderParams,ssbo); if(GPUindexing) shaderParams.pop_back(); else { shaders[1]=ShaderfileModePair(zero.c_str(),GL_FRAGMENT_SHADER); zeroShader=compileAndLinkShader(shaders,shaderParams,ssbo); } maxSize=1; initBlendShader(); } lastshader=-1; if(vao == 0) setBuffers(); fpu_trap(settings::trap()); } void AsyGLRender::deleteComputeShaders() { glDeleteProgram(sum1Shader); glDeleteProgram(sum2Shader); glDeleteProgram(sum3Shader); } void AsyGLRender::deleteBlendShader() { glDeleteProgram(blendShader); } void AsyGLRender::deleteShaders() { if(ssbo) { deleteBlendShader(); if(GPUindexing) deleteComputeShaders(); else glDeleteProgram(zeroShader); glDeleteProgram(countShader); glDeleteProgram(compressShader); } if (transparentShader != 0) glDeleteProgram(transparentShader); for(unsigned int opaque=0; opaque < 2; ++opaque) { if (generalShader[opaque] != 0) glDeleteProgram(generalShader[opaque]); if (colorShader[opaque] != 0) glDeleteProgram(colorShader[opaque]); if (materialShader[opaque] != 0) glDeleteProgram(materialShader[opaque]); } if (pixelShader != 0) glDeleteProgram(pixelShader); } void AsyGLRender::resizeBlendShader(GLuint maxDepth) { maxSize=ceilpow2(maxDepth); deleteBlendShader(); initBlendShader(); } void AsyGLRender::drawFrame() { if((nlights == 0 && Nlights > 0) || nlights > Nlights || materials.size() > nmaterials) { deleteShaders(); initShaders(); } // Apply srgb setting each frame so changes take effect dynamically if(getSetting("srgb")) glEnable(GL_FRAMEBUFFER_SRGB); else glDisable(GL_FRAMEBUFFER_SRGB); // Set viewport before clearing (in case it wasn't set) // Skip during export - trBeginTile handles viewport for tiling if(!exporting) glViewport(0, 0, Width, Height); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Use member variables from AsyGLRender (following Vulkan pattern) if(xmin >= xmax || ymin >= ymax || Zmin >= Zmax) return; drawBuffers(); if(queueExport) { queueExport=false; Export(); } } // Return x divided by y rounded up to the nearest integer. int ceilquotient(int x, int y) { return (x+y-1)/y; } void AsyGLRender::Export(int) { size_t ndata=3*fullWidth*fullHeight; if(ndata == 0) return; glReadBuffer(GL_BACK_LEFT); glPixelStorei(GL_PACK_ALIGNMENT,1); glFinish(); exporting=true; try { unsigned char *data=new unsigned char[ndata]; if(data) { TRcontext *tr=trNew(); int width=ceilquotient(fullWidth, ceilquotient(fullWidth,std::min(maxTileWidth,Width))); int height=ceilquotient(fullHeight, ceilquotient(fullHeight, std::min(maxTileHeight,Height))); if(settings::verbose > 1) cout << "Exporting " << Prefix << " as " << fullWidth << "x" << fullHeight << " image" << " using tiles of size " << width << "x" << height << endl; unsigned border=std::min(std::min(1,(width-1)/2),(height-1)/2); trTileSize(tr,width,height,border); trImageSize(tr,fullWidth,fullHeight); trImageBuffer(tr,GL_RGB,GL_UNSIGNED_BYTE,data); setDimensions(fullWidth,fullHeight,X/Width*fullWidth,Y/Width*fullWidth); size_t count=0; if(haveScene) { (orthographic ? trOrtho : trFrustum)(tr,xmin,xmax,ymin,ymax,-Zmax,-Zmin); do { trBeginTile(tr); remesh=true; redraw=true; prepareScene(); drawFrame(); lastshader=-1; ++count; } while (trEndTile(tr)); } else {// clear screen and return redraw=true; prepareScene(); drawFrame(); } if(settings::verbose > 1) cout << count << " tile" << (count != 1 ? "s" : "") << " drawn" << endl; trDelete(tr); picture pic; drawRawImage *Image=NULL; if(haveScene) { double w=oWidth; double h=oHeight; double Aspect=((double) fullWidth)/fullHeight; if(w > h*Aspect) w=(int) (h*Aspect+0.5); else h=(int) (w/Aspect+0.5); // Render an antialiased image. Image=new drawRawImage(data,fullWidth,fullHeight, transform(0.0,0.0,w,0.0,0.0,h), antialias); pic.append(Image); } pic.shipout(NULL,Prefix,Format,false,ViewExport); if(Image) delete Image; delete[] data; } } catch(handled_error const&) { } catch(std::bad_alloc&) { outOfMemory(); } // Restore viewport and redraw full scene so back buffer has correct content // (matches Vulkan pattern which renders export to separate framebuffer) glViewport(0, 0, Width, Height); setProjection(); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); remesh=true; redraw=true; prepareScene(); drawBuffers(); #ifdef HAVE_PTHREAD if(threads && readyAfterExport) { readyAfterExport=false; threadMgr.endwait(threadMgr.readySignal,threadMgr.readyLock); } #endif exporting=false; initSSBO=true; } // Return the greatest power of 2 less than or equal to n. inline unsigned int floorpow2(unsigned int n) { n |= n >> 1; n |= n >> 2; n |= n >> 4; n |= n >> 8; n |= n >> 16; return n-(n >> 1); } void quit() { gl->quit(); } void AsyGLRender::cycleMode() { // Call base class to handle mode cycling and ibl AsyRender::cycleMode(); if(ssbo) initSSBO=true; switch(mode) { case DRAWMODE_NORMAL: // regular nlights=nlights0; // Restore original number of lights lastshader=-1; glPolygonMode(GL_FRONT_AND_BACK,GL_FILL); break; case DRAWMODE_OUTLINE: // outline nlights=0; // Force shader recompilation glPolygonMode(GL_FRONT_AND_BACK,GL_LINE); break; case DRAWMODE_WIREFRAME: // wireframe Nlights=1; // Force shader recompilation break; } } bool NVIDIA() { #ifdef GL_SHADING_LANGUAGE_VERSION const char *GLSL_VERSION=(const char *) glGetString(GL_SHADING_LANGUAGE_VERSION); #else const char *GLSL_VERSION=""; #endif return string(GLSL_VERSION).find("NVIDIA") != string::npos; } string getLightIndex(size_t const& index, string const& fieldName) { ostringstream buf; buf << "lights[" << index << "]." << fieldName; return Strdup(buf.str()); } string getCenterIndex(size_t const& index) { ostringstream buf; buf << "Centers[" << index << "]"; return Strdup(buf.str()); } template void registerBuffer(const std::vector& buffervector, GLuint& bufferIndex, bool copy, GLenum type=GL_ARRAY_BUFFER) { if(!buffervector.empty()) { if(bufferIndex == 0) { glGenBuffers(1,&bufferIndex); copy=true; } glBindBuffer(type,bufferIndex); if(copy) glBufferData(type,buffervector.size()*sizeof(T), buffervector.data(),GL_STATIC_DRAW); } } void AsyGLRender::clearCount() { glUseProgram(zeroShader); lastshader=zeroShader; glUniform1ui(glGetUniformLocation(zeroShader,"width"),Width); fpu_trap(false); // Work around FE_INVALID glDrawArrays(GL_TRIANGLES, 0, 3); fpu_trap(settings::trap()); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); } void AsyGLRender::compressCount() { glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); glUseProgram(compressShader); lastshader=compressShader; glUniform1ui(glGetUniformLocation(compressShader,"width"),Width); fpu_trap(false); // Work around FE_INVALID glDrawArrays(GL_TRIANGLES, 0, 3); fpu_trap(settings::trap()); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); } void AsyGLRender::partialSums(bool readSize) { // Compute partial sums on the GPU glUseProgram(sum1Shader); glDispatchCompute(g,1,1); glUseProgram(sum2Shader); glUniform1ui(glGetUniformLocation(sum2Shader,"blockSize"), ceilquotient(g,localSize)); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glDispatchCompute(1,1,1); glUseProgram(sum3Shader); glUniform1ui(glGetUniformLocation(sum3Shader,"final"),elements-1); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glDispatchCompute(g,1,1); } void AsyGLRender::resizeFragmentBuffer() { if(GPUindexing) { glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); glBindBuffer(GL_SHADER_STORAGE_BUFFER,feedbackBuffer); GLuint *feedback=(GLuint *) glMapBuffer(GL_SHADER_STORAGE_BUFFER,GL_READ_ONLY); GLuint maxDepth=feedback[0]; if(maxDepth > maxSize) resizeBlendShader(maxDepth); fragments=feedback[1]; glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); } if(fragments > maxFragments) { // Initialize the alpha buffer maxFragments=11*fragments/10; glBindBuffer(GL_SHADER_STORAGE_BUFFER,fragmentBuffer); glBufferData(GL_SHADER_STORAGE_BUFFER,maxFragments*sizeof(vec4), NULL,GL_DYNAMIC_DRAW); glBindBufferBase(GL_SHADER_STORAGE_BUFFER,4,fragmentBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER,depthBuffer); glBufferData(GL_SHADER_STORAGE_BUFFER,maxFragments*sizeof(GLfloat), NULL,GL_DYNAMIC_DRAW); glBindBufferBase(GL_SHADER_STORAGE_BUFFER,5,depthBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER,feedbackBuffer); } } void AsyGLRender::refreshBuffers() { GLuint zero=0; pixels=(Width+1)*(Height+1); if(initSSBO) { processors=1; GLuint Pixels; if(GPUindexing) { GLuint G=ceilquotient(pixels,groupSize); Pixels=groupSize*G; GLuint globalSize=localSize*ceilquotient(G,localSize); glBindBuffer(GL_SHADER_STORAGE_BUFFER,globalSumBuffer); glBufferData(GL_SHADER_STORAGE_BUFFER,globalSize*sizeof(GLuint),NULL, GL_DYNAMIC_READ); glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R32UI,GL_RED_INTEGER, GL_UNSIGNED_INT,&zero); glBindBufferBase(GL_SHADER_STORAGE_BUFFER,3,globalSumBuffer); } else Pixels=pixels; glBindBuffer(GL_SHADER_STORAGE_BUFFER,offsetBuffer); glBufferData(GL_SHADER_STORAGE_BUFFER,(Pixels+2)*sizeof(GLuint), NULL,GL_DYNAMIC_DRAW); glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R32UI,GL_RED_INTEGER, GL_UNSIGNED_INT,&zero); glBindBufferBase(GL_SHADER_STORAGE_BUFFER,0,offsetBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER,countBuffer); glBufferData(GL_SHADER_STORAGE_BUFFER,(Pixels+2)*sizeof(GLuint), NULL,GL_DYNAMIC_DRAW); glBindBufferBase(GL_SHADER_STORAGE_BUFFER,2,countBuffer); if(GPUcompress) { GLuint one=1; glBindBuffer(GL_ATOMIC_COUNTER_BUFFER,elementsBuffer); glBufferData(GL_ATOMIC_COUNTER_BUFFER,sizeof(GLuint),&one, GL_DYNAMIC_DRAW); glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER,0,elementsBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER,indexBuffer); glBufferData(GL_SHADER_STORAGE_BUFFER,pixels*sizeof(GLuint), NULL,GL_DYNAMIC_DRAW); glBindBufferBase(GL_SHADER_STORAGE_BUFFER,1,indexBuffer); } glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R32UI,GL_RED_INTEGER, GL_UNSIGNED_INT,&zero); // Clear count or index buffer glBindBuffer(GL_SHADER_STORAGE_BUFFER,opaqueBuffer); glBufferData(GL_SHADER_STORAGE_BUFFER,pixels*sizeof(vec4),NULL, GL_DYNAMIC_DRAW); glBindBufferBase(GL_SHADER_STORAGE_BUFFER,6,opaqueBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER,opaqueDepthBuffer); glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint)+pixels*sizeof(GLfloat),NULL, GL_DYNAMIC_DRAW); glBindBufferBase(GL_SHADER_STORAGE_BUFFER,7,opaqueDepthBuffer); const GLfloat zerof=0.0; glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R32F,GL_RED,GL_FLOAT,&zerof); glBindBuffer(GL_SHADER_STORAGE_BUFFER,feedbackBuffer); glBufferData(GL_SHADER_STORAGE_BUFFER,2*sizeof(GLuint),NULL, GL_DYNAMIC_DRAW); glBindBufferBase(GL_SHADER_STORAGE_BUFFER,8,feedbackBuffer); glBindBuffer(GL_SHADER_STORAGE_BUFFER,feedbackBuffer); initSSBO=false; } // Determine the fragment offsets if(exporting && GPUindexing && !GPUcompress) { glBindBuffer(GL_SHADER_STORAGE_BUFFER,countBuffer); glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R32UI,GL_RED_INTEGER, GL_UNSIGNED_INT,&zero); glBindBuffer(GL_SHADER_STORAGE_BUFFER,feedbackBuffer); } if(!interlock) { drawBuffer(lineData,countShader,false,1); drawBuffer(materialData,countShader,false,4); drawBuffer(colorData,countShader,true,4); drawBuffer(triangleData,countShader,true,4); } glDepthMask(GL_FALSE); // Don't write to depth buffer glDisable(GL_MULTISAMPLE); drawBuffer(transparentData,countShader,true,4); glEnable(GL_MULTISAMPLE); glDepthMask(GL_TRUE); // Write to depth buffer if(GPUcompress) { compressCount(); GLuint *p=(GLuint *) glMapBuffer(GL_ATOMIC_COUNTER_BUFFER,GL_READ_WRITE); elements=GPUindexing ? p[0] : p[0]-1; p[0]=1; glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER); if(elements == 0) return; } else elements=pixels; if(GPUindexing) { // Ensure fragment shader writes to countBuffer are visible to compute shaders. glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT); g=ceilquotient(elements,groupSize); elements=groupSize*g; if(settings::verbose > timePartialSumVerbosity) { static bool first=true; if(first) { partialSums(); first=false; } unsigned int N=10000; stopWatch Timer; for(unsigned int i=0; i < N; ++i) partialSums(); glFinish(); double T=Timer.seconds()/N; cout << "elements=" << elements << endl; cout << "Tmin (ms)=" << T*1e3 << endl; cout << "Megapixels/second=" << elements/T/1e6 << endl; } partialSums(true); } else { size_t size=elements*sizeof(GLuint); // Compute partial sums on the CPU glBindBuffer(GL_SHADER_STORAGE_BUFFER,countBuffer); GLuint *p=(GLuint *) glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0,size+sizeof(GLuint), GL_MAP_READ_BIT); GLuint maxDepth=p[0]; GLuint *count=p+1; glBindBuffer(GL_SHADER_STORAGE_BUFFER,offsetBuffer); GLuint *offset=(GLuint *) glMapBufferRange(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint),size, GL_MAP_WRITE_BIT); size_t Offset=offset[0]=count[0]; for(size_t i=1; i < elements; ++i) offset[i]=Offset += count[i]; fragments=Offset; glBindBuffer(GL_SHADER_STORAGE_BUFFER,offsetBuffer); glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); glBindBuffer(GL_SHADER_STORAGE_BUFFER,countBuffer); glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); if(exporting) { glBindBuffer(GL_SHADER_STORAGE_BUFFER,countBuffer); glClearBufferData(GL_SHADER_STORAGE_BUFFER,GL_R32UI,GL_RED_INTEGER, GL_UNSIGNED_INT,&zero); } else clearCount(); if(maxDepth > maxSize) resizeBlendShader(maxDepth); } lastshader=-1; } void AsyGLRender::setUniformsOpenGL(GLint shader) { bool normal=shader != pixelShader; if(shader != lastshader) { glUseProgram(shader); // Cache uniform locations when shader changes projViewLoc = glGetUniformLocation(shader,"projViewMat"); viewMatLoc = glGetUniformLocation(shader,"viewMat"); normMatLoc = glGetUniformLocation(shader,"normMat"); if(normal) glUniform1ui(glGetUniformLocation(shader,"width"),Width); } glUniformMatrix4fv(projViewLoc,1,GL_FALSE, value_ptr(mat4(projViewMat))); glUniformMatrix4fv(viewMatLoc,1,GL_FALSE, value_ptr(mat4(viewMat))); if(normal) glUniformMatrix3fv(normMatLoc,1,GL_FALSE, value_ptr(mat3(normMat))); if(shader == countShader) { lastshader=shader; return; } if(shader != lastshader) { lastshader=shader; glUniform1ui(glGetUniformLocation(shader,"nlights"),nlights); for(size_t i=0; i < nlights; ++i) { triple Lighti=Lights[i]; double *Diffusei= LightsDiffuse+4*i; glUniform3f(glGetUniformLocation(shader, getLightIndex(i,"direction").c_str()), (GLfloat) Lighti.getx(),(GLfloat) Lighti.gety(), (GLfloat) Lighti.getz()); glUniform3f(glGetUniformLocation(shader, getLightIndex(i,"color").c_str()), (GLfloat) Diffusei[0],(GLfloat) Diffusei[1], (GLfloat) Diffusei[2]); } if(settings::getSetting("ibl")) { iblbrdfTex.setUniform(glGetUniformLocation(shader, "reflBRDFSampler")); irradianceTex.setUniform(glGetUniformLocation(shader, "diffuseSampler")); reflTexturesTex.setUniform(glGetUniformLocation(shader, "reflImgSampler")); } } // Bind global materials buffer GLuint binding=0; GLuint blockindex=glGetUniformBlockIndex(shader,"MaterialBuffer"); if(blockindex != GL_INVALID_INDEX) { glUniformBlockBinding(shader,blockindex,binding); bool copy=shouldUpdateBuffers; registerBuffer(materials, materialsBuffer, copy, GL_UNIFORM_BUFFER); shouldUpdateBuffers=false; glBindBufferBase(GL_UNIFORM_BUFFER, binding, materialsBuffer); } } void AsyGLRender::drawBuffer(VertexBuffer& data, GLint shader, bool color, unsigned int drawType) // drawType: 0=GL_POINTS, 1=GL_LINES, 4=GL_TRIANGLES { if(data.indices.empty()) return; // Check for OpenGL errors before drawing if(settings::verbose > 3) { GLenum err = glGetError(); if(err != GL_NO_ERROR) { cerr << "drawBuffer: OpenGL error at start: " << err << endl; } } bool normal=shader != pixelShader; bool copy = (remesh || data.renderCount < 1) && !copied; // Get persistent GL buffer handles for this VertexBuffer instance auto& glBuf = glBuffers[&data]; // Upload vertex data using persistent buffer if(color) { registerBuffer(data.colorVertices, glBuf.vertexBuffer, copy, GL_ARRAY_BUFFER); } else if(normal) { registerBuffer(data.materialVertices, glBuf.vertexBuffer, copy, GL_ARRAY_BUFFER); } else { registerBuffer(data.pointVertices, glBuf.vertexBuffer, copy, GL_ARRAY_BUFFER); } // Upload index data using persistent buffer registerBuffer(data.indices, glBuf.indexBuffer, copy, GL_ELEMENT_ARRAY_BUFFER); setUniformsOpenGL(shader); // Position attribute (3 floats) if(color) { glVertexAttribPointer(positionAttrib, 3, GL_FLOAT, GL_FALSE, sizeof(ColorVertex), (void *) offsetof(ColorVertex, position)); } else if(normal) { glVertexAttribPointer(positionAttrib, 3, GL_FLOAT, GL_FALSE, sizeof(MaterialVertex), (void *) offsetof(MaterialVertex, position)); } else { glVertexAttribPointer(positionAttrib, 3, GL_FLOAT, GL_FALSE, sizeof(PointVertex), (void *) offsetof(PointVertex, position)); } glEnableVertexAttribArray(positionAttrib); if(normal && nlights > 0) { // Normal attribute (3 floats) glVertexAttribPointer(normalAttrib, 3, GL_FLOAT, GL_FALSE, color ? sizeof(ColorVertex) : sizeof(MaterialVertex), (void *) offsetof(MaterialVertex, normal)); glEnableVertexAttribArray(normalAttrib); } else if(!normal) { // Width attribute for points (1 float) glVertexAttribPointer(widthAttrib, 1, GL_FLOAT, GL_FALSE, sizeof(PointVertex), (void *) offsetof(PointVertex, width)); glEnableVertexAttribArray(widthAttrib); } // Material index attribute (1 int) if(color) { glVertexAttribIPointer(materialAttrib, 1, GL_INT, sizeof(ColorVertex), (void *) offsetof(ColorVertex, material)); } else if(normal) { glVertexAttribIPointer(materialAttrib, 1, GL_INT, sizeof(MaterialVertex), (void *) offsetof(MaterialVertex, material)); } else { glVertexAttribIPointer(materialAttrib, 1, GL_INT, sizeof(PointVertex), (void *) offsetof(PointVertex, material)); } glEnableVertexAttribArray(materialAttrib); if(color) { // Color attribute (4 floats) glVertexAttribPointer(colorAttrib, 4, GL_FLOAT, GL_FALSE, sizeof(ColorVertex), (void *) offsetof(ColorVertex, color)); glEnableVertexAttribArray(colorAttrib); } fpu_trap(false); // Work around FE_INVALID glDrawElements(drawType, data.indices.size(), GL_UNSIGNED_INT, (void *) 0); fpu_trap(settings::trap()); // Disable attribute arrays but keep VAO bound for next draw call glDisableVertexAttribArray(positionAttrib); if(normal && nlights > 0) glDisableVertexAttribArray(normalAttrib); if(!normal) glDisableVertexAttribArray(widthAttrib); glDisableVertexAttribArray(materialAttrib); if(color) glDisableVertexAttribArray(colorAttrib); glBindBuffer(GL_UNIFORM_BUFFER, 0); glBindBuffer(GL_ARRAY_BUFFER, 0); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); } void AsyGLRender::drawPoints() { drawBuffer(pointData,pixelShader,false,0); // GL_POINTS pointData.renderCount++; pointData.clear(); } void AsyGLRender::drawLines() { drawBuffer(lineData,materialShader[Opaque],false,1); // GL_LINES lineData.renderCount++; lineData.clear(); } void AsyGLRender::drawMaterials() { drawBuffer(materialData,materialShader[Opaque]); // default GL_TRIANGLES materialData.renderCount++; materialData.clear(); } void AsyGLRender::drawColors() { drawBuffer(colorData,colorShader[Opaque],true); // default GL_TRIANGLES colorData.renderCount++; colorData.clear(); } void AsyGLRender::drawTriangles() { drawBuffer(triangleData,generalShader[Opaque],true); // default GL_TRIANGLES triangleData.renderCount++; triangleData.clear(); } void AsyGLRender::aBufferTransparency() { // Collect transparent fragments glDepthMask(GL_FALSE); // Disregard depth drawBuffer(transparentData,transparentShader,true); glDepthMask(GL_TRUE); // Respect depth // Blend transparent fragments glDisable(GL_DEPTH_TEST); glUseProgram(blendShader); lastshader=blendShader; glUniform1ui(glGetUniformLocation(blendShader,"width"),Width); glUniform4f(glGetUniformLocation(blendShader,"background"), Background[0],Background[1],Background[2], Background[3]); fpu_trap(false); // Work around FE_INVALID glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glDrawArrays(GL_TRIANGLES,0,3); fpu_trap(settings::trap()); glEnable(GL_DEPTH_TEST); } void AsyGLRender::drawTransparent() { if(ssbo) { glDisable(GL_MULTISAMPLE); aBufferTransparency(); glEnable(GL_MULTISAMPLE); } else { sortTriangles(); glDepthMask(GL_FALSE); // Don't write to depth buffer drawBuffer(transparentData,transparentShader,true,4); glDepthMask(GL_TRUE); // Write to depth buffer } transparentData.renderCount++; transparentData.clear(); } void AsyGLRender::drawBuffers() { copied=false; Opaque=transparentData.indices.empty(); bool transparent=!Opaque; if(ssbo) { if(transparent) { refreshBuffers(); if(!interlock) { resizeFragmentBuffer(); copied=true; } } } drawPoints(); drawLines(); drawMaterials(); drawColors(); drawTriangles(); if(transparent) { if(ssbo) copied=true; if(interlock) resizeFragmentBuffer(); drawTransparent(); } Opaque=0; } AsyGLRender::~AsyGLRender() { if (this->View) { ::glfwDestroyWindow(getRenderWindow()); glfwWindow = nullptr; } // Cleanup OpenGL resources glDeleteProgram(pixelShader); for(int i=0; i<2; ++i) { glDeleteProgram(materialShader[i]); glDeleteProgram(colorShader[i]); glDeleteProgram(generalShader[i]); } glDeleteProgram(countShader); glDeleteProgram(transparentShader); glDeleteProgram(blendShader); glDeleteProgram(zeroShader); glDeleteProgram(compressShader); glDeleteProgram(sum1Shader); glDeleteProgram(sum2Shader); glDeleteProgram(sum3Shader); if(vao) glDeleteVertexArrays(1, &vao); if(offsetBuffer) glDeleteBuffers(1, &offsetBuffer); if(indexBuffer) glDeleteBuffers(1, &indexBuffer); if(elementsBuffer) glDeleteBuffers(1, &elementsBuffer); if(countBuffer) glDeleteBuffers(1, &countBuffer); if(globalSumBuffer) glDeleteBuffers(1, &globalSumBuffer); if(fragmentBuffer) glDeleteBuffers(1, &fragmentBuffer); if(depthBuffer) glDeleteBuffers(1, &depthBuffer); if(opaqueBuffer) glDeleteBuffers(1, &opaqueBuffer); if(opaqueDepthBuffer) glDeleteBuffers(1, &opaqueDepthBuffer); if(feedbackBuffer) glDeleteBuffers(1, &feedbackBuffer); // Cleanup persistent vertex/index buffers from VertexBuffer instances for (auto& [vb, glBuf] : glBuffers) { if (glBuf.vertexBuffer) glDeleteBuffers(1, &glBuf.vertexBuffer); if (glBuf.indexBuffer) glDeleteBuffers(1, &glBuf.indexBuffer); } glBuffers.clear(); } void AsyGLRender::render(RenderFunctionArgs const& args) { #if !defined(_WIN32) setenv("XMODIFIERS","",true); #endif lastshader = -1; copyRenderArgs(args); nlights0 = nlights; // Save original for mode restoration pair maxtile=getSetting("maxtile"); maxTileWidth=(int) maxtile.getx(); maxTileHeight=(int) maxtile.gety(); if(maxTileWidth <= 0) maxTileWidth=1024; if(maxTileHeight <= 0) maxTileHeight=768; #ifdef HAVE_PTHREAD static bool initializedView=false; #endif if(!initialized) Fitscreen=1; if(!(initialized && interact::interactive)) { antialias=settings::getSetting("antialias") > 1; Aspect = args.width/args.height; initDisplay(args.width, args.height); // Force a hard viewport limit to work around direct rendering bugs. // Alternatively, one can use -glOptions=-indirect (with a performance // penalty). { pair maxViewport = settings::getSetting("maxviewport"); int maxWidth = maxViewport.getx() > 0 ? (int)ceil(maxViewport.getx()) : screenWidth; int maxHeight = maxViewport.gety() > 0 ? (int)ceil(maxViewport.gety()) : screenHeight; if(maxWidth <= 0) maxWidth = max(maxHeight, 2); if(maxHeight <= 0) maxHeight = max(maxWidth, 2); if(screenWidth <= 0) screenWidth=maxWidth; else screenWidth=min(screenWidth,maxWidth); if(screenHeight <= 0) screenHeight=maxHeight; else screenHeight=min(screenHeight,maxHeight); } } havewindow = View && threads; maxFragments = 0; clearMaterials(); shouldUpdateBuffers=true; initialized=true; #ifdef HAVE_PTHREAD if(threads && initializedView) { if(View) { // Called from asymain thread, main thread handles rendering hideWindow=false; threadMgr.messageQueue.enqueue(RendererMessage::updateRenderer); } else readyAfterExport=queueExport=true; return; } #endif // Create GLFW window BEFORE OpenGL initialization if(!glfwWindow) { // For non-View rendering, hide the window during creation to prevent flash if(!View) glfwWindowHint(GLFW_VISIBLE, 0); else { // Enable multisampling only for visible windows (matches reference GLUT behavior) Int multisample = getSetting("multisample"); if(multisample > 1) glfwWindowHint(GLFW_SAMPLES, multisample); } fpu_trap(false); // Work around FE_INVALID in Mesa/libgallium during GL initialization GLFWwindow* newWindow = glfwCreateRenderWindow(Width, Height, title.empty() ? Prefix.c_str() : title.c_str(), this); if(newWindow == nullptr) { cerr << "Failed to create GLFW window" << endl; exit(-1); } glfwWindow = newWindow; // Make context current before GLEW initialization (matching reference pattern) glfwMakeContextCurrent(newWindow); // Initialize GLEW immediately after context creation (matching reference pattern) glewExperimental = GL_TRUE; GLenum glewErr = glewInit(); if(glewErr != GLEW_OK) { const char *glVer = (const char *)glGetString(GL_VERSION); if(glVer == NULL) { cerr << "GLEW initialization error: " << glewGetErrorString(glewErr) << endl; exit(-1); } } // Set swap interval based on vsync setting (0 = no vsync, 1 = vsync) glfwRendererSwapInterval(getSetting("vsync") ? 1 : 0); const char *GLSL_VERSION=(const char *)glGetString(GL_SHADING_LANGUAGE_VERSION); if(GLSL_VERSION) GLSLversion=(int) (100*atof(GLSL_VERSION)+0.5); if(settings::verbose > 2) cout << "GLSL version " << GLSL_VERSION << " (GLSLversion=" << GLSLversion << ")" << endl; // Check multisampling { int samples = 0; glGetIntegerv(GL_SAMPLES, &samples); if(settings::verbose > 1 && samples > 1) cout << "Multisampling enabled with sample width " << samples << endl; } fpu_trap(settings::trap()); } #if defined(HAVE_COMPUTE_SHADER) GPUindexing=getSetting("GPUindexing"); GPUcompress=getSetting("GPUcompress"); #else GPUindexing=false; GPUcompress=false; #endif // Initialize GPU compute parameters if(GPUindexing) { localSize = settings::getSetting("GPUlocalSize"); checkpow2(localSize,"GPUlocalSize"); blockSize = settings::getSetting("GPUblockSize"); checkpow2(blockSize,"GPUblockSize"); groupSize = localSize * blockSize; } glClearColor(Background[0], Background[1], Background[2], Background[3]); if(View) { if(!getSetting("fitscreen")) Fitscreen=0; firstFit=true; fitscreen(); #ifdef HAVE_PTHREAD initializedView = true; #endif } glEnable(GL_DEPTH_TEST); mode = DRAWMODE_WIREFRAME; cycleMode(); ViewExport = View; havewindow = initialized && threads; // Enter main loop or export if(View) { // Process pending resize events from fitscreen() before entering the main loop, // so the framebuffer has reached its target size before the first frame renders. glfwPollEvents(); mainLoop(); } else { update(); display(); if(threads) { exportHandler(0); #ifdef HAVE_PTHREAD if(!pthread_equal(pthread_self(), threadMgr.mainthread)) threadMgr.endwait(threadMgr.readySignal, threadMgr.readyLock); #endif } else { exportHandler(0); quit(); } } } // RenderCallbacks interface implementation void AsyGLRender::onMouseButton(int button, int action, int mods) { AsyRender::onMouseButton(button, action, mods); } void AsyGLRender::onFramebufferResize(int width, int height) { AsyRender::onFramebufferResize(width, height); } void AsyGLRender::onScroll(double xoffset, double yoffset) { AsyRender::onScroll(xoffset, yoffset); } void AsyGLRender::onCursorPos(double xpos, double ypos) { AsyRender::onCursorPos(xpos, ypos); } void AsyGLRender::onKey(int key, int scancode, int action, int mods) { AsyRender::onKey(key, scancode, action, mods); } void AsyGLRender::onWindowFocus(int focused) {} void AsyGLRender::onClose() { exitHandler(0); } /** * Swap front and back buffers (GLFW-specific implementation). */ void AsyGLRender::swapBuffers() { glfwSwapBuffers(getRenderWindow()); } void frustum(GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble nearVal, GLdouble farVal) { gl->frustum(left, right, bottom, top, nearVal, farVal); } void ortho(GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble nearVal, GLdouble farVal) { gl->ortho(left, right, bottom, top, nearVal, farVal); } void AsyGLRender::update() { capzoom(); redraw=true; AsyRender::update(); } GLFWwindow* AsyGLRender::getRenderWindow() const { return glfwWindow; } void AsyGLRender::exportHandler(int) { readyAfterExport=true; Export(0); } void AsyGLRender::reshape(int width, int height) { // Call base class to handle dimension updates and projection AsyRender::reshape(width, height); // OpenGL-specific: update viewport and mark SSBO for reinitialization glViewport(0, 0, Width, Height); if(ssbo) initSSBO = true; } } // namespace camp #endif // HAVE_GL