From 528302cc8952497513b44423dfca6fcf4857760c Mon Sep 17 00:00:00 2001 From: Alessandro Pasotti Date: Fri, 13 Jul 2018 15:11:21 +0200 Subject: [PATCH] [opencl] Use fast formula for hillshade Also optimize cl buffers --- src/analysis/raster/qgshillshadefilter.cpp | 26 ++++++++++++ src/analysis/raster/qgshillshadefilter.h | 11 ++++++ src/analysis/raster/qgsninecellfilter.cpp | 46 ++++++++++++++++++++++ 3 files changed, 83 insertions(+) diff --git a/src/analysis/raster/qgshillshadefilter.cpp b/src/analysis/raster/qgshillshadefilter.cpp index 90cafa37dcb..3862ac94092 100644 --- a/src/analysis/raster/qgshillshadefilter.cpp +++ b/src/analysis/raster/qgshillshadefilter.cpp @@ -70,6 +70,7 @@ void QgsHillshadeFilter::setLightAngle( float angle ) mSinZenithRad = std::sin( angle * static_cast( M_PI ) / 180.0f ); } +<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c #ifdef HAVE_OPENCL void QgsHillshadeFilter::addExtraRasterParams( std::vector ¶ms ) @@ -82,3 +83,28 @@ void QgsHillshadeFilter::addExtraRasterParams( std::vector ¶ms ) } #endif +======= +void QgsHillshadeFilter::addExtraRasterParams( std::vector ¶ms ) +{ + float azimuthRad = -1 * mLightAzimuth * M_PI / 180.0; + float zenithRad = std::max( 0.0f, 90.0f - mLightAngle ) * M_PI / 180.0; + float cosZenithRad = std::cos( zenithRad ); + float cos_az_mul_cos_alt_mul_z = std::cos( azimuthRad ) * cosZenithRad * mZFactor; + float sin_az_mul_cos_alt_mul_z = std::sin( azimuthRad ) * cosZenithRad * mZFactor; + float cos_az_mul_cos_alt_mul_z_mul_254 = 254.0 * cos_az_mul_cos_alt_mul_z; + float sin_az_mul_cos_alt_mul_z_mul_254 = 254.0 * sin_az_mul_cos_alt_mul_z; + float square_z = mZFactor * mZFactor; + float sin_altRadians_mul_254 = 254.0 * std::sin( zenithRad ); + + // For fast formula from GDAL DEM + params.push_back( cos_az_mul_cos_alt_mul_z_mul_254 ); // 5 + params.push_back( sin_az_mul_cos_alt_mul_z_mul_254 ); // 6 + params.push_back( square_z ); // 7 + params.push_back( sin_altRadians_mul_254 ); // 8 + /*/ Slow formula + params.push_back( azimuthRad ); // 9 + params.push_back( zenithRad ); // 10 + */ + +} +>>>>>>> [opencl] Use fast formula for hillshade diff --git a/src/analysis/raster/qgshillshadefilter.h b/src/analysis/raster/qgshillshadefilter.h index 86d03d63583..f477e68d546 100644 --- a/src/analysis/raster/qgshillshadefilter.h +++ b/src/analysis/raster/qgshillshadefilter.h @@ -44,15 +44,21 @@ class ANALYSIS_EXPORT QgsHillshadeFilter: public QgsDerivativeFilter void setLightAngle( float angle ); private: +<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c #ifdef HAVE_OPENCL +======= +>>>>>>> [opencl] Use fast formula for hillshade const QString openClProgramBaseName() const override { return QStringLiteral( "hillshade" ); } +<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c #endif +======= +>>>>>>> [opencl] Use fast formula for hillshade float mLightAzimuth; float mLightAngle; // Precalculate for speed: @@ -67,6 +73,11 @@ class ANALYSIS_EXPORT QgsHillshadeFilter: public QgsDerivativeFilter void addExtraRasterParams( std::vector ¶ms ) override; #endif + + // QgsNineCellFilter interface + private: + + void addExtraRasterParams( std::vector ¶ms ) override; }; #endif // QGSHILLSHADEFILTER_H diff --git a/src/analysis/raster/qgsninecellfilter.cpp b/src/analysis/raster/qgsninecellfilter.cpp index cb397ef686c..1973859393d 100644 --- a/src/analysis/raster/qgsninecellfilter.cpp +++ b/src/analysis/raster/qgsninecellfilter.cpp @@ -261,21 +261,30 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee addExtraRasterParams( rasterParams ); std::size_t bufferSize( sizeof( float ) * ( xSize + 2 ) ); +<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c <<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f std::size_t inputSize( sizeof( float ) * ( xSize ) ); ======= >>>>>>> [opencl] Reduce memory footprint and optimize +======= + std::size_t inputSize( sizeof( float ) * ( xSize ) ); +>>>>>>> [opencl] Use fast formula for hillshade cl::Buffer rasterParamsBuffer( queue, rasterParams.begin(), rasterParams.end(), true, false, nullptr ); cl::Buffer scanLine1Buffer( ctx, CL_MEM_READ_ONLY, bufferSize, nullptr, nullptr ); cl::Buffer scanLine2Buffer( ctx, CL_MEM_READ_ONLY, bufferSize, nullptr, nullptr ); cl::Buffer scanLine3Buffer( ctx, CL_MEM_READ_ONLY, bufferSize, nullptr, nullptr ); +<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c <<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f cl::Buffer *scanLineBuffer[3] = {&scanLine1Buffer, &scanLine2Buffer, &scanLine3Buffer}; cl::Buffer resultLineBuffer( ctx, CL_MEM_WRITE_ONLY, inputSize, nullptr, nullptr ); ======= cl::Buffer resultLineBuffer( ctx, CL_MEM_WRITE_ONLY, sizeof( float ) * xSize, nullptr, nullptr ); >>>>>>> [opencl] Reduce memory footprint and optimize +======= + cl::Buffer *scanLineBuffer[3] = {&scanLine1Buffer, &scanLine2Buffer, &scanLine3Buffer}; + cl::Buffer resultLineBuffer( ctx, CL_MEM_WRITE_ONLY, inputSize, nullptr, nullptr ); +>>>>>>> [opencl] Use fast formula for hillshade // Create a program from the kernel source cl::Program program( QgsOpenClUtils::buildProgram( ctx, source, QgsOpenClUtils::ExceptionBehavior::Throw ) ); @@ -289,12 +298,18 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee cl::Buffer & > ( program, "processNineCellWindow" ); +<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c <<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f // Rotate buffer index std::vector rowIndex = {0, 1, 2}; ======= >>>>>>> [opencl] Reduce memory footprint and optimize +======= + // Rotate buffer index + std::vector rowIndex = {0, 1, 2}; + +>>>>>>> [opencl] Use fast formula for hillshade // values outside the layer extent (if the 3x3 window is on the border) are sent to the processing method as (input) nodata values for ( int i = 0; i < ySize; ++i ) { @@ -310,18 +325,24 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee if ( i == 0 ) { +<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c <<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f // Fill scanline 1 with (input) nodata for the values above the first row and // feed scanline2 with the first actual data row ======= // Fill scanline 1 with (input) nodata for the values above the first row and feed scanline2 with the first row >>>>>>> [opencl] Reduce memory footprint and optimize +======= + // Fill scanline 1 with (input) nodata for the values above the first row and + // feed scanline2 with the first actual data row +>>>>>>> [opencl] Use fast formula for hillshade for ( int a = 0; a < xSize + 2 ; ++a ) { scanLine[a] = mInputNodataValue; } queue.enqueueWriteBuffer( scanLine1Buffer, CL_TRUE, 0, bufferSize, scanLine.get() ); +<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c <<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f // Read scanline2: first real raster row if ( GDALRasterIO( rasterBand, GF_Read, 0, i, xSize, 1, &scanLine[1], xSize, 1, GDT_Float32, 0, 0 ) != CE_None ) @@ -346,14 +367,23 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee ======= // Read scanline2 if ( GDALRasterIO( rasterBand, GF_Read, 0, 0, xSize, 1, &scanLine[1], xSize, 1, GDT_Float32, 0, 0 ) != CE_None ) +======= + // Read scanline2: first real raster row + if ( GDALRasterIO( rasterBand, GF_Read, 0, i, xSize, 1, &scanLine[1], xSize, 1, GDT_Float32, 0, 0 ) != CE_None ) +>>>>>>> [opencl] Use fast formula for hillshade { QgsDebugMsg( "Raster IO Error" ); } queue.enqueueWriteBuffer( scanLine2Buffer, CL_TRUE, 0, bufferSize, scanLine.get() ); +<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c // Read scanline3 if ( GDALRasterIO( rasterBand, GF_Read, 0, 0, xSize, 1, &scanLine[1], xSize, 1, GDT_Float32, 0, 0 ) != CE_None ) >>>>>>> [opencl] Reduce memory footprint and optimize +======= + // Read scanline3: second real raster row + if ( GDALRasterIO( rasterBand, GF_Read, 0, i + 1, xSize, 1, &scanLine[1], xSize, 1, GDT_Float32, 0, 0 ) != CE_None ) +>>>>>>> [opencl] Use fast formula for hillshade { QgsDebugMsg( "Raster IO Error" ); } @@ -362,6 +392,7 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee else { // Normally fetch only scanLine3 and move forward one row +<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c <<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f // Read scanline 3, fill the last row with nodata values if it's the last iteration ======= @@ -370,12 +401,16 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee // Read scanline 3 >>>>>>> [opencl] Reduce memory footprint and optimize +======= + // Read scanline 3, fill the last row with nodata values if it's the last iteration +>>>>>>> [opencl] Use fast formula for hillshade if ( i == ySize - 1 ) //fill the row below the bottom with nodata values { for ( int a = 0; a < xSize + 2; ++a ) { scanLine[a] = mInputNodataValue; } +<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c <<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f queue.enqueueWriteBuffer( *scanLineBuffer[rowIndex[2]], CL_TRUE, 0, bufferSize, scanLine.get() ); // row 0 } @@ -383,6 +418,9 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee // Overwrite from input, skip first and last ======= queue.enqueueWriteBuffer( scanLine3Buffer, CL_TRUE, 0, bufferSize, scanLine.get() ); // row 0 +======= + queue.enqueueWriteBuffer( *scanLineBuffer[rowIndex[2]], CL_TRUE, 0, bufferSize, scanLine.get() ); // row 0 +>>>>>>> [opencl] Use fast formula for hillshade } else // Overwrite from input, skip first and last >>>>>>> [opencl] Reduce memory footprint and optimize @@ -391,6 +429,7 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee { QgsDebugMsg( "Raster IO Error" ); } +<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c <<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f queue.enqueueWriteBuffer( *scanLineBuffer[rowIndex[2]], CL_TRUE, 0, bufferSize, scanLine.get() ); // row 0 } @@ -414,6 +453,9 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee >>>>>>> Use OpenCL command queue ======= queue.enqueueWriteBuffer( scanLine3Buffer, CL_TRUE, 0, bufferSize, scanLine.get() ); // row 0 +======= + queue.enqueueWriteBuffer( *scanLineBuffer[rowIndex[2]], CL_TRUE, 0, bufferSize, scanLine.get() ); // row 0 +>>>>>>> [opencl] Use fast formula for hillshade } } >>>>>>> [opencl] Reduce memory footprint and optimize @@ -429,11 +471,15 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee rasterParamsBuffer ); +<<<<<<< a73bbbad21629d81b9b1d4217a096a930473eb5c <<<<<<< 3bad167572f04c553d1e3d60f9c15d3f8511365f queue.enqueueReadBuffer( resultLineBuffer, CL_TRUE, 0, inputSize, resultLine.get() ); ======= queue.enqueueReadBuffer( resultLineBuffer, CL_TRUE, 0, xSize * sizeof( float ), resultLine.get() ); >>>>>>> [opencl] Reduce memory footprint and optimize +======= + queue.enqueueReadBuffer( resultLineBuffer, CL_TRUE, 0, inputSize, resultLine.get() ); +>>>>>>> [opencl] Use fast formula for hillshade if ( GDALRasterIO( outputRasterBand, GF_Write, 0, i, xSize, 1, resultLine.get(), xSize, 1, GDT_Float32, 0, 0 ) != CE_None ) {