Use OpenCL command queue

This commit is contained in:
Alessandro Pasotti 2018-04-19 11:42:05 +02:00
parent 16a49cddaa
commit b09df53354
8 changed files with 112 additions and 266 deletions

View File

@ -4,13 +4,19 @@ __kernel void processNineCellWindow( __global float *scanLine1,
__global float *scanLine2,
__global float *scanLine3,
__global float *resultLine,
<<<<<<< 16a49cddaa18cb6d0b12335fe24c68cda183e1c0
__global float *rasterParams // mInputNodataValue, mOutputNodataValue, mZFactor, mCellSizeX, mCellSizeY
)
{
=======
__global float *rasterParams
) {
>>>>>>> Use OpenCL command queue
// Get the index of the current element
const int i = get_global_id(0);
<<<<<<< 16a49cddaa18cb6d0b12335fe24c68cda183e1c0
if ( scanLine2[i+1] == rasterParams[0] )
{
resultLine[i] = rasterParams[1];
@ -36,5 +42,38 @@ __kernel void processNineCellWindow( __global float *scanLine1,
{
resultLine[i] = 180.0f + atan2pi( derX, derY ) * 180.0f;
}
=======
// Do the operation
//return (( (x31 - x11) + 2 * (x32 - x12) + (x33 - x13) ) / (8 * mCellSizeX))
float derX = calcFirstDer( scanLine1[i], scanLine2[i], scanLine3[i],
scanLine1[i+1], scanLine2[i+1], scanLine3[i+1],
scanLine1[i+2], scanLine2[i+2], scanLine3[i+2],
rasterParams[0], rasterParams[1], rasterParams[2], rasterParams[3]
);
//return (((x11 - x13) + 2 * (x21 - x23) + (x31 - x33)) / ( 8 * mCellSizeY));
float derY = calcFirstDer( scanLine1[i+2], scanLine1[i+1], scanLine1[i],
scanLine2[i+2], scanLine2[i+1], scanLine2[i],
scanLine3[i+2], scanLine3[i+1], scanLine3[i],
rasterParams[0], rasterParams[1], rasterParams[2], rasterParams[4]
);
if ( derX == rasterParams[1] || derY == rasterParams[1] ||
( derX == 0.0f && derY == 0.0f) )
{
resultLine[i] = rasterParams[1];
}
else
{
// 180.0 / M_PI = 57.29577951308232
float aspect = atan2( derX, derY ) * 57.29577951308232;
if ( aspect < 0 )
resultLine[i] = 90.0f - aspect;
else if (aspect > 90.0f)
// 360 + 90 = 450
resultLine[i] = 450.0f - aspect;
else
resultLine[i] = 90.0 - aspect;
>>>>>>> Use OpenCL command queue
}
}

View File

@ -4,13 +4,19 @@ __kernel void processNineCellWindow( __global float *scanLine1,
__global float *scanLine2,
__global float *scanLine3,
__global float *resultLine,
<<<<<<< 16a49cddaa18cb6d0b12335fe24c68cda183e1c0
__global float *rasterParams // mInputNodataValue, mOutputNodataValue, mZFactor, mCellSizeX, mCellSizeY
)
{
=======
__global float *rasterParams
) {
>>>>>>> Use OpenCL command queue
// Get the index of the current element
const int i = get_global_id(0);
<<<<<<< 16a49cddaa18cb6d0b12335fe24c68cda183e1c0
if ( scanLine2[i+1] == rasterParams[0] )
{
resultLine[i] = rasterParams[1];
@ -38,5 +44,31 @@ __kernel void processNineCellWindow( __global float *scanLine1,
res = atanpi( res );
resultLine[i] = res * 180.0f;
}
=======
// Do the operation
//return (( (x31 - x11) + 2 * (x32 - x12) + (x33 - x13) ) / (8 * mCellSizeX))
float derX = calcFirstDer( scanLine1[i], scanLine2[i], scanLine3[i],
scanLine1[i+1], scanLine2[i+1], scanLine3[i+1],
scanLine1[i+2], scanLine2[i+2], scanLine3[i+2],
rasterParams[0], rasterParams[1], rasterParams[2], rasterParams[3]
);
//return (((x11 - x13) + 2 * (x21 - x23) + (x31 - x33)) / ( 8 * mCellSizeY));
float derY = calcFirstDer( scanLine1[i+2], scanLine1[i+1], scanLine1[i],
scanLine2[i+2], scanLine2[i+1], scanLine2[i],
scanLine3[i+2], scanLine3[i+1], scanLine3[i],
rasterParams[0], rasterParams[1], rasterParams[2], rasterParams[4]
);
if ( derX == rasterParams[1] || derY == rasterParams[1] )
{
resultLine[i] = rasterParams[1];
}
else
{
float res = sqrt( derX * derX + derY * derY );
res = atanpi( res );
resultLine[i] = res * 180.0;
>>>>>>> Use OpenCL command queue
}
}

View File

@ -1,114 +0,0 @@
float calcFirstDer( float x11, float x21, float x31, float x12, float x22, float x32, float x13, float x23, float x33,
float mInputNodataValue, float mOutputNodataValue, float mZFactor, float mCellSize )
{
//the basic formula would be simple, but we need to test for nodata values...
//X: return (( (x31 - x11) + 2 * (x32 - x12) + (x33 - x13) ) / (8 * mCellSizeX));
//Y: return (((x11 - x13) + 2 * (x21 - x23) + (x31 - x33)) / ( 8 * mCellSizeY));
int weight = 0;
float sum = 0;
//first row
if ( x31 != mInputNodataValue && x11 != mInputNodataValue ) //the normal case
{
sum += ( x31 - x11 );
weight += 2;
}
else if ( x31 == mInputNodataValue && x11 != mInputNodataValue && x21 != mInputNodataValue ) //probably 3x3 window is at the border
{
sum += ( x21 - x11 );
weight += 1;
}
else if ( x11 == mInputNodataValue && x31 != mInputNodataValue && x21 != mInputNodataValue ) //probably 3x3 window is at the border
{
sum += ( x31 - x21 );
weight += 1;
}
//second row
if ( x32 != mInputNodataValue && x12 != mInputNodataValue ) //the normal case
{
sum += 2.0f * ( x32 - x12 );
weight += 4;
}
else if ( x32 == mInputNodataValue && x12 != mInputNodataValue && x22 != mInputNodataValue )
{
sum += 2.0f * ( x22 - x12 );
weight += 2;
}
else if ( x12 == mInputNodataValue && x32 != mInputNodataValue && x22 != mInputNodataValue )
{
sum += 2.0f * ( x32 - x22 );
weight += 2;
}
//third row
if ( x33 != mInputNodataValue && x13 != mInputNodataValue ) //the normal case
{
sum += ( x33 - x13 );
weight += 2;
}
else if ( x33 == mInputNodataValue && x13 != mInputNodataValue && x23 != mInputNodataValue )
{
sum += ( x23 - x13 );
weight += 1;
}
else if ( x13 == mInputNodataValue && x33 != mInputNodataValue && x23 != mInputNodataValue )
{
sum += ( x33 - x23 );
weight += 1;
}
if ( weight == 0 )
{
return mOutputNodataValue;
}
return sum / ( weight * mCellSize ) * mZFactor;
}
// Aspect kernel: each work-item computes one output cell from a 3x3 window
// taken from three consecutive scanlines.
//
// scanLine1..3  input rows; work-item i reads elements i, i+1 and i+2 of each row
// resultLine    output row; work-item i writes element i
// rasterParams  values forwarded to calcFirstDer in this order:
//               [0] input nodata, [1] output nodata, [2] z factor,
//               [3] cell size X, [4] cell size Y
//
// All arithmetic is kept in single precision: unsuffixed floating-point
// literals are double precision and would pull the computation into double
// on devices that support it.
__kernel void processNineCellWindow( __global float *scanLine1,
                                     __global float *scanLine2,
                                     __global float *scanLine3,
                                     __global float *resultLine,
                                     __global float *rasterParams
                                   )
{
  // Get the index of the current element
  const int i = get_global_id( 0 );

  // X derivative
  //return (( (x31 - x11) + 2 * (x32 - x12) + (x33 - x13) ) / (8 * mCellSizeX))
  float derX = calcFirstDer( scanLine1[i], scanLine2[i], scanLine3[i],
                             scanLine1[i + 1], scanLine2[i + 1], scanLine3[i + 1],
                             scanLine1[i + 2], scanLine2[i + 2], scanLine3[i + 2],
                             rasterParams[0], rasterParams[1], rasterParams[2], rasterParams[3]
                           );

  // Y derivative
  //return (((x11 - x13) + 2 * (x21 - x23) + (x31 - x33)) / ( 8 * mCellSizeY));
  float derY = calcFirstDer( scanLine1[i + 2], scanLine1[i + 1], scanLine1[i],
                             scanLine2[i + 2], scanLine2[i + 1], scanLine2[i],
                             scanLine3[i + 2], scanLine3[i + 1], scanLine3[i],
                             rasterParams[0], rasterParams[1], rasterParams[2], rasterParams[4]
                           );

  // A nodata derivative, or a perfectly flat cell, has no aspect
  if ( derX == rasterParams[1] || derY == rasterParams[1] ||
       ( derX == 0.0f && derY == 0.0f ) )
  {
    resultLine[i] = rasterParams[1];
  }
  else
  {
    // 180.0 / M_PI = 57.29577951308232
    const float aspect = atan2( derX, derY ) * 57.29577951308232f;
    if ( aspect > 90.0f )
    {
      // 360 + 90 = 450
      resultLine[i] = 450.0f - aspect;
    }
    else
    {
      // Covers both negative angles and angles in [0, 90]
      resultLine[i] = 90.0f - aspect;
    }
  }
}

View File

@ -217,10 +217,20 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee
// Prepare context and queue
cl::Context ctx = QgsOpenClUtils::context();
<<<<<<< 16a49cddaa18cb6d0b12335fe24c68cda183e1c0
cl::CommandQueue queue( ctx );
//keep only three scanlines in memory at a time, make room for initial and final nodata
QgsOpenClUtils::CPLAllocator<float> scanLine( xSize + 2 );
=======
cl::Context::setDefault( ctx );
cl::CommandQueue queue( ctx );
//keep only three scanlines in memory at a time, make room for initial and final nodata
QgsOpenClUtils::CPLAllocator<float> scanLine1( xSize + 2 );
QgsOpenClUtils::CPLAllocator<float> scanLine2( xSize + 2 );
QgsOpenClUtils::CPLAllocator<float> scanLine3( xSize + 2 );
>>>>>>> Use OpenCL command queue
QgsOpenClUtils::CPLAllocator<float> resultLine( xSize );
// Cast to float (because double just crashes on some GPUs)
@ -289,7 +299,18 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee
{
QgsDebugMsg( "Raster IO Error" );
}
<<<<<<< 16a49cddaa18cb6d0b12335fe24c68cda183e1c0
queue.enqueueWriteBuffer( scanLine2Buffer, CL_TRUE, 0, bufferSize, scanLine.get() );
=======
}
else
{
//normally fetch only scanLine3 and release scanline 1 if we move forward one row
scanLine1.reset( scanLine2.release() );
scanLine2.reset( scanLine3.release() );
scanLine3.reset( xSize + 2 );
}
>>>>>>> Use OpenCL command queue
// Read scanline3: second real raster row
if ( GDALRasterIO( rasterBand, GF_Read, 0, i + 1, xSize, 1, &scanLine[1], xSize, 1, GDT_Float32, 0, 0 ) != CE_None )
@ -320,6 +341,23 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee
queue.enqueueWriteBuffer( *scanLineBuffer[rowIndex[2]], CL_TRUE, 0, bufferSize, scanLine.get() ); // row 0
}
}
<<<<<<< 16a49cddaa18cb6d0b12335fe24c68cda183e1c0
=======
// Set first and last extra columns to nodata
scanLine1[0] = scanLine1[xSize + 1] = mInputNodataValue;
scanLine2[0] = scanLine2[xSize + 1] = mInputNodataValue;
scanLine3[0] = scanLine3[xSize + 1] = mInputNodataValue;
// TODO: There is room for further optimization here: instead of replacing all the buffers
// we could replace just the new one (the top row) and switch the order
// of buffer arguments in the kernel call.
errorCode = cl::enqueueWriteBuffer( scanLine1Buffer, CL_TRUE, 0,
sizeof( float ) * ( xSize + 2 ), scanLine1.get() );
errorCode = cl::enqueueWriteBuffer( scanLine2Buffer, CL_TRUE, 0,
sizeof( float ) * ( xSize + 2 ), scanLine2.get() );
errorCode = cl::enqueueWriteBuffer( scanLine3Buffer, CL_TRUE, 0,
sizeof( float ) * ( xSize + 2 ), scanLine3.get() );
>>>>>>> Use OpenCL command queue
kernel( cl::EnqueueArgs(
queue,

View File

@ -1,106 +0,0 @@
float calcFirstDer( float x11, float x21, float x31, float x12, float x22, float x32, float x13, float x23, float x33,
float mInputNodataValue, float mOutputNodataValue, float mZFactor, float mCellSize )
{
//the basic formula would be simple, but we need to test for nodata values...
//X: return (( (x31 - x11) + 2 * (x32 - x12) + (x33 - x13) ) / (8 * mCellSizeX));
//Y: return (((x11 - x13) + 2 * (x21 - x23) + (x31 - x33)) / ( 8 * mCellSizeY));
int weight = 0;
float sum = 0;
//first row
if ( x31 != mInputNodataValue && x11 != mInputNodataValue ) //the normal case
{
sum += ( x31 - x11 );
weight += 2;
}
else if ( x31 == mInputNodataValue && x11 != mInputNodataValue && x21 != mInputNodataValue ) //probably 3x3 window is at the border
{
sum += ( x21 - x11 );
weight += 1;
}
else if ( x11 == mInputNodataValue && x31 != mInputNodataValue && x21 != mInputNodataValue ) //probably 3x3 window is at the border
{
sum += ( x31 - x21 );
weight += 1;
}
//second row
if ( x32 != mInputNodataValue && x12 != mInputNodataValue ) //the normal case
{
sum += 2.0f * ( x32 - x12 );
weight += 4;
}
else if ( x32 == mInputNodataValue && x12 != mInputNodataValue && x22 != mInputNodataValue )
{
sum += 2.0f * ( x22 - x12 );
weight += 2;
}
else if ( x12 == mInputNodataValue && x32 != mInputNodataValue && x22 != mInputNodataValue )
{
sum += 2.0f * ( x32 - x22 );
weight += 2;
}
//third row
if ( x33 != mInputNodataValue && x13 != mInputNodataValue ) //the normal case
{
sum += ( x33 - x13 );
weight += 2;
}
else if ( x33 == mInputNodataValue && x13 != mInputNodataValue && x23 != mInputNodataValue )
{
sum += ( x23 - x13 );
weight += 1;
}
else if ( x13 == mInputNodataValue && x33 != mInputNodataValue && x23 != mInputNodataValue )
{
sum += ( x33 - x23 );
weight += 1;
}
if ( weight == 0 )
{
return mOutputNodataValue;
}
return sum / ( weight * mCellSize ) * mZFactor;
}
// Slope kernel: each work-item computes one output cell from a 3x3 window
// taken from three consecutive scanlines.
//
// scanLine1..3  input rows; work-item i reads elements i, i+1 and i+2 of each row
// resultLine    output row; work-item i writes element i
// rasterParams  values forwarded to calcFirstDer in this order:
//               [0] input nodata, [1] output nodata, [2] z factor,
//               [3] cell size X, [4] cell size Y
//
// All arithmetic is kept in single precision: an unsuffixed floating-point
// literal is double precision and would pull the final multiplication into
// double on devices that support it.
__kernel void processNineCellWindow( __global float *scanLine1,
                                     __global float *scanLine2,
                                     __global float *scanLine3,
                                     __global float *resultLine,
                                     __global float *rasterParams
                                   )
{
  // Get the index of the current element
  const int i = get_global_id( 0 );

  // X derivative
  //return (( (x31 - x11) + 2 * (x32 - x12) + (x33 - x13) ) / (8 * mCellSizeX))
  float derX = calcFirstDer( scanLine1[i], scanLine2[i], scanLine3[i],
                             scanLine1[i + 1], scanLine2[i + 1], scanLine3[i + 1],
                             scanLine1[i + 2], scanLine2[i + 2], scanLine3[i + 2],
                             rasterParams[0], rasterParams[1], rasterParams[2], rasterParams[3]
                           );

  // Y derivative
  //return (((x11 - x13) + 2 * (x21 - x23) + (x31 - x33)) / ( 8 * mCellSizeY));
  float derY = calcFirstDer( scanLine1[i + 2], scanLine1[i + 1], scanLine1[i],
                             scanLine2[i + 2], scanLine2[i + 1], scanLine2[i],
                             scanLine3[i + 2], scanLine3[i + 1], scanLine3[i],
                             rasterParams[0], rasterParams[1], rasterParams[2], rasterParams[4]
                           );

  // Propagate nodata if either derivative could not be computed
  if ( derX == rasterParams[1] || derY == rasterParams[1] )
  {
    resultLine[i] = rasterParams[1];
  }
  else
  {
    // Gradient magnitude -> angle; atanpi returns atan(x) / pi, so * 180 gives degrees
    float res = sqrt( derX * derX + derY * derY );
    res = atanpi( res );
    resultLine[i] = res * 180.0f;
  }
}

View File

@ -94,8 +94,11 @@
#include "processing/qgs3dalgorithms.h"
#endif
<<<<<<< 16a49cddaa18cb6d0b12335fe24c68cda183e1c0
#include "qgsgui.h"
#include "qgsnative.h"
=======
>>>>>>> Use OpenCL command queue
#ifdef HAVE_OPENCL
#include "qgsopenclutils.h"
#endif

View File

@ -528,35 +528,3 @@ cl::Program QgsOpenClUtils::buildProgram( const cl::Context &context, const QStr
}
return program;
}
/**
 * Builds an OpenCL program from \a source in the given \a context, using
 * the "-cl-std=CL1.1" build option.
 *
 * Failures are always logged to the message log as critical:
 * - a cl::BuildError is logged together with the first available build log
 *   and is re-thrown only when \a exceptionBehavior is Throw;
 * - any other cl::Error is logged and always re-thrown, regardless of
 *   \a exceptionBehavior.
 *
 * Exceptions are re-thrown with a bare `throw;` so the original exception
 * object (and its dynamic type) reaches the caller instead of a copy.
 *
 * \returns the built program; when a build error was swallowed the returned
 *          program is whatever state the failed build left it in.
 */
cl::Program QgsOpenClUtils::buildProgram( const cl::Context &context, const QString &source, ExceptionBehavior exceptionBehavior )
{
  cl::Program program;
  try
  {
    program = cl::Program( context, source.toStdString( ) );
    program.build( "-cl-std=CL1.1" );
  }
  catch ( const cl::BuildError &e )
  {
    // Only the first entry of the build log is reported
    const cl::BuildLogType buildLogs = e.getBuildLog();
    QString buildLog;
    if ( !buildLogs.empty() )
      buildLog = QString::fromStdString( buildLogs[0].second );
    else
      buildLog = QObject::tr( "Build logs not available!" );
    const QString err = QObject::tr( "Error building OpenCL program: %1" )
                        .arg( buildLog );
    QgsMessageLog::logMessage( err, LOGMESSAGE_TAG, Qgis::Critical );
    if ( exceptionBehavior == Throw )
      throw;
  }
  catch ( const cl::Error &e )
  {
    // NOTE(review): the message says "running" although this path is reached while
    // creating/building the program - confirm before changing the translatable string
    const QString err = QObject::tr( "Error %1 running OpenCL program in %2" )
                        .arg( errorText( e.err() ), QString::fromStdString( e.what() ) );
    QgsMessageLog::logMessage( err, LOGMESSAGE_TAG, Qgis::Critical );
    throw;
  }
  return program;
}

View File

@ -65,7 +65,6 @@ class CORE_EXPORT QgsOpenClUtils
public:
<<<<<<< 79f0eadb05fe4d845ab29045c40c34e1e08b4710
/**
* The ExceptionBehavior enum defines how exceptions generated by OpenCL should be treated
*/
@ -114,15 +113,6 @@ class CORE_EXPORT QgsOpenClUtils
*
* This function must always be called before using QGIS OpenCL utils
*/
=======
enum ExceptionBehavior
{
Catch,
Throw
};
static bool enabled();
>>>>>>> Wrap make program in OpenCL utils
static bool available();
//! Returns true if OpenCL is enabled in the user settings
@ -176,7 +166,6 @@ class CORE_EXPORT QgsOpenClUtils
//! Returns a string representation from an OpenCL \a errorCode
static QString errorText( const int errorCode );
<<<<<<< 79f0eadb05fe4d845ab29045c40c34e1e08b4710
/**
* Build the program from \a source in the given \a context and depending on \a exceptionBehavior
@ -192,9 +181,6 @@ class CORE_EXPORT QgsOpenClUtils
* no device were identified or OpenCL support is not available
* and enabled
*/
=======
static cl::Program buildProgram( const cl::Context &context, const QString &source, ExceptionBehavior exceptionBehavior = Catch );
>>>>>>> Wrap make program in OpenCL utils
static cl::Context context();
//! Returns the base path to OpenCL program directory