From 0936526ba7f531cef064068bcea60c0e4ec7fd7d Mon Sep 17 00:00:00 2001 From: Tudor Barascu Date: Thu, 18 Sep 2025 02:08:40 +0300 Subject: [PATCH] add substr_count expression function --- resources/function_help/json/substr_count | 57 +++++++++++++++++++ src/core/expression/qgsexpressionfunction.cpp | 41 +++++++++++++ tests/src/core/testqgsexpression.cpp | 6 ++ 3 files changed, 104 insertions(+) create mode 100644 resources/function_help/json/substr_count diff --git a/resources/function_help/json/substr_count b/resources/function_help/json/substr_count new file mode 100644 index 00000000000..4ec07a73165 --- /dev/null +++ b/resources/function_help/json/substr_count @@ -0,0 +1,57 @@ +{ + "name": "substr_count", + "type": "function", + "groups": [ + "String" + ], + "description": "Counts the number of occurrences of a substring within a string. By default, the function counts non-overlapping occurrences (like in Python). If the third argument is set to true, the function counts overlapping occurrences (like in Qt).", + "arguments": [ + { + "arg": "input", + "description": "The input string to search in." + }, + { + "arg": "substring", + "description": "The substring to search for." + }, + { + "arg": "overlapping", + "description": "Optional flag to control whether overlapping occurrences should be counted. Defaults to false (non-overlapping counting). Set to true to count overlapping occurrences." + } + ], + "examples": [ + { + "expression": "substr_count('banana', 'an')", + "returns": "2", + "description": "Counts non-overlapping occurrences of 'an' in 'banana'." + }, + { + "expression": "substr_count('Funniness', 'n')", + "returns": "3", + "description": "Counts non-overlapping occurrences of 'n' in 'Funniness'." + }, + { + "expression": "substr_count('aaaaa', 'aa')", + "returns": "2", + "description": "Counts non-overlapping occurrences of 'aa' in 'aaaaa'." 
+    },
+    {
+      "expression": "substr_count('aaaaa', 'aa', true)",
+      "returns": "4",
+      "description": "Counts overlapping occurrences of 'aa' in 'aaaaa'. The substring 'aa' appears four times in an overlapping fashion."
+    },
+    {
+      "expression": "substr_count('BANANA', 'an')",
+      "returns": "0",
+      "description": "Counts non-overlapping occurrences of 'an' in 'BANANA'. Case-sensitive, so no match."
+    }
+  ],
+  "tags": [
+    "text",
+    "find",
+    "string",
+    "substring",
+    "count",
+    "occurrence"
+  ]
+}
diff --git a/src/core/expression/qgsexpressionfunction.cpp b/src/core/expression/qgsexpressionfunction.cpp
index 751b609ac8b..d2f44b54b7b 100644
--- a/src/core/expression/qgsexpressionfunction.cpp
+++ b/src/core/expression/qgsexpressionfunction.cpp
@@ -2698,6 +2698,41 @@ static QVariant fcnRight( const QVariantList &values, const QgsExpressionContext
   return string.right( pos );
 }
 
+// Expression function substr_count(): counts how often the second value (the needle)
+// occurs in the first value (the haystack). Matching is case-sensitive.
+static QVariant fcnSubstrCount( const QVariantList &values, const QgsExpressionContext *, QgsExpression *parent, const QgsExpressionNodeFunction * )
+{
+  // Anything other than two or three values yields an invalid QVariant.
+  const int argCount = values.length();
+  if ( argCount < 2 || argCount > 3 )
+    return QVariant();
+
+  const QString haystack = QgsExpressionUtils::getStringValue( values.at( 0 ), parent );
+  const QString needle = QgsExpressionUtils::getStringValue( values.at( 1 ), parent );
+  // The third value is optional; when absent, matches are counted without overlap.
+  const bool allowOverlap = argCount == 3 && values.at( 2 ).toBool();
+
+  // An empty needle is reported as zero occurrences.
+  if ( needle.isEmpty() )
+    return QVariant( 0 );
+
+  int hits = 0;
+  if ( allowOverlap )
+  {
+    // QString::count() includes overlapping occurrences.
+    hits = haystack.count( needle );
+  }
+  else
+  {
+    // Continue each search just past the previous match so matches never overlap.
+    const int step = needle.length();
+    for ( int at = haystack.indexOf( needle ); at != -1; at = haystack.indexOf( needle, at + step ) )
+      ++hits;
+  }
+
+  return QVariant( hits );
+}
+
 static QVariant fcnLeft( const QVariantList &values, const QgsExpressionContext *, QgsExpression *parent, const QgsExpressionNodeFunction * )
 {
   QString string = QgsExpressionUtils::getStringValue( values.at( 0 ), parent );
@@ -8714,6 +8749,12 @@ const QList &QgsExpression::Functions()
       << new
QgsStaticExpressionFunction( QStringLiteral( "set_timezone" ), { QgsExpressionFunction::Parameter( QStringLiteral( "datetime" ) ), QgsExpressionFunction::Parameter( QStringLiteral( "timezone" ) ) }, fcnSetTimeZone, QStringLiteral( "Date and Time" ) ) << new QgsStaticExpressionFunction( QStringLiteral( "convert_timezone" ), { QgsExpressionFunction::Parameter( QStringLiteral( "datetime" ) ), QgsExpressionFunction::Parameter( QStringLiteral( "timezone" ) ) }, fcnConvertTimeZone, QStringLiteral( "Date and Time" ) ) << new QgsStaticExpressionFunction( QStringLiteral( "lower" ), QgsExpressionFunction::ParameterList() << QgsExpressionFunction::Parameter( QStringLiteral( "string" ) ), fcnLower, QStringLiteral( "String" ) ) + << new QgsStaticExpressionFunction( QStringLiteral( "substr_count" ), QgsExpressionFunction::ParameterList() + << QgsExpressionFunction::Parameter( QStringLiteral( "input" ) ) + << QgsExpressionFunction::Parameter( QStringLiteral( "substring" ) ) + << QgsExpressionFunction::Parameter( QStringLiteral( "overlapping" ), true, false ), // Optional parameter with default value of false + fcnSubstrCount, + QStringLiteral( "String" ) ) << new QgsStaticExpressionFunction( QStringLiteral( "upper" ), QgsExpressionFunction::ParameterList() << QgsExpressionFunction::Parameter( QStringLiteral( "string" ) ), fcnUpper, QStringLiteral( "String" ) ) << new QgsStaticExpressionFunction( QStringLiteral( "title" ), QgsExpressionFunction::ParameterList() << QgsExpressionFunction::Parameter( QStringLiteral( "string" ) ), fcnTitle, QStringLiteral( "String" ) ) << new QgsStaticExpressionFunction( QStringLiteral( "trim" ), QgsExpressionFunction::ParameterList() << QgsExpressionFunction::Parameter( QStringLiteral( "string" ) ), fcnTrim, QStringLiteral( "String" ) ) diff --git a/tests/src/core/testqgsexpression.cpp b/tests/src/core/testqgsexpression.cpp index 82a556a596f..81c78ce1af4 100644 --- a/tests/src/core/testqgsexpression.cpp +++ b/tests/src/core/testqgsexpression.cpp @@ 
-1841,6 +1841,12 @@ class TestQgsExpression : public QObject QTest::newRow( "regexp_replace non greedy" ) << "regexp_replace('HeLLo','(?<=H).*?L', '-')" << false << QVariant( "H-Lo" ); QTest::newRow( "regexp_replace cap group" ) << "regexp_replace('HeLLo','(eL)', 'x\\\\1x')" << false << QVariant( "HxeLxLo" ); QTest::newRow( "regexp_replace invalid" ) << "regexp_replace('HeLLo','[[[', '-')" << true << QVariant(); + QTest::newRow( "substr_count basic" ) << "substr_count('banana', 'an')" << false << QVariant( 2 ); + QTest::newRow( "substr_count basic funny" ) << "substr_count('Funniness', 'n')" << false << QVariant( 3 ); + QTest::newRow( "substr_count non-overlapping counted" ) << "substr_count('aaaaa', 'aa')" << false << QVariant( 2 ); + QTest::newRow( "substr_count overlapping counted" ) << "substr_count('aaaaa', 'aa', true)" << false << QVariant( 4 ); + QTest::newRow( "substr_count empty needle" ) << "substr_count('abc', '')" << false << QVariant( 0 ); + QTest::newRow( "substr_count case sensitivity" ) << "substr_count('BANANA', 'an')" << false << QVariant( 0 ); QTest::newRow( "reverse string" ) << "reverse('HeLLo')" << false << QVariant( "oLLeH" ); QTest::newRow( "reverse empty string" ) << "reverse('')" << false << QVariant( "" ); QTest::newRow( "substr" ) << "substr('HeLLo', 3,2)" << false << QVariant( "LL" );