mirror of
https://github.com/qgis/QGIS.git
synced 2025-12-17 00:05:41 -05:00
Given a candidate string, returns true if the candidate contains all the individual words from another string, regardless of their order.
852 lines
27 KiB
C++
852 lines
27 KiB
C++
/***************************************************************************
|
|
qgsstringutils.cpp
|
|
------------------
|
|
begin : June 2015
|
|
copyright : (C) 2015 by Nyall Dawson
|
|
email : nyall dot dawson at gmail dot com
|
|
***************************************************************************
|
|
* *
|
|
* This program is free software; you can redistribute it and/or modify *
|
|
* it under the terms of the GNU General Public License as published by *
|
|
* the Free Software Foundation; either version 2 of the License, or *
|
|
* (at your option) any later version. *
|
|
* *
|
|
***************************************************************************/
|
|
|
|
#include "qgsstringutils.h"
|
|
#include "qgslogger.h"
|
|
#include <QVector>
|
|
#include <QStringList>
|
|
#include <QTextBoundaryFinder>
|
|
#include <QRegularExpression>
|
|
#include <cstdlib> // for std::abs
|
|
|
|
QString QgsStringUtils::capitalize( const QString &string, Qgis::Capitalization capitalization )
|
|
{
|
|
if ( string.isEmpty() )
|
|
return QString();
|
|
|
|
switch ( capitalization )
|
|
{
|
|
case Qgis::Capitalization::MixedCase:
|
|
case Qgis::Capitalization::SmallCaps:
|
|
return string;
|
|
|
|
case Qgis::Capitalization::AllUppercase:
|
|
return string.toUpper();
|
|
|
|
case Qgis::Capitalization::AllLowercase:
|
|
case Qgis::Capitalization::AllSmallCaps:
|
|
return string.toLower();
|
|
|
|
case Qgis::Capitalization::ForceFirstLetterToCapital:
|
|
{
|
|
QString temp = string;
|
|
|
|
QTextBoundaryFinder wordSplitter( QTextBoundaryFinder::Word, string.constData(), string.length(), nullptr, 0 );
|
|
QTextBoundaryFinder letterSplitter( QTextBoundaryFinder::Grapheme, string.constData(), string.length(), nullptr, 0 );
|
|
|
|
wordSplitter.setPosition( 0 );
|
|
bool first = true;
|
|
while ( ( first && wordSplitter.boundaryReasons() & QTextBoundaryFinder::StartOfItem )
|
|
|| wordSplitter.toNextBoundary() >= 0 )
|
|
{
|
|
first = false;
|
|
letterSplitter.setPosition( wordSplitter.position() );
|
|
letterSplitter.toNextBoundary();
|
|
QString substr = string.mid( wordSplitter.position(), letterSplitter.position() - wordSplitter.position() );
|
|
temp.replace( wordSplitter.position(), substr.length(), substr.toUpper() );
|
|
}
|
|
return temp;
|
|
}
|
|
|
|
case Qgis::Capitalization::TitleCase:
|
|
{
|
|
// yes, this is MASSIVELY simplifying the problem!!
|
|
|
|
static QStringList smallWords;
|
|
static QStringList newPhraseSeparators;
|
|
static QRegularExpression splitWords;
|
|
if ( smallWords.empty() )
|
|
{
|
|
smallWords = QObject::tr( "a|an|and|as|at|but|by|en|for|if|in|nor|of|on|or|per|s|the|to|vs.|vs|via" ).split( '|' );
|
|
newPhraseSeparators = QObject::tr( ".|:" ).split( '|' );
|
|
splitWords = QRegularExpression( QStringLiteral( "\\b" ), QRegularExpression::UseUnicodePropertiesOption );
|
|
}
|
|
|
|
const bool allSameCase = string.toLower() == string || string.toUpper() == string;
|
|
const QStringList parts = ( allSameCase ? string.toLower() : string ).split( splitWords, Qt::SkipEmptyParts );
|
|
QString result;
|
|
bool firstWord = true;
|
|
int i = 0;
|
|
int lastWord = parts.count() - 1;
|
|
for ( const QString &word : std::as_const( parts ) )
|
|
{
|
|
if ( newPhraseSeparators.contains( word.trimmed() ) )
|
|
{
|
|
firstWord = true;
|
|
result += word;
|
|
}
|
|
else if ( firstWord || ( i == lastWord ) || !smallWords.contains( word ) )
|
|
{
|
|
result += word.at( 0 ).toUpper() + word.mid( 1 );
|
|
firstWord = false;
|
|
}
|
|
else
|
|
{
|
|
result += word;
|
|
}
|
|
i++;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
case Qgis::Capitalization::UpperCamelCase:
|
|
QString result = QgsStringUtils::capitalize( string.toLower(), Qgis::Capitalization::ForceFirstLetterToCapital ).simplified();
|
|
result.remove( ' ' );
|
|
return result;
|
|
}
|
|
// no warnings
|
|
return string;
|
|
}
|
|
|
|
// original code from http://www.qtcentre.org/threads/52456-HTML-Unicode-ampersand-encoding
|
|
QString QgsStringUtils::ampersandEncode( const QString &string )
|
|
{
|
|
QString encoded;
|
|
for ( int i = 0; i < string.size(); ++i )
|
|
{
|
|
QChar ch = string.at( i );
|
|
if ( ch.unicode() > 160 )
|
|
encoded += QStringLiteral( "&#%1;" ).arg( static_cast< int >( ch.unicode() ) );
|
|
else if ( ch.unicode() == 38 )
|
|
encoded += QLatin1String( "&" );
|
|
else if ( ch.unicode() == 60 )
|
|
encoded += QLatin1String( "<" );
|
|
else if ( ch.unicode() == 62 )
|
|
encoded += QLatin1String( ">" );
|
|
else
|
|
encoded += ch;
|
|
}
|
|
return encoded;
|
|
}
|
|
|
|
int QgsStringUtils::levenshteinDistance( const QString &string1, const QString &string2, bool caseSensitive )
|
|
{
|
|
int length1 = string1.length();
|
|
int length2 = string2.length();
|
|
|
|
//empty strings? solution is trivial...
|
|
if ( string1.isEmpty() )
|
|
{
|
|
return length2;
|
|
}
|
|
else if ( string2.isEmpty() )
|
|
{
|
|
return length1;
|
|
}
|
|
|
|
//handle case sensitive flag (or not)
|
|
QString s1( caseSensitive ? string1 : string1.toLower() );
|
|
QString s2( caseSensitive ? string2 : string2.toLower() );
|
|
|
|
const QChar *s1Char = s1.constData();
|
|
const QChar *s2Char = s2.constData();
|
|
|
|
//strip out any common prefix
|
|
int commonPrefixLen = 0;
|
|
while ( length1 > 0 && length2 > 0 && *s1Char == *s2Char )
|
|
{
|
|
commonPrefixLen++;
|
|
length1--;
|
|
length2--;
|
|
s1Char++;
|
|
s2Char++;
|
|
}
|
|
|
|
//strip out any common suffix
|
|
while ( length1 > 0 && length2 > 0 && s1.at( commonPrefixLen + length1 - 1 ) == s2.at( commonPrefixLen + length2 - 1 ) )
|
|
{
|
|
length1--;
|
|
length2--;
|
|
}
|
|
|
|
//fully checked either string? if so, the answer is easy...
|
|
if ( length1 == 0 )
|
|
{
|
|
return length2;
|
|
}
|
|
else if ( length2 == 0 )
|
|
{
|
|
return length1;
|
|
}
|
|
|
|
//ensure the inner loop is longer
|
|
if ( length1 > length2 )
|
|
{
|
|
std::swap( s1, s2 );
|
|
std::swap( length1, length2 );
|
|
}
|
|
|
|
//levenshtein algorithm begins here
|
|
std::vector< int > col( length2 + 1, 0 );
|
|
std::vector< int > prevCol;
|
|
prevCol.reserve( length2 + 1 );
|
|
for ( int i = 0; i < length2 + 1; ++i )
|
|
{
|
|
prevCol.emplace_back( i );
|
|
}
|
|
const QChar *s2start = s2Char;
|
|
for ( int i = 0; i < length1; ++i )
|
|
{
|
|
col[0] = i + 1;
|
|
s2Char = s2start;
|
|
for ( int j = 0; j < length2; ++j )
|
|
{
|
|
col[j + 1] = std::min( std::min( 1 + col[j], 1 + prevCol[1 + j] ), prevCol[j] + ( ( *s1Char == *s2Char ) ? 0 : 1 ) );
|
|
s2Char++;
|
|
}
|
|
col.swap( prevCol );
|
|
s1Char++;
|
|
}
|
|
return prevCol[length2];
|
|
}
|
|
|
|
QString QgsStringUtils::longestCommonSubstring( const QString &string1, const QString &string2, bool caseSensitive )
|
|
{
|
|
if ( string1.isEmpty() || string2.isEmpty() )
|
|
{
|
|
//empty strings, solution is trivial...
|
|
return QString();
|
|
}
|
|
|
|
//handle case sensitive flag (or not)
|
|
QString s1( caseSensitive ? string1 : string1.toLower() );
|
|
QString s2( caseSensitive ? string2 : string2.toLower() );
|
|
|
|
if ( s1 == s2 )
|
|
{
|
|
//another trivial case, identical strings
|
|
return s1;
|
|
}
|
|
|
|
int *currentScores = new int [ s2.length()];
|
|
int *previousScores = new int [ s2.length()];
|
|
int maxCommonLength = 0;
|
|
int lastMaxBeginIndex = 0;
|
|
|
|
const QChar *s1Char = s1.constData();
|
|
const QChar *s2Char = s2.constData();
|
|
const QChar *s2Start = s2Char;
|
|
|
|
for ( int i = 0; i < s1.length(); ++i )
|
|
{
|
|
for ( int j = 0; j < s2.length(); ++j )
|
|
{
|
|
if ( *s1Char != *s2Char )
|
|
{
|
|
currentScores[j] = 0;
|
|
}
|
|
else
|
|
{
|
|
if ( i == 0 || j == 0 )
|
|
{
|
|
currentScores[j] = 1;
|
|
}
|
|
else
|
|
{
|
|
currentScores[j] = 1 + previousScores[j - 1];
|
|
}
|
|
|
|
if ( maxCommonLength < currentScores[j] )
|
|
{
|
|
maxCommonLength = currentScores[j];
|
|
lastMaxBeginIndex = i;
|
|
}
|
|
}
|
|
s2Char++;
|
|
}
|
|
std::swap( currentScores, previousScores );
|
|
s1Char++;
|
|
s2Char = s2Start;
|
|
}
|
|
delete [] currentScores;
|
|
delete [] previousScores;
|
|
return string1.mid( lastMaxBeginIndex - maxCommonLength + 1, maxCommonLength );
|
|
}
|
|
|
|
int QgsStringUtils::hammingDistance( const QString &string1, const QString &string2, bool caseSensitive )
|
|
{
|
|
if ( string1.isEmpty() && string2.isEmpty() )
|
|
{
|
|
//empty strings, solution is trivial...
|
|
return 0;
|
|
}
|
|
|
|
if ( string1.length() != string2.length() )
|
|
{
|
|
//invalid inputs
|
|
return -1;
|
|
}
|
|
|
|
//handle case sensitive flag (or not)
|
|
QString s1( caseSensitive ? string1 : string1.toLower() );
|
|
QString s2( caseSensitive ? string2 : string2.toLower() );
|
|
|
|
if ( s1 == s2 )
|
|
{
|
|
//another trivial case, identical strings
|
|
return 0;
|
|
}
|
|
|
|
int distance = 0;
|
|
const QChar *s1Char = s1.constData();
|
|
const QChar *s2Char = s2.constData();
|
|
|
|
for ( int i = 0; i < string1.length(); ++i )
|
|
{
|
|
if ( *s1Char != *s2Char )
|
|
distance++;
|
|
s1Char++;
|
|
s2Char++;
|
|
}
|
|
|
|
return distance;
|
|
}
|
|
|
|
QString QgsStringUtils::soundex( const QString &string )
|
|
{
|
|
if ( string.isEmpty() )
|
|
return QString();
|
|
|
|
QString tmp = string.toUpper();
|
|
|
|
//strip non character codes, and vowel like characters after the first character
|
|
QChar *char1 = tmp.data();
|
|
QChar *char2 = tmp.data();
|
|
int outLen = 0;
|
|
for ( int i = 0; i < tmp.length(); ++i, ++char2 )
|
|
{
|
|
if ( ( *char2 ).unicode() >= 0x41 && ( *char2 ).unicode() <= 0x5A && ( i == 0 || ( ( *char2 ).unicode() != 0x41 && ( *char2 ).unicode() != 0x45
|
|
&& ( *char2 ).unicode() != 0x48 && ( *char2 ).unicode() != 0x49
|
|
&& ( *char2 ).unicode() != 0x4F && ( *char2 ).unicode() != 0x55
|
|
&& ( *char2 ).unicode() != 0x57 && ( *char2 ).unicode() != 0x59 ) ) )
|
|
{
|
|
*char1 = *char2;
|
|
char1++;
|
|
outLen++;
|
|
}
|
|
}
|
|
tmp.truncate( outLen );
|
|
|
|
QChar *tmpChar = tmp.data();
|
|
tmpChar++;
|
|
for ( int i = 1; i < tmp.length(); ++i, ++tmpChar )
|
|
{
|
|
switch ( ( *tmpChar ).unicode() )
|
|
{
|
|
case 0x42:
|
|
case 0x46:
|
|
case 0x50:
|
|
case 0x56:
|
|
tmp.replace( i, 1, QChar( 0x31 ) );
|
|
break;
|
|
|
|
case 0x43:
|
|
case 0x47:
|
|
case 0x4A:
|
|
case 0x4B:
|
|
case 0x51:
|
|
case 0x53:
|
|
case 0x58:
|
|
case 0x5A:
|
|
tmp.replace( i, 1, QChar( 0x32 ) );
|
|
break;
|
|
|
|
case 0x44:
|
|
case 0x54:
|
|
tmp.replace( i, 1, QChar( 0x33 ) );
|
|
break;
|
|
|
|
case 0x4C:
|
|
tmp.replace( i, 1, QChar( 0x34 ) );
|
|
break;
|
|
|
|
case 0x4D:
|
|
case 0x4E:
|
|
tmp.replace( i, 1, QChar( 0x35 ) );
|
|
break;
|
|
|
|
case 0x52:
|
|
tmp.replace( i, 1, QChar( 0x36 ) );
|
|
break;
|
|
}
|
|
}
|
|
|
|
//remove adjacent duplicates
|
|
char1 = tmp.data();
|
|
char2 = tmp.data();
|
|
char2++;
|
|
outLen = 1;
|
|
for ( int i = 1; i < tmp.length(); ++i, ++char2 )
|
|
{
|
|
if ( *char2 != *char1 )
|
|
{
|
|
char1++;
|
|
*char1 = *char2;
|
|
outLen++;
|
|
if ( outLen == 4 )
|
|
break;
|
|
}
|
|
}
|
|
tmp.truncate( outLen );
|
|
if ( tmp.length() < 4 )
|
|
{
|
|
tmp.append( "000" );
|
|
tmp.truncate( 4 );
|
|
}
|
|
|
|
return tmp;
|
|
}
|
|
|
|
|
|
double QgsStringUtils::fuzzyScore( const QString &candidate, const QString &search )
|
|
{
|
|
QString candidateNormalized = candidate.simplified().normalized( QString:: NormalizationForm_C ).toLower();
|
|
QString searchNormalized = search.simplified().normalized( QString:: NormalizationForm_C ).toLower();
|
|
|
|
int candidateLength = candidateNormalized.length();
|
|
int searchLength = searchNormalized.length();
|
|
int score = 0;
|
|
|
|
// if the candidate and the search term are empty, no other option than 0 score
|
|
if ( candidateLength == 0 || searchLength == 0 )
|
|
return score;
|
|
|
|
int candidateIdx = 0;
|
|
int searchIdx = 0;
|
|
// there is always at least one word
|
|
int maxScore = FUZZY_SCORE_WORD_MATCH;
|
|
|
|
bool isPreviousIndexMatching = false;
|
|
bool isWordOpen = true;
|
|
|
|
// loop trough each candidate char and calculate the potential max score
|
|
while ( candidateIdx < candidateLength )
|
|
{
|
|
QChar candidateChar = candidateNormalized[ candidateIdx++ ];
|
|
bool isCandidateCharWordEnd = candidateChar == ' ' || candidateChar.isPunct();
|
|
|
|
// the first char is always the default score
|
|
if ( candidateIdx == 1 )
|
|
maxScore += FUZZY_SCORE_NEW_MATCH;
|
|
// every space character or underscore is a opportunity for a new word
|
|
else if ( isCandidateCharWordEnd )
|
|
maxScore += FUZZY_SCORE_WORD_MATCH;
|
|
// potentially we can match every other character
|
|
else
|
|
maxScore += FUZZY_SCORE_CONSECUTIVE_MATCH;
|
|
|
|
// we looped through all the characters
|
|
if ( searchIdx >= searchLength )
|
|
continue;
|
|
|
|
QChar searchChar = searchNormalized[ searchIdx ];
|
|
bool isSearchCharWordEnd = searchChar == ' ' || searchChar.isPunct();
|
|
|
|
// match!
|
|
if ( candidateChar == searchChar || ( isCandidateCharWordEnd && isSearchCharWordEnd ) )
|
|
{
|
|
searchIdx++;
|
|
|
|
// if we have just successfully finished a word, give higher score
|
|
if ( isSearchCharWordEnd )
|
|
{
|
|
if ( isWordOpen )
|
|
score += FUZZY_SCORE_WORD_MATCH;
|
|
else if ( isPreviousIndexMatching )
|
|
score += FUZZY_SCORE_CONSECUTIVE_MATCH;
|
|
else
|
|
score += FUZZY_SCORE_NEW_MATCH;
|
|
|
|
isWordOpen = true;
|
|
}
|
|
// if we have consecutive characters matching, give higher score
|
|
else if ( isPreviousIndexMatching )
|
|
{
|
|
score += FUZZY_SCORE_CONSECUTIVE_MATCH;
|
|
}
|
|
// normal score for new independent character that matches
|
|
else
|
|
{
|
|
score += FUZZY_SCORE_NEW_MATCH;
|
|
}
|
|
|
|
isPreviousIndexMatching = true;
|
|
}
|
|
// if the current character does NOT match, we are sure we cannot build a word for now
|
|
else
|
|
{
|
|
isPreviousIndexMatching = false;
|
|
isWordOpen = false;
|
|
}
|
|
|
|
// if the search string is covered, check if the last match is end of word
|
|
if ( searchIdx >= searchLength )
|
|
{
|
|
bool isEndOfWord = ( candidateIdx >= candidateLength )
|
|
? true
|
|
: candidateNormalized[candidateIdx] == ' ' || candidateNormalized[candidateIdx].isPunct();
|
|
|
|
if ( isEndOfWord )
|
|
score += FUZZY_SCORE_WORD_MATCH;
|
|
}
|
|
|
|
// QgsLogger::debug( QStringLiteral( "TMP: %1 | %2 | %3 | %4 | %5" ).arg( candidateChar, searchChar, QString::number(score), QString::number(isCandidateCharWordEnd), QString::number(isSearchCharWordEnd) ) + QStringLiteral( __FILE__ ) );
|
|
}
|
|
|
|
// QgsLogger::debug( QStringLiteral( "RES: %1 | %2" ).arg( QString::number(maxScore), QString::number(score) ) + QStringLiteral( __FILE__ ) );
|
|
// we didn't loop through all the search chars, it means, that they are not present in the current candidate
|
|
if ( searchIdx < searchLength )
|
|
score = 0;
|
|
|
|
return static_cast<float>( std::max( score, 0 ) ) / std::max( maxScore, 1 );
|
|
}
|
|
|
|
|
|
QString QgsStringUtils::insertLinks( const QString &string, bool *foundLinks )
|
|
{
|
|
QString converted = string;
|
|
|
|
// http://alanstorm.com/url_regex_explained
|
|
// note - there's more robust implementations available
|
|
const thread_local QRegularExpression urlRegEx( QStringLiteral( "((?:(?:http|https|ftp|file)://[^\\s]+[^\\s,.]+)|(?:\\b(([\\w-]+://?|www[.])[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~\\s]|/)))))" ) );
|
|
const thread_local QRegularExpression protoRegEx( QStringLiteral( "^(?:f|ht)tps?://|file://" ) );
|
|
const thread_local QRegularExpression emailRegEx( QStringLiteral( "([\\w._%+-]+@[\\w.-]+\\.[A-Za-z]+)" ) );
|
|
|
|
int offset = 0;
|
|
bool found = false;
|
|
QRegularExpressionMatch match = urlRegEx.match( converted );
|
|
while ( match.hasMatch() )
|
|
{
|
|
found = true;
|
|
QString url = match.captured( 1 );
|
|
QString protoUrl = url;
|
|
if ( !protoRegEx.match( protoUrl ).hasMatch() )
|
|
{
|
|
protoUrl.prepend( "http://" );
|
|
}
|
|
QString anchor = QStringLiteral( "<a href=\"%1\">%2</a>" ).arg( protoUrl.toHtmlEscaped(), url.toHtmlEscaped() );
|
|
converted.replace( match.capturedStart( 1 ), url.length(), anchor );
|
|
offset = match.capturedStart( 1 ) + anchor.length();
|
|
match = urlRegEx.match( converted, offset );
|
|
}
|
|
|
|
offset = 0;
|
|
match = emailRegEx.match( converted );
|
|
while ( match.hasMatch() )
|
|
{
|
|
found = true;
|
|
QString email = match.captured( 1 );
|
|
QString anchor = QStringLiteral( "<a href=\"mailto:%1\">%1</a>" ).arg( email.toHtmlEscaped() );
|
|
converted.replace( match.capturedStart( 1 ), email.length(), anchor );
|
|
offset = match.capturedStart( 1 ) + anchor.length();
|
|
match = emailRegEx.match( converted, offset );
|
|
}
|
|
|
|
if ( foundLinks )
|
|
*foundLinks = found;
|
|
|
|
return converted;
|
|
}
|
|
|
|
bool QgsStringUtils::isUrl( const QString &string )
|
|
{
|
|
const thread_local QRegularExpression rxUrl( QStringLiteral( "^(http|https|ftp|file)://\\S+$" ) );
|
|
return rxUrl.match( string ).hasMatch();
|
|
}
|
|
|
|
QString QgsStringUtils::htmlToMarkdown( const QString &html )
|
|
{
|
|
// Any changes in this function must be copied to qgscrashreport.cpp too
|
|
QString converted = html;
|
|
converted.replace( QLatin1String( "<br>" ), QLatin1String( "\n" ) );
|
|
converted.replace( QLatin1String( "<b>" ), QLatin1String( "**" ) );
|
|
converted.replace( QLatin1String( "</b>" ), QLatin1String( "**" ) );
|
|
converted.replace( QLatin1String( "<pre>" ), QLatin1String( "\n```\n" ) );
|
|
converted.replace( QLatin1String( "</pre>" ), QLatin1String( "```\n" ) );
|
|
|
|
const thread_local QRegularExpression hrefRegEx( QStringLiteral( "<a\\s+href\\s*=\\s*([^<>]*)\\s*>([^<>]*)</a>" ) );
|
|
|
|
int offset = 0;
|
|
QRegularExpressionMatch match = hrefRegEx.match( converted );
|
|
while ( match.hasMatch() )
|
|
{
|
|
QString url = match.captured( 1 ).replace( QLatin1String( "\"" ), QString() );
|
|
url.replace( '\'', QString() );
|
|
QString name = match.captured( 2 );
|
|
QString anchor = QStringLiteral( "[%1](%2)" ).arg( name, url );
|
|
converted.replace( match.capturedStart(), match.capturedLength(), anchor );
|
|
offset = match.capturedStart() + anchor.length();
|
|
match = hrefRegEx.match( converted, offset );
|
|
}
|
|
|
|
return converted;
|
|
}
|
|
|
|
QString QgsStringUtils::wordWrap( const QString &string, const int length, const bool useMaxLineLength, const QString &customDelimiter )
|
|
{
|
|
if ( string.isEmpty() || length == 0 )
|
|
return string;
|
|
|
|
QString newstr;
|
|
QRegularExpression rx;
|
|
int delimiterLength = 0;
|
|
|
|
if ( !customDelimiter.isEmpty() )
|
|
{
|
|
rx.setPattern( QRegularExpression::escape( customDelimiter ) );
|
|
delimiterLength = customDelimiter.length();
|
|
}
|
|
else
|
|
{
|
|
// \x{200B} is a ZERO-WIDTH SPACE, needed for worwrap to support a number of complex scripts (Indic, Arabic, etc.)
|
|
rx.setPattern( QStringLiteral( "[\\x{200B}\\s]" ) );
|
|
delimiterLength = 1;
|
|
}
|
|
|
|
const QStringList lines = string.split( '\n' );
|
|
int strLength, strCurrent, strHit, lastHit;
|
|
|
|
for ( int i = 0; i < lines.size(); i++ )
|
|
{
|
|
const QString line = lines.at( i );
|
|
strLength = line.length();
|
|
if ( strLength <= length )
|
|
{
|
|
// shortcut, no wrapping required
|
|
newstr.append( line );
|
|
if ( i < lines.size() - 1 )
|
|
newstr.append( '\n' );
|
|
continue;
|
|
}
|
|
strCurrent = 0;
|
|
strHit = 0;
|
|
lastHit = 0;
|
|
|
|
while ( strCurrent < strLength )
|
|
{
|
|
// positive wrap value = desired maximum line width to wrap
|
|
// negative wrap value = desired minimum line width before wrap
|
|
if ( useMaxLineLength )
|
|
{
|
|
//first try to locate delimiter backwards
|
|
strHit = ( strCurrent + length >= strLength ) ? -1 : line.lastIndexOf( rx, strCurrent + length );
|
|
if ( strHit == lastHit || strHit == -1 )
|
|
{
|
|
//if no new backward delimiter found, try to locate forward
|
|
strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
|
|
}
|
|
lastHit = strHit;
|
|
}
|
|
else
|
|
{
|
|
strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
|
|
}
|
|
if ( strHit > -1 )
|
|
{
|
|
newstr.append( QStringView {line} .mid( strCurrent, strHit - strCurrent ) );
|
|
newstr.append( '\n' );
|
|
strCurrent = strHit + delimiterLength;
|
|
}
|
|
else
|
|
{
|
|
newstr.append( QStringView {line} .mid( strCurrent ) );
|
|
strCurrent = strLength;
|
|
}
|
|
}
|
|
if ( i < lines.size() - 1 )
|
|
newstr.append( '\n' );
|
|
}
|
|
|
|
return newstr;
|
|
}
|
|
|
|
QString QgsStringUtils::substituteVerticalCharacters( QString string )
|
|
{
|
|
string = string.replace( ',', QChar( 65040 ) ).replace( QChar( 8229 ), QChar( 65072 ) ); // comma & two-dot leader
|
|
string = string.replace( QChar( 12289 ), QChar( 65041 ) ).replace( QChar( 12290 ), QChar( 65042 ) ); // ideographic comma & full stop
|
|
string = string.replace( ':', QChar( 65043 ) ).replace( ';', QChar( 65044 ) );
|
|
string = string.replace( '!', QChar( 65045 ) ).replace( '?', QChar( 65046 ) );
|
|
string = string.replace( QChar( 12310 ), QChar( 65047 ) ).replace( QChar( 12311 ), QChar( 65048 ) ); // white lenticular brackets
|
|
string = string.replace( QChar( 8230 ), QChar( 65049 ) ); // three-dot ellipse
|
|
string = string.replace( QChar( 8212 ), QChar( 65073 ) ).replace( QChar( 8211 ), QChar( 65074 ) ); // em & en dash
|
|
string = string.replace( '_', QChar( 65075 ) ).replace( QChar( 65103 ), QChar( 65076 ) ); // low line & wavy low line
|
|
string = string.replace( '(', QChar( 65077 ) ).replace( ')', QChar( 65078 ) );
|
|
string = string.replace( '{', QChar( 65079 ) ).replace( '}', QChar( 65080 ) );
|
|
string = string.replace( '<', QChar( 65087 ) ).replace( '>', QChar( 65088 ) );
|
|
string = string.replace( '[', QChar( 65095 ) ).replace( ']', QChar( 65096 ) );
|
|
string = string.replace( QChar( 12308 ), QChar( 65081 ) ).replace( QChar( 12309 ), QChar( 65082 ) ); // tortoise shell brackets
|
|
string = string.replace( QChar( 12304 ), QChar( 65083 ) ).replace( QChar( 12305 ), QChar( 65084 ) ); // black lenticular brackets
|
|
string = string.replace( QChar( 12298 ), QChar( 65085 ) ).replace( QChar( 12299 ), QChar( 65086 ) ); // double angle brackets
|
|
string = string.replace( QChar( 12300 ), QChar( 65089 ) ).replace( QChar( 12301 ), QChar( 65090 ) ); // corner brackets
|
|
string = string.replace( QChar( 12302 ), QChar( 65091 ) ).replace( QChar( 12303 ), QChar( 65092 ) ); // white corner brackets
|
|
return string;
|
|
}
|
|
|
|
QString QgsStringUtils::qRegExpEscape( const QString &string )
|
|
{
|
|
// code and logic taken from the Qt source code
|
|
const QLatin1Char backslash( '\\' );
|
|
const int count = string.count();
|
|
|
|
QString escaped;
|
|
escaped.reserve( count * 2 );
|
|
for ( int i = 0; i < count; i++ )
|
|
{
|
|
switch ( string.at( i ).toLatin1() )
|
|
{
|
|
case '$':
|
|
case '(':
|
|
case ')':
|
|
case '*':
|
|
case '+':
|
|
case '.':
|
|
case '?':
|
|
case '[':
|
|
case '\\':
|
|
case ']':
|
|
case '^':
|
|
case '{':
|
|
case '|':
|
|
case '}':
|
|
escaped.append( backslash );
|
|
}
|
|
escaped.append( string.at( i ) );
|
|
}
|
|
return escaped;
|
|
}
|
|
|
|
QString QgsStringUtils::truncateMiddleOfString( const QString &string, int maxLength )
|
|
{
|
|
const int charactersToTruncate = string.length() - maxLength;
|
|
if ( charactersToTruncate <= 0 )
|
|
return string;
|
|
|
|
// note we actually truncate an extra character, as we'll be replacing it with the ... character
|
|
const int truncateFrom = string.length() / 2 - ( charactersToTruncate + 1 ) / 2;
|
|
if ( truncateFrom <= 0 )
|
|
return QChar( 0x2026 );
|
|
|
|
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
|
|
return string.leftRef( truncateFrom ) + QString( QChar( 0x2026 ) ) + string.midRef( truncateFrom + charactersToTruncate + 1 );
|
|
#else
|
|
return QStringView( string ).first( truncateFrom ) + QString( QChar( 0x2026 ) ) + QStringView( string ).sliced( truncateFrom + charactersToTruncate + 1 );
|
|
#endif
|
|
}
|
|
|
|
bool QgsStringUtils::containsByWord( const QString &candidate, const QString &words, Qt::CaseSensitivity sensitivity )
|
|
{
|
|
if ( candidate.trimmed().isEmpty() )
|
|
return false;
|
|
|
|
const thread_local QRegularExpression rxWhitespace( QStringLiteral( "\\s+" ) );
|
|
const QStringList parts = words.split( rxWhitespace, Qt::SkipEmptyParts );
|
|
if ( parts.empty() )
|
|
return false;
|
|
for ( const QString &word : parts )
|
|
{
|
|
if ( !candidate.contains( word, sensitivity ) )
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
QgsStringReplacement::QgsStringReplacement( const QString &match, const QString &replacement, bool caseSensitive, bool wholeWordOnly )
|
|
: mMatch( match )
|
|
, mReplacement( replacement )
|
|
, mCaseSensitive( caseSensitive )
|
|
, mWholeWordOnly( wholeWordOnly )
|
|
{
|
|
if ( mWholeWordOnly )
|
|
{
|
|
mRx.setPattern( QStringLiteral( "\\b%1\\b" ).arg( mMatch ) );
|
|
mRx.setPatternOptions( mCaseSensitive ? QRegularExpression::NoPatternOption : QRegularExpression::CaseInsensitiveOption );
|
|
}
|
|
}
|
|
|
|
QString QgsStringReplacement::process( const QString &input ) const
|
|
{
|
|
QString result = input;
|
|
if ( !mWholeWordOnly )
|
|
{
|
|
return result.replace( mMatch, mReplacement, mCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive );
|
|
}
|
|
else
|
|
{
|
|
return result.replace( mRx, mReplacement );
|
|
}
|
|
}
|
|
|
|
QgsStringMap QgsStringReplacement::properties() const
|
|
{
|
|
QgsStringMap map;
|
|
map.insert( QStringLiteral( "match" ), mMatch );
|
|
map.insert( QStringLiteral( "replace" ), mReplacement );
|
|
map.insert( QStringLiteral( "caseSensitive" ), mCaseSensitive ? QStringLiteral( "1" ) : QStringLiteral( "0" ) );
|
|
map.insert( QStringLiteral( "wholeWord" ), mWholeWordOnly ? QStringLiteral( "1" ) : QStringLiteral( "0" ) );
|
|
return map;
|
|
}
|
|
|
|
QgsStringReplacement QgsStringReplacement::fromProperties( const QgsStringMap &properties )
|
|
{
|
|
return QgsStringReplacement( properties.value( QStringLiteral( "match" ) ),
|
|
properties.value( QStringLiteral( "replace" ) ),
|
|
properties.value( QStringLiteral( "caseSensitive" ), QStringLiteral( "0" ) ) == QLatin1String( "1" ),
|
|
properties.value( QStringLiteral( "wholeWord" ), QStringLiteral( "0" ) ) == QLatin1String( "1" ) );
|
|
}
|
|
|
|
QString QgsStringReplacementCollection::process( const QString &input ) const
|
|
{
|
|
QString result = input;
|
|
for ( const QgsStringReplacement &r : mReplacements )
|
|
{
|
|
result = r.process( result );
|
|
}
|
|
return result;
|
|
}
|
|
|
|
void QgsStringReplacementCollection::writeXml( QDomElement &elem, QDomDocument &doc ) const
|
|
{
|
|
for ( const QgsStringReplacement &r : mReplacements )
|
|
{
|
|
QgsStringMap props = r.properties();
|
|
QDomElement propEl = doc.createElement( QStringLiteral( "replacement" ) );
|
|
QgsStringMap::const_iterator it = props.constBegin();
|
|
for ( ; it != props.constEnd(); ++it )
|
|
{
|
|
propEl.setAttribute( it.key(), it.value() );
|
|
}
|
|
elem.appendChild( propEl );
|
|
}
|
|
}
|
|
|
|
void QgsStringReplacementCollection::readXml( const QDomElement &elem )
|
|
{
|
|
mReplacements.clear();
|
|
QDomNodeList nodelist = elem.elementsByTagName( QStringLiteral( "replacement" ) );
|
|
for ( int i = 0; i < nodelist.count(); i++ )
|
|
{
|
|
QDomElement replacementElem = nodelist.at( i ).toElement();
|
|
QDomNamedNodeMap nodeMap = replacementElem.attributes();
|
|
|
|
QgsStringMap props;
|
|
for ( int j = 0; j < nodeMap.count(); ++j )
|
|
{
|
|
props.insert( nodeMap.item( j ).nodeName(), nodeMap.item( j ).nodeValue() );
|
|
}
|
|
mReplacements << QgsStringReplacement::fromProperties( props );
|
|
}
|
|
|
|
}
|