Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HPCC-32795 Additional optimizations to replaceString #19239

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rtl/eclrtl/eclrtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6066,7 +6066,7 @@ void rtlAddExceptionTag(StringBuffer & errorText, const char * tag, const char *
void rtlSubstituteEmbeddedScript(size32_t &__lenResult, char * &__result, size32_t scriptChars, const char *script, size32_t outFieldsChars, const char *outFields, size32_t searchChars, const char *search)
{
StringBuffer result;
::replaceString(result, rtlUtf8Size(scriptChars, script), script, rtlUtf8Size(searchChars, search), search, rtlUtf8Size(outFieldsChars, outFields), outFields);
::replaceString(result, rtlUtf8Size(scriptChars, script), script, rtlUtf8Size(searchChars, search), search, rtlUtf8Size(outFieldsChars, outFields), outFields, true);
__lenResult = result.lengthUtf8();
__result = result.detach();
}
Expand Down
46 changes: 33 additions & 13 deletions system/jlib/jstring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -935,13 +935,10 @@ StringBuffer & StringBuffer::replace(char oldChar, char newChar)
}

// Copy source to result, replacing all occurrences of "oldStr" with "newStr"
StringBuffer &replaceString(StringBuffer & result, size_t lenSource, const char *source, size_t lenOldStr, const char* oldStr, size_t lenNewStr, const char* newStr)
bool replaceString(StringBuffer & result, size_t lenSource, const char *source, size_t lenOldStr, const char* oldStr, size_t lenNewStr, const char* newStr, bool forceCopy)
{
if (lenOldStr && lenSource >= lenOldStr)
{
// Avoid allocating an unnecessarly large buffer and match the source string
result.ensureCapacity(lenSource);

size_t offset = 0;
size_t lastCopied = 0;
size_t maxOffset = lenSource - lenOldStr + 1;
Expand All @@ -951,6 +948,10 @@ StringBuffer &replaceString(StringBuffer & result, size_t lenSource, const char
if (unlikely(source[offset] == firstChar)
&& unlikely((lenOldStr == 1) || memcmp(source + offset, oldStr, lenOldStr)==0))
{
// Wait to allocate memory until a match is found
if (!lastCopied)
result.ensureCapacity(lenSource); // Avoid allocating an unnecessarly large buffer and match the source string

// If lastCopied matches the offset nothing is appended, but we can avoid a test for offset == lastCopied
result.append(offset - lastCopied, source + lastCopied);
result.append(lenNewStr, newStr);
Expand All @@ -960,13 +961,16 @@ StringBuffer &replaceString(StringBuffer & result, size_t lenSource, const char
else
offset++;
}
// Append the remaining characters
result.append(lenSource - lastCopied, source + lastCopied);

if (lastCopied || forceCopy)
result.append(lenSource - lastCopied, source + lastCopied); // Append the remaining characters

return lastCopied;
}
else
else if (forceCopy)
result.append(lenSource, source); // Search string does not fit in source or is empty

return result;
return false;
}

StringBuffer &replaceVariables(StringBuffer & result, const char *source, bool exceptions, IVariableSubstitutionHelper *helper, const char* delim, const char* term)
Expand Down Expand Up @@ -1056,13 +1060,29 @@ StringBuffer &replaceStringNoCase(StringBuffer & result, size_t lenSource, const
// this method will replace all occurrences of "oldStr" with "newStr"
StringBuffer & StringBuffer::replaceString(const char* oldStr, const char* newStr)
{
if (curLen)
if (curLen && oldStr)
{
StringBuffer temp;
size_t oldlen = oldStr ? strlen(oldStr) : 0;
size_t oldlen = strlen(oldStr);
if (oldlen > curLen)
return *this;

size_t newlen = newStr ? strlen(newStr) : 0;
::replaceString(temp, curLen, buffer, oldlen, oldStr, newlen, newStr);
swapWith(temp);
if (oldlen == curLen)
{
if (memcmp(buffer, oldStr, oldlen) == 0)
{
if (newlen > curLen)
ensureCapacity(newlen);
memcpy(buffer, newStr, newlen);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I was reviewing this code (I am not because of general review comments) - this has a serious memory corruption if newlen > oldlen

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. I added ensureCapacity(newlen).

curLen = newlen;
}
}
else
{
StringBuffer temp;
if(::replaceString(temp, curLen, buffer, oldlen, oldStr, newlen, newStr))
swapWith(temp);
}
}
return *this;
}
Expand Down
2 changes: 1 addition & 1 deletion system/jlib/jstring.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ extern jlib_decl void decodeXML(ISimpleReadStream &in, StringBuffer &out, unsign
extern jlib_decl int utf8CharLen(unsigned char ch);
extern jlib_decl int utf8CharLen(const unsigned char *ch, unsigned maxsize = (unsigned)-1);

extern jlib_decl StringBuffer &replaceString(StringBuffer & result, size_t lenSource, const char *source, size_t lenOldStr, const char* oldStr, size_t lenNewStr, const char* newStr);
extern jlib_decl bool replaceString(StringBuffer & result, size_t lenSource, const char *source, size_t lenOldStr, const char* oldStr, size_t lenNewStr, const char* newStr, bool forceCopy = false);

interface IVariableSubstitutionHelper
{
Expand Down
20 changes: 20 additions & 0 deletions testing/unittests/jlibtests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,26 @@ void testEncodeCSVColumn()
source.set("abbabab");
source.replaceString("ab", "xxx");
CPPUNIT_ASSERT_EQUAL_STR("xxxbxxxxxx", source.str());

// Search string has same length as source string and matches
source.set("ababab");
source.replaceString("ababab", "xxxxxx");
CPPUNIT_ASSERT_EQUAL_STR("xxxxxx", source.str());

// Search string has same length as source string and replace is smaller than source
source.set("ababab");
source.replaceString("ababab", "xxx");
CPPUNIT_ASSERT_EQUAL_STR("xxx", source.str());

// Search string has same length as source string and replace is larger than source
source.set("ababab");
source.replaceString("ababab", "xxxxxxxxx");
CPPUNIT_ASSERT_EQUAL_STR("xxxxxxxxx", source.str());

// Search string has same length as source string and does not match
source.set("ababab");
source.replaceString("ababac", "xxxxxx");
CPPUNIT_ASSERT_EQUAL_STR("ababab", source.str());
}
};

Expand Down
Loading