diff --git a/ChangeLog b/ChangeLog
index fbd41e430..36cdaf74a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2024-23-09: Hugo Melder
+
+	* Headers/Foundation/NSThread.h:
+	* Source/NSString.m:
+	* Source/NSThread.m:
+	Cache ICU collator in thread-local storage to avoid
+	expensive construction when comparing strings.
+
 2024-13-08: Hugo Melder
 
 	* Source/NSOperation.m:
diff --git a/Headers/Foundation/NSThread.h b/Headers/Foundation/NSThread.h
index 079276dcf..6f003e2b6 100644
--- a/Headers/Foundation/NSThread.h
+++ b/Headers/Foundation/NSThread.h
@@ -72,6 +72,8 @@ GS_EXPORT_CLASS
   struct autorelease_thread_vars _autorelease_vars;
   id _gcontext;
   void *_runLoopInfo; // Per-thread runloop related info.
+  // Used to store a GSICUCollatorCache object for this thread.
+  id _stringCollatorCache;
 #endif
 #if GS_NONFRAGILE
 # if defined(GS_NSThread_IVARS)
diff --git a/Source/NSString.m b/Source/NSString.m
index eff926f14..b0c85b4af 100644
--- a/Source/NSString.m
+++ b/Source/NSString.m
@@ -69,6 +69,7 @@
 #import "Foundation/NSObjCRuntime.h"
 #import "Foundation/NSScanner.h"
 #import "Foundation/NSUserDefaults.h"
+#import "Foundation/NSThread.h"
 #import "Foundation/FoundationErrors.h"
 // For private method _decodePropertyListForKey:
 #import "Foundation/NSKeyedArchiver.h"
@@ -530,6 +531,179 @@ static unsigned rootOf(NSString *s, unsigned l)
   return root;
 }
 
+#if GS_USE_ICU == 1
+
+/**
+ * Returns an ICU collator for the given locale identifier and options, or
+ * returns NULL if a collator couldn't be created.<br />
+ * An empty identifier is treated like NULL.
+ *
+ * Used in -[GSICUCollatorCache initWithMask:locale:]
+ */
+static UCollator *
+_GSICUCollatorCreate(NSStringCompareOptions mask, const char *localeCString)
+{
+  const NSStringCompareOptions insensitive
+    = NSCaseInsensitiveSearch | NSDiacriticInsensitiveSearch;
+  UErrorCode status = U_ZERO_ERROR;
+  UCollator *coll;
+
+  /* An empty identifier would select the ICU root collator; pass NULL
+   * so that ICU uses the collation rules of its default locale instead.
+   */
+  if (localeCString != NULL && '\0' == *localeCString)
+    {
+      localeCString = NULL;
+    }
+
+  coll = ucol_open(localeCString, &status);
+
+  if (U_SUCCESS(status))
+    {
+      /* Primary strength is only right when BOTH options are set, so test
+       * for both bits; testing for either one made the two branches below
+       * unreachable.
+       */
+      if ((mask & insensitive) == insensitive)
+        {
+          ucol_setStrength(coll, UCOL_PRIMARY);
+        }
+      else if (mask & NSCaseInsensitiveSearch)
+        {
+          ucol_setStrength(coll, UCOL_SECONDARY);
+        }
+      else if (mask & NSDiacriticInsensitiveSearch)
+        {
+          ucol_setStrength(coll, UCOL_PRIMARY);
+          ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
+        }
+
+      if (mask & NSNumericSearch)
+        {
+          ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
+        }
+
+      if (U_SUCCESS(status))
+        {
+          return coll;
+        }
+    }
+
+  if (coll != NULL)
+    {
+      ucol_close(coll);
+    }
+  return NULL;
+}
+
+/**
+ * Holds an ICU collator together with the options and the locale it was
+ * created for.  The collator is closed when the object is deallocated.
+ */
+@interface GSICUCollatorCache : NSObject
+{
+  @public
+  UCollator *collator;
+  NSUInteger mask;
+  NSLocale *locale;
+}
+- (instancetype) initWithMask: (NSUInteger) aMask locale: (NSLocale *) aLocale;
+- (void) dealloc;
+@end
+
+@implementation GSICUCollatorCache
+- (instancetype) initWithMask: (NSUInteger) aMask locale: (NSLocale *) aLocale
+{
+  const char *localeId;
+  self = [super init];
+  if (self != nil)
+    {
+      mask = aMask;
+      ASSIGN(locale, aLocale);
+      localeId = [[locale localeIdentifier] UTF8String];
+      collator = _GSICUCollatorCreate(mask, localeId);
+      if (NULL == collator)
+        {
+          DESTROY(self);
+          return nil;
+        }
+    }
+  return self;
+}
+
+- (void) dealloc
+{
+  RELEASE(locale);
+  if (collator != NULL)
+    {
+      ucol_close(collator);
+    }
+  [super dealloc];
+}
+
+@end
+
+
+@interface NSThread (StringCollatorCache)
+- (id) _stringCollatorCache;
+- (void) _setStringCollatorCache: (id)cache;
+@end
+
+/**
+ * Returns the collator cached by the current thread for the given options
+ * and locale, creating and caching a new one if the thread has no cache or
+ * its cache was created for other options or another locale.<br />
+ * Returns NULL if the GNUstep comparison code should be used instead (for
+ * a literal search, or if a collator couldn't be created).<br />
+ * The collator belongs to the cache, so the caller must not close it.<br />
+ * The locale parameter must not be nil at this point.
+ */
+static UCollator *
+GSICUCachedCollator(NSStringCompareOptions mask, NSLocale *locale)
+{
+  NSThread *current;
+  GSICUCollatorCache *cache;
+
+  if (mask & NSLiteralSearch)
+    {
+      return NULL;
+    }
+
+  /* An object which is not an NSLocale can't be asked for a locale
+   * identifier; use the current locale in that case.
+   */
+  if (NO == [locale isKindOfClass: [NSLocale class]])
+    {
+      locale = [NSLocale currentLocale];
+    }
+
+  current = [NSThread currentThread];
+  cache = [current _stringCollatorCache];
+
+  // Do a pointer comparison first to avoid the overhead of isEqual:
+  // The locale instance is likely a global constant object.
+  // If this fails, do a full comparison.
+  if (cache != nil && mask == cache->mask
+    && (cache->locale == locale || [cache->locale isEqual: locale]))
+    {
+      return cache->collator;
+    }
+
+  cache = [[GSICUCollatorCache alloc] initWithMask: mask locale: locale];
+  if (nil == cache)
+    {
+      /* -initWithMask:locale: returns nil when no collator could be
+       * created; reading cache->collator would dereference nil.
+       */
+      return NULL;
+    }
+  [current _setStringCollatorCache: cache];
+  RELEASE(cache); // The thread holds the remaining reference.
+  return cache->collator;
+}
+
+#endif
+
 
 @implementation NSString
 //  NSString itself is an abstract class which provides factory
@@ -660,92 +834,7 @@ @implementation NSString
 }
 
 
 #if GS_USE_ICU == 1
-/**
- * Returns an ICU collator for the given locale and options, or returns
- * NULL if a collator couldn't be created or the GNUstep comparison code
- * should be used instead.
- */
-static UCollator *
-GSICUCollatorOpen(NSStringCompareOptions mask, NSLocale *locale)
-{
-  UErrorCode status = U_ZERO_ERROR;
-  const char *localeCString;
-  UCollator *coll;
-
-  if (mask & NSLiteralSearch)
-    {
-      return NULL;
-    }
-
-  if (NO == [locale isKindOfClass: [NSLocale class]])
-    {
-      if (nil == locale)
-        {
-          /* See comments below about the posix locale.
-           * It's bad for case insensitive search, but needed for numeric
-           */
-          if (mask & NSNumericSearch)
-            {
-              locale = [NSLocale systemLocale];
-            }
-          else
-            {
-              /* A nil locale should trigger POSIX collation (i.e. 'A'-'Z' sort
-               * before 'a'), and support for this was added in ICU 4.6 under the
-               * locale name en_US_POSIX, but it doesn't fit our requirements
-               * (e.g. 
'e' and 'E' don't compare as equal with case insensitive - * comparison.) - so return NULL to indicate that the GNUstep - * comparison code should be used. - */ - return NULL; - } - } - else - { - locale = [NSLocale currentLocale]; - } - } - - localeCString = [[locale localeIdentifier] UTF8String]; - - if (localeCString != NULL && strcmp("", localeCString) == 0) - { - localeCString = NULL; - } - - coll = ucol_open(localeCString, &status); - - if (U_SUCCESS(status)) - { - if (mask & (NSCaseInsensitiveSearch | NSDiacriticInsensitiveSearch)) - { - ucol_setStrength(coll, UCOL_PRIMARY); - } - else if (mask & NSCaseInsensitiveSearch) - { - ucol_setStrength(coll, UCOL_SECONDARY); - } - else if (mask & NSDiacriticInsensitiveSearch) - { - ucol_setStrength(coll, UCOL_PRIMARY); - ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status); - } - - if (mask & NSNumericSearch) - { - ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status); - } - - if (U_SUCCESS(status)) - { - return coll; - } - } - - ucol_close(coll); - return NULL; -} #if defined(HAVE_UNICODE_UNORM2_H) || defined(HAVE_ICU_H) - (NSString *) _normalizedICUStringOfType: (const char*)normalization @@ -2851,9 +2894,18 @@ - (NSRange) rangeOfString: (NSString *)aString return result; } + if (locale == nil && (mask & NSNumericSearch) == 0) + { + return strRangeNsNs(self, aString, mask, searchRange); + } + else if (locale == nil) + { + locale = [NSLocale systemLocale]; + } + #if GS_USE_ICU == 1 { - UCollator *coll = GSICUCollatorOpen(mask, locale); + UCollator *coll = GSICUCachedCollator(mask, locale); if (NULL != coll) { @@ -2919,7 +2971,6 @@ - (NSRange) rangeOfString: (NSString *)aString GS_ENDITEMBUF2() GS_ENDITEMBUF() usearch_close(search); - ucol_close(coll); return result; } } @@ -5825,9 +5876,25 @@ - (NSComparisonResult) compare: (NSString *)string format: @"compare with nil"]; } + /* A nil locale should trigger POSIX collation (i.e. 
'A'-'Z' sort + * before 'a'), and support for this was added in ICU 4.6 under the + * locale name en_US_POSIX, but it doesn't fit our requirements + * (e.g. 'e' and 'E' don't compare as equal with case insensitive + * comparison.) - so call the GNUstep comparison code directly, unless a + * numeric search is requested (the system locale is used then). + */ + if (locale == nil && (mask & NSNumericSearch) == 0) + { + return strCompNsNs(self, string, mask, compareRange); + } + else if (locale == nil) + { + locale = [NSLocale systemLocale]; + } + #if GS_USE_ICU == 1 { - UCollator *coll = GSICUCollatorOpen(mask, locale); + UCollator *coll = GSICUCachedCollator(mask, locale); if (coll != NULL) { @@ -5836,29 +5903,35 @@ - (NSComparisonResult) compare: (NSString *)string unichar *charsSelf; unichar *charsOther; UCollationResult result; + NSUInteger sizeSelf = countSelf * sizeof(unichar); + NSUInteger sizeOther = countOther * sizeof(unichar); + bool useStack = sizeSelf + sizeOther < 128; + + if (useStack) + { + charsSelf = alloca(sizeSelf); + charsOther = alloca(sizeOther); + } else { + charsSelf = NSZoneMalloc(NSDefaultMallocZone(), sizeSelf); + charsOther = NSZoneMalloc(NSDefaultMallocZone(), sizeOther); + } + - charsSelf = NSZoneMalloc(NSDefaultMallocZone(), - countSelf * sizeof(unichar)); - charsOther = NSZoneMalloc(NSDefaultMallocZone(), - countOther * sizeof(unichar)); // Copy to buffer - [self getCharacters: charsSelf range: compareRange]; [string getCharacters: charsOther range: NSMakeRange(0, countOther)]; result = ucol_strcoll(coll, charsSelf, countSelf, charsOther, countOther); - NSZoneFree(NSDefaultMallocZone(), charsSelf); - NSZoneFree(NSDefaultMallocZone(), charsOther); - ucol_close(coll); + if (!useStack) + { + NSZoneFree(NSDefaultMallocZone(), charsSelf); + NSZoneFree(NSDefaultMallocZone(), charsOther); + } - switch (result) - { - case UCOL_EQUAL: return NSOrderedSame; - case UCOL_GREATER: return NSOrderedDescending; - case UCOL_LESS: return NSOrderedAscending; - } + // UCollationResult enums are 
stable and match NSComparisonResult enums + return (NSComparisonResult)result; } } #endif diff --git a/Source/NSThread.m b/Source/NSThread.m index b6e028873..f3ec8d720 100644 --- a/Source/NSThread.m +++ b/Source/NSThread.m @@ -1194,6 +1194,7 @@ - (void) dealloc DESTROY(_target); DESTROY(_arg); DESTROY(_name); + DESTROY(_stringCollatorCache); if (_autorelease_vars.pool_cache != 0) { [NSAutoreleasePool _endThread: self]; @@ -1570,9 +1571,18 @@ - (NSMutableDictionary*) threadDictionary return _thread_dictionary; } +- (id) _stringCollatorCache +{ + return (id)self->_stringCollatorCache; +} +- (void) _setStringCollatorCache: (id) cache +{ + ASSIGN(self->_stringCollatorCache, cache); +} + @end - + @implementation NSThread (GSLockInfo)