Skip to content

Commit 8be6575

Browse files
committed
ICU-23348 Fix thread safety issues with parsing and formatting in RuleBasedNumberFormat
1 parent b914389 commit 8be6575

File tree

3 files changed

+71
-46
lines changed

3 files changed

+71
-46
lines changed

icu4c/source/i18n/rbnf.cpp

Lines changed: 49 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -785,6 +785,10 @@ RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
785785
setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);
786786
setRoundingMode(rhs.getRoundingMode());
787787

788+
if (lenient) {
789+
initializeCollator(status);
790+
}
791+
788792
capitalizationInfoSet = rhs.capitalizationInfoSet;
789793
capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
790794
capitalizationForStandAlone = rhs.capitalizationForStandAlone;
@@ -1202,7 +1206,11 @@ RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
12021206
(capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
12031207
// titlecase first word of currentResult, here use sentence iterator unlike current implementations
12041208
// in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
1205-
currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
1209+
// Clone the break iterator to avoid mutating it in a const method (thread safety).
1210+
LocalPointer<BreakIterator> iter(capitalizationBrkIter->clone());
1211+
if (iter.isValid()) {
1212+
currentResult.toTitle(iter.getAlias(), locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
1213+
}
12061214
}
12071215
}
12081216
#endif
@@ -1272,6 +1280,10 @@ RuleBasedNumberFormat::setLenient(UBool enabled)
12721280
if (!enabled && collator) {
12731281
delete collator;
12741282
collator = nullptr;
1283+
} else if (enabled && collator == nullptr) {
1284+
// Eagerly initialize the collator so that getCollator() is thread-safe.
1285+
UErrorCode status = U_ZERO_ERROR;
1286+
initializeCollator(status);
12751287
}
12761288
#endif
12771289
}
@@ -1689,55 +1701,52 @@ RuleBasedNumberFormat::dispose()
16891701
//-----------------------------------------------------------------------
16901702

16911703
/**
1692-
* Returns the collator to use for lenient parsing. The collator is lazily created:
1693-
* this function creates it the first time it's called.
1694-
* @return The collator to use for lenient parsing, or null if lenient parsing
1695-
* is turned off.
1696-
*/
1697-
const RuleBasedCollator*
1698-
RuleBasedNumberFormat::getCollator() const
1704+
* Initializes the collator for lenient parsing. Must be called from
1705+
* non-const methods (setLenient, operator=) so that getCollator() can
1706+
* be thread-safe.
1707+
*/
1708+
void
1709+
RuleBasedNumberFormat::initializeCollator(UErrorCode &status)
16991710
{
17001711
#if !UCONFIG_NO_COLLATION
1701-
if (!fRuleSets) {
1702-
return nullptr;
1712+
if (collator != nullptr || !fRuleSets) {
1713+
return;
17031714
}
17041715

1705-
// lazy-evaluate the collator
1706-
if (collator == nullptr && lenient) {
1707-
// create a default collator based on the formatter's locale,
1708-
// then pull out that collator's rules, append any additional
1709-
// rules specified in the description, and create a _new_
1710-
// collator based on the combination of those rules
1711-
1712-
UErrorCode status = U_ZERO_ERROR;
1716+
Collator* temp = Collator::createInstance(locale, status);
1717+
RuleBasedCollator* newCollator;
1718+
if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != nullptr) {
1719+
if (lenientParseRules) {
1720+
UnicodeString rules(newCollator->getRules());
1721+
rules.append(*lenientParseRules);
17131722

1714-
Collator* temp = Collator::createInstance(locale, status);
1715-
RuleBasedCollator* newCollator;
1716-
if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != nullptr) {
1717-
if (lenientParseRules) {
1718-
UnicodeString rules(newCollator->getRules());
1719-
rules.append(*lenientParseRules);
1720-
1721-
newCollator = new RuleBasedCollator(rules, status);
1722-
// Exit if newCollator could not be created.
1723-
if (newCollator == nullptr) {
1724-
return nullptr;
1725-
}
1726-
} else {
1727-
temp = nullptr;
1728-
}
1729-
if (U_SUCCESS(status)) {
1730-
newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1731-
// cast away const
1732-
const_cast<RuleBasedNumberFormat*>(this)->collator = newCollator;
1733-
} else {
1734-
delete newCollator;
1723+
newCollator = new RuleBasedCollator(rules, status);
1724+
// Exit if newCollator could not be created.
1725+
if (newCollator == nullptr) {
1726+
status = U_MEMORY_ALLOCATION_ERROR;
1727+
return;
17351728
}
1729+
} else {
1730+
temp = nullptr;
1731+
}
1732+
if (U_SUCCESS(status)) {
1733+
newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1734+
collator = newCollator;
1735+
} else {
1736+
delete newCollator;
17361737
}
1737-
delete temp;
17381738
}
1739+
delete temp;
17391740
#endif
1741+
}
17401742

1743+
/**
1744+
* Returns the collator to use for lenient parsing, or null if lenient parsing
1745+
* is turned off.
1746+
*/
1747+
const RuleBasedCollator*
1748+
RuleBasedNumberFormat::getCollator() const
1749+
{
17411750
// if lenient-parse mode is off, this will be null
17421751
// (see setLenientParseMode())
17431752
return collator;

icu4c/source/i18n/unicode/rbnf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,6 +1131,7 @@ class U_I18N_API_CLASS RuleBasedNumberFormat : public NumberFormat {
11311131
friend class FractionalPartSubstitution;
11321132

11331133
inline NFRuleSet * getDefaultRuleSet() const;
1134+
void initializeCollator(UErrorCode &status);
11341135
const RuleBasedCollator * getCollator() const;
11351136
DecimalFormatSymbols * initializeDecimalFormatSymbols(UErrorCode &status);
11361137
const DecimalFormatSymbols * getDecimalFormatSymbols() const;

icu4j/main/core/src/main/java/com/ibm/icu/text/RuleBasedNumberFormat.java

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1362,6 +1362,11 @@ public Number parse(String text, ParsePosition parsePosition) {
13621362
*/
13631363
public void setLenientParseMode(boolean enabled) {
13641364
lenientParse = enabled;
1365+
if (enabled) {
1366+
// Eagerly initialize the scanner provider so that getLenientScannerProvider()
1367+
// is thread-safe when called from parse().
1368+
initializeLenientScannerProvider();
1369+
}
13651370
}
13661371

13671372
/**
@@ -1398,10 +1403,20 @@ public void setLenientScannerProvider(RbnfLenientScannerProvider scannerProvider
13981403
* @stable ICU 4.4
13991404
*/
14001405
public RbnfLenientScannerProvider getLenientScannerProvider() {
1401-
// there's a potential race condition if two threads try to set/get the scanner at
1402-
// the same time, but you get what you get, and you shouldn't be using this from
1403-
// multiple threads anyway.
14041406
if (scannerProvider == null && lenientParse && !lookedForScanner) {
1407+
initializeLenientScannerProvider();
1408+
}
1409+
1410+
return scannerProvider;
1411+
}
1412+
1413+
/**
1414+
* Attempts to instantiate the default lenient scanner provider. Called eagerly from
1415+
* setLenientParseMode() so that getLenientScannerProvider() does not need to perform lazy
1416+
* initialization from parse() (thread safety).
1417+
*/
1418+
private void initializeLenientScannerProvider() {
1419+
if (scannerProvider == null && !lookedForScanner) {
14051420
try {
14061421
lookedForScanner = true;
14071422
Class<?> cls = Class.forName("com.ibm.icu.impl.text.RbnfScannerProviderImpl");
@@ -1412,8 +1427,6 @@ public RbnfLenientScannerProvider getLenientScannerProvider() {
14121427
// any failure, we just ignore and return null
14131428
}
14141429
}
1415-
1416-
return scannerProvider;
14171430
}
14181431

14191432
/**
@@ -2066,10 +2079,12 @@ private String adjustForContext(String result) {
20662079
// should only happen when deserializing, etc.
20672080
capitalizationBrkIter = BreakIterator.getSentenceInstance(locale);
20682081
}
2082+
// Clone the break iterator to avoid mutating it (thread safety).
2083+
BreakIterator iter = (BreakIterator) capitalizationBrkIter.clone();
20692084
return UCharacter.toTitleCase(
20702085
locale,
20712086
result,
2072-
capitalizationBrkIter,
2087+
iter,
20732088
UCharacter.TITLECASE_NO_LOWERCASE
20742089
| UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT);
20752090
}

0 commit comments

Comments
 (0)