Skip to content
This repository was archived by the owner on Aug 31, 2021. It is now read-only.

Commit 162983e

Browse files
committed
[[ Bug 17232 ]] Correct fast code path for single unicode char in string
This patch corrects two problems. The first is a mismatch between the values of the enums MCUnicodeCompareOption and MCStringOptions. The second is that a single-character needle was not case-folded in MCUnicodeFirstIndexOfChar; it is now folded when the comparison option is folded or caseless.
1 parent 7d8f286 commit 162983e

File tree

4 files changed

+47
-5
lines changed

4 files changed

+47
-5
lines changed

docs/notes/bugfix-17232.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Correct searching for a single character in a unicode string

libfoundation/include/foundation-unicode.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -331,9 +331,9 @@ bool MCUnicodeCaseFold(const unichar_t *p_in, uindex_t p_in_length,
331331
enum MCUnicodeCompareOption
332332
{
333333
kMCUnicodeCompareOptionExact = 0, // Codepoint (not code unit!) equality
334-
kMCUnicodeCompareOptionNormalised = 1, // Normalise inputs before comparison
335-
kMCUnicodeCompareOptionCaseless = 2, // Both normalise and case fold
336-
kMCUnicodeCompareOptionFolded = 3, // Case fold inputs before comparison
334+
kMCUnicodeCompareOptionNormalised = 1, // Normalise inputs before comparison
335+
kMCUnicodeCompareOptionFolded = 2, // Case fold inputs before comparison
336+
kMCUnicodeCompareOptionCaseless = 3, // Both normalise and case fold
337337
};
338338

339339
////////////////////////////////////////////////////////////////////////////////

libfoundation/src/foundation-unicode.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,12 +1095,22 @@ bool MCUnicodeFirstIndexOfChar(const unichar_t *p_string, uindex_t p_string_leng
10951095
{
10961096
// Create filter chain for the string being searched
10971097
MCTextFilter* t_string_filter = MCTextFilterCreate(p_string, p_string_length, kMCStringEncodingUTF16, p_option);
1098-
1098+
1099+
// Process the needle codepoint according to the string options.
1100+
// We use NFC for normalization, so all single char unicode strings
1101+
// are already normalized. Therefore we just need to fold if
1102+
// caseless or folded.
1103+
codepoint_t t_processed_needle;
1104+
if (p_option == kMCUnicodeCompareOptionFolded || p_option == kMCUnicodeCompareOptionCaseless)
1105+
t_processed_needle = MCUnicodeGetCharacterProperty(p_needle, kMCUnicodePropertySimpleCaseFolding);
1106+
else
1107+
t_processed_needle = p_needle;
1108+
10991109
// Loop until we find the character
11001110
while (t_string_filter->HasData())
11011111
{
11021112
codepoint_t t_cp = t_string_filter->GetNextCodepoint();
1103-
if (t_cp == p_needle)
1113+
if (t_cp == t_processed_needle)
11041114
{
11051115
t_string_filter->MarkText();
11061116
r_index = t_string_filter->GetMarkedLength() - 1;
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
script "CoreStringOffset"
2+
/*
3+
Copyright (C) 2016 LiveCode Ltd.
4+
5+
This file is part of LiveCode.
6+
7+
LiveCode is free software; you can redistribute it and/or modify it under
8+
the terms of the GNU General Public License v3 as published by the Free
9+
Software Foundation.
10+
11+
LiveCode is distributed in the hope that it will be useful, but WITHOUT ANY
12+
WARRANTY; without even the implied warranty of MERCHANTABILITY or
13+
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14+
for more details.
15+
16+
You should have received a copy of the GNU General Public License
17+
along with LiveCode. If not see <http://www.gnu.org/licenses/>. */
18+
19+
on TestOffsetSingleUnicodeChar
20+
local tNeedle, tHaystack
21+
-- Native needle
22+
put "B" into tNeedle
23+
-- Unicode haystack (the string being searched)
24+
put "aAbBcCdDeEfFgG" & numToCodepoint(0x3B1) into tHaystack
25+
26+
set the caseSensitive to true
27+
TestAssert "offset of native needle in unicode string - case-sensitive", offset(tNeedle, tHaystack) is 4
28+
29+
set the caseSensitive to false
30+
TestAssert "offset of native needle in unicode string - caseless", offset(tNeedle, tHaystack) is 3
31+
end TestOffsetSingleUnicodeChar

0 commit comments

Comments
 (0)