Skip to content
This repository was archived by the owner on Aug 31, 2021. It is now read-only.

Commit f30207c

Browse files
author
runrevali
committed
[[ StringDelimiters ]] Allow configurable delimiters to be arbitrary strings
1 parent 311883f commit f30207c

16 files changed: +256 additions, -125 deletions (lines changed)

engine/src/chunk.cpp

Lines changed: 25 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -5421,9 +5421,18 @@ void MCChunk::compile_object_ptr(MCSyntaxFactoryRef ctxt)
54215421

54225422
////////////////////////////////////////////////////////////////////////////////
54235423

5424-
static bool need_increment(Chunk_term p_chunk_type)
5424+
static uindex_t delim_increment(MCExecContext& ctxt, Chunk_term p_chunk_type)
54255425
{
5426-
return (p_chunk_type == CT_LINE || p_chunk_type == CT_ITEM || p_chunk_type == CT_PARAGRAPH);
5426+
if (p_chunk_type == CT_PARAGRAPH)
5427+
return 1;
5428+
5429+
if (p_chunk_type == CT_LINE)
5430+
return MCStringGetLength(ctxt . GetLineDelimiter());
5431+
5432+
if (p_chunk_type == CT_ITEM)
5433+
return MCStringGetLength(ctxt . GetItemDelimiter());
5434+
5435+
return 0;
54275436
}
54285437

54295438
static bool MCStringsIsAmongTheChunksOfRange(MCExecContext& ctxt, MCStringRef p_chunk, MCStringRef p_string, Chunk_term p_chunk_type, MCStringOptions p_options, MCRange p_range)
@@ -5432,17 +5441,17 @@ static bool MCStringsIsAmongTheChunksOfRange(MCExecContext& ctxt, MCStringRef p_
54325441
if (!MCStringFind(p_string, p_range, p_chunk, p_options, &t_range))
54335442
return false;
54345443

5435-
char_t t_delimiter;
5444+
MCStringRef t_delimiter;
54365445
t_delimiter = p_chunk_type == CT_ITEM ? ctxt . GetItemDelimiter() : ctxt . GetLineDelimiter();
54375446

54385447
// if there is no delimiter to the left then continue searching the string.
54395448
if (t_range . offset != 0 &&
5440-
MCStringGetNativeCharAtIndex(p_string, t_range . offset - 1) != t_delimiter)
5449+
!MCStringSubstringEndsWith(p_string, MCRangeMake(0, t_range . offset), t_delimiter, p_options))
54415450
return MCStringsIsAmongTheChunksOfRange(ctxt, p_chunk, p_string, p_chunk_type, p_options, MCRangeMake(t_range . offset + t_range . length, p_range . length));
54425451

54435452
// if there is no delimiter to the right then continue searching the string.
54445453
if (t_range . offset + t_range . length != MCStringGetLength(p_string) &&
5445-
MCStringGetNativeCharAtIndex(p_string, t_range . offset + t_range . length) != t_delimiter)
5454+
!MCStringSubstringBeginsWith(p_string, MCRangeMake(t_range . offset + t_range . length, UINDEX_MAX), t_delimiter, p_options))
54465455
return MCStringsIsAmongTheChunksOfRange(ctxt, p_chunk, p_string, p_chunk_type, p_options, MCRangeMake(t_range . offset + t_range . length, p_range . length));
54475456

54485457
return true;
@@ -5481,17 +5490,17 @@ static bool MCStringsFindChunkInRange(MCExecContext& ctxt, MCStringRef p_string,
54815490
return false;
54825491

54835492
// Work out the delimiter.
5484-
char_t t_delimiter;
5493+
MCStringRef t_delimiter;
54855494
t_delimiter = p_chunk_type == CT_ITEM ? ctxt . GetItemDelimiter() : ctxt . GetLineDelimiter();
54865495

54875496
// If we are in wholeMatches mode, ensure the delimiter is either side.
54885497
if (ctxt . GetWholeMatches())
54895498
{
54905499
if (t_range . offset > 0 &&
5491-
MCStringGetNativeCharAtIndex(p_string, t_range . offset - 1) != t_delimiter)
5500+
!MCStringSubstringEndsWith(p_string, MCRangeMake(0, t_range . offset), t_delimiter, p_options))
54925501
return MCStringsFindChunkInRange(ctxt, p_string, p_needle, p_chunk_type, p_options, MCRangeMake(t_range . offset + t_range . length, p_range . length), r_offset);
54935502
if (t_range . offset + t_range . length < MCStringGetLength(p_string) &&
5494-
MCStringGetNativeCharAtIndex(p_string, t_range . offset + t_range . length) != t_delimiter)
5503+
!MCStringSubstringBeginsWith(p_string, MCRangeMake(t_range . offset + t_range . length, UINDEX_MAX), t_delimiter, p_options))
54955504
return MCStringsFindChunkInRange(ctxt, p_string, p_needle, p_chunk_type, p_options, MCRangeMake(t_range . offset + t_range . length + 1, p_range . length), r_offset);
54965505
}
54975506

@@ -5639,8 +5648,8 @@ bool MCTextChunkIterator::next(MCExecContext& ctxt)
56395648

56405649
uindex_t t_offset = range . offset + range . length;
56415650

5642-
if (!first_chunk && need_increment(type))
5643-
t_offset++;
5651+
if (!first_chunk)
5652+
t_offset += delim_increment(ctxt, type);
56445653

56455654
if (t_offset >= length)
56465655
return false;
@@ -5653,13 +5662,13 @@ bool MCTextChunkIterator::next(MCExecContext& ctxt)
56535662
case CT_LINE:
56545663
case CT_ITEM:
56555664
{
5656-
char_t t_line_delimiter = ctxt . GetLineDelimiter();
5657-
char_t t_item_delimiter = ctxt . GetItemDelimiter();
5665+
MCStringRef t_line_delimiter = ctxt . GetLineDelimiter();
5666+
MCStringRef t_item_delimiter = ctxt . GetItemDelimiter();
56585667

5659-
char_t t_delimiter = (type == CT_LINE) ? t_line_delimiter : t_item_delimiter;
5668+
MCStringRef t_delimiter = (type == CT_LINE) ? t_line_delimiter : t_item_delimiter;
56605669

56615670
// calculate the length of the line / item
5662-
if (!MCStringFirstIndexOfChar(text, t_delimiter, t_offset, kMCCompareExact, t_offset))
5671+
if (!MCStringFirstIndexOf(text, t_delimiter, t_offset, kMCCompareExact, t_offset))
56635672
{
56645673
range . length = length - range . offset;
56655674
exhausted = true;
@@ -5831,12 +5840,12 @@ uindex_t MCTextChunkIterator::chunkoffset(MCExecContext& ctxt, MCStringRef p_nee
58315840
return 0;
58325841
}
58335842

5834-
codepoint_t t_delimiter;
5843+
MCStringRef t_delimiter;
58355844
t_delimiter = type == CT_ITEM ? ctxt . GetItemDelimiter() : ctxt . GetLineDelimiter();
58365845

58375846
// Count the number of delimiters between the start of the first chunk
58385847
// and the start of the found string.
5839-
t_chunk_offset += MCStringCountChar(text, MCRangeMake(range . offset, t_found_offset - range . offset), t_delimiter, t_options);
5848+
t_chunk_offset += MCStringCount(text, MCRangeMake(range . offset, t_found_offset - range . offset), t_delimiter, t_options);
58405849

58415850
if (type == CT_PARAGRAPH)
58425851
t_chunk_offset += MCStringCountChar(text, MCRangeMake(range . offset, t_found_offset - range . offset), 0x2029, t_options);

engine/src/cmdsm.cpp

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1158,7 +1158,6 @@ void MCArrayOp::exec_ctxt(MCExecContext &ctxt)
11581158

11591159
MCAutoStringRef t_element_del;
11601160
MCAutoStringRef t_key_del;
1161-
codepoint_t t_delim_char;
11621161
uint4 chunk;
11631162
chunk = mode;
11641163
switch(chunk)
@@ -1174,20 +1173,16 @@ void MCArrayOp::exec_ctxt(MCExecContext &ctxt)
11741173
}
11751174
break;
11761175
case TYPE_ROW:
1177-
t_delim_char = ctxt.GetRowDelimiter();
1178-
/* UNCHECKED */ MCStringCreateWithBytes((const byte_t*)&t_delim_char, 4, kMCStringEncodingUTF32, false, &t_element_del);
1176+
t_element_del = ctxt.GetRowDelimiter();
11791177
break;
11801178
case TYPE_COLUMN:
1181-
t_delim_char = ctxt.GetColumnDelimiter();
1182-
/* UNCHECKED */ MCStringCreateWithBytes((const byte_t*)&t_delim_char, 4, kMCStringEncodingUTF32, false, &t_element_del);
1179+
t_element_del = ctxt.GetColumnDelimiter();
11831180
break;
11841181
case TYPE_LINE:
1185-
t_delim_char = ctxt.GetLineDelimiter();
1186-
/* UNCHECKED */ MCStringCreateWithBytes((const byte_t*)&t_delim_char, 4, kMCStringEncodingUTF32, false, &t_element_del);
1182+
t_element_del = ctxt.GetLineDelimiter();
11871183
break;
11881184
case TYPE_ITEM:
1189-
t_delim_char = ctxt.GetItemDelimiter();
1190-
/* UNCHECKED */ MCStringCreateWithBytes((const byte_t*)&t_delim_char, 4, kMCStringEncodingUTF32, false, &t_element_del);
1185+
t_element_del = ctxt.GetItemDelimiter();
11911186
break;
11921187
case TYPE_WORD:
11931188
case TYPE_TOKEN:

engine/src/exec-array.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ void MCArraysExecCombineByRowOrColumn(MCExecContext& ctxt, MCArrayRef p_array, b
245245
return;
246246
}
247247

248-
char_t t_delimiter;
248+
MCStringRef t_delimiter;
249249
if (p_is_row)
250250
t_delimiter = ctxt . GetRowDelimiter();
251251
else
@@ -349,7 +349,7 @@ void MCArraysExecSplit(MCExecContext& ctxt, MCStringRef p_string, MCStringRef p_
349349

350350
void MCArraysExecSplitByColumn(MCExecContext& ctxt, MCStringRef p_string, MCArrayRef& r_array)
351351
{
352-
codepoint_t t_row_delim, t_column_delim;
352+
MCStringRef t_row_delim, t_column_delim;
353353
t_row_delim = ctxt . GetRowDelimiter();
354354
t_column_delim = ctxt . GetColumnDelimiter();
355355

@@ -372,7 +372,7 @@ void MCArraysExecSplitByColumn(MCExecContext& ctxt, MCStringRef p_string, MCArra
372372
{
373373
// Find the end of this row
374374
uindex_t t_row_end;
375-
if (!MCStringFirstIndexOfChar(p_string, t_row_delim, t_offset, ctxt . GetCaseSensitive(), t_row_end))
375+
if (!MCStringFirstIndexOf(p_string, t_row_delim, t_offset, ctxt . GetCaseSensitive(), t_row_end))
376376
t_row_end = t_length;
377377

378378
// Iterate over the cells of this row
@@ -383,7 +383,7 @@ void MCArraysExecSplitByColumn(MCExecContext& ctxt, MCStringRef p_string, MCArra
383383
{
384384
// Find the end of this cell
385385
uindex_t t_cell_end;
386-
if (!MCStringFirstIndexOfChar(p_string, t_column_delim, t_cell_offset, ctxt . GetCaseSensitive(), t_cell_end) || t_cell_end > t_row_end)
386+
if (!MCStringFirstIndexOf(p_string, t_column_delim, t_cell_offset, ctxt . GetCaseSensitive(), t_cell_end) || t_cell_end > t_row_end)
387387
t_cell_end = t_row_end;
388388

389389
// Check that the output array has a slot for this column
@@ -398,9 +398,9 @@ void MCArraysExecSplitByColumn(MCExecContext& ctxt, MCStringRef p_string, MCArra
398398
t_success = MCStringMutableCopySubstring(p_string, t_range, t_temp_array[t_column_index]);
399399
else
400400
{
401-
t_success = MCStringAppendChar(t_temp_array[t_column_index], t_row_delim);
401+
t_success = MCStringAppend(t_temp_array[t_column_index], t_row_delim);
402402
if (t_success)
403-
t_success = MCStringAppendFormat(t_temp_array[t_column_delim], "%*@", t_range, p_string);
403+
t_success = MCStringAppendFormat(t_temp_array[t_column_index], "%*@", t_range, p_string);
404404
}
405405

406406
if (!t_success)
@@ -411,11 +411,11 @@ void MCArraysExecSplitByColumn(MCExecContext& ctxt, MCStringRef p_string, MCArra
411411

412412
// Next cell
413413
t_column_index++;
414-
t_cell_offset = t_cell_end + 1;
414+
t_cell_offset = t_cell_end + MCStringGetLength(t_column_delim);
415415
}
416416

417417
// Next row
418-
t_offset = t_row_end;
418+
t_offset = t_row_end + MCStringGetLength(t_row_delim);
419419
}
420420

421421
// Convert the temporary array into a "proper" array

engine/src/exec-engine.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1467,44 +1467,44 @@ void MCEngineGetConvertOctals(MCExecContext& ctxt, bool& r_value)
14671467
r_value = ctxt . GetConvertOctals();
14681468
}
14691469

1470-
void MCEngineSetItemDelimiter(MCExecContext& ctxt, char_t p_value)
1470+
void MCEngineSetItemDelimiter(MCExecContext& ctxt, MCStringRef p_value)
14711471
{
14721472
ctxt . SetItemDelimiter(p_value);
14731473
}
14741474

1475-
void MCEngineGetItemDelimiter(MCExecContext& ctxt, char_t& r_value)
1475+
void MCEngineGetItemDelimiter(MCExecContext& ctxt, MCStringRef& r_value)
14761476
{
1477-
r_value = ctxt . GetItemDelimiter();
1477+
r_value = MCValueRetain(ctxt . GetItemDelimiter());
14781478
}
14791479

1480-
void MCEngineSetLineDelimiter(MCExecContext& ctxt, char_t p_value)
1480+
void MCEngineSetLineDelimiter(MCExecContext& ctxt, MCStringRef p_value)
14811481
{
14821482
ctxt . SetLineDelimiter(p_value);
14831483
}
14841484

1485-
void MCEngineGetLineDelimiter(MCExecContext& ctxt, char_t& r_value)
1485+
void MCEngineGetLineDelimiter(MCExecContext& ctxt, MCStringRef& r_value)
14861486
{
1487-
r_value = ctxt . GetLineDelimiter();
1487+
r_value = MCValueRetain(ctxt . GetLineDelimiter());
14881488
}
14891489

1490-
void MCEngineSetColumnDelimiter(MCExecContext& ctxt, char_t p_value)
1490+
void MCEngineSetColumnDelimiter(MCExecContext& ctxt, MCStringRef p_value)
14911491
{
14921492
ctxt . SetColumnDelimiter(p_value);
14931493
}
14941494

1495-
void MCEngineGetColumnDelimiter(MCExecContext& ctxt, char_t& r_value)
1495+
void MCEngineGetColumnDelimiter(MCExecContext& ctxt, MCStringRef& r_value)
14961496
{
1497-
r_value = ctxt . GetColumnDelimiter();
1497+
r_value = MCValueRetain(ctxt . GetColumnDelimiter());
14981498
}
14991499

1500-
void MCEngineSetRowDelimiter(MCExecContext& ctxt, char_t p_value)
1500+
void MCEngineSetRowDelimiter(MCExecContext& ctxt, MCStringRef p_value)
15011501
{
15021502
ctxt . SetRowDelimiter(p_value);
15031503
}
15041504

1505-
void MCEngineGetRowDelimiter(MCExecContext& ctxt, char_t& r_value)
1505+
void MCEngineGetRowDelimiter(MCExecContext& ctxt, MCStringRef& r_value)
15061506
{
1507-
r_value = ctxt . GetRowDelimiter();
1507+
r_value = MCValueRetain(ctxt . GetRowDelimiter());
15081508
}
15091509

15101510
void MCEngineSetWholeMatches(MCExecContext& ctxt, bool p_value)

engine/src/exec-interface.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3927,7 +3927,7 @@ bool MCInterfaceExecSortContainer(MCExecContext &ctxt, MCStringRef p_data, int p
39273927
// all other forms of search default to the lineDelimiter for now. Note that this is a slight
39283928
// change of behavior as previously sorting containers by line ignored the lineDelimiter and
39293929
// always delimited by ascii 10.
3930-
char t_delimiter;
3930+
MCStringRef t_delimiter;
39313931
if (p_type == CT_ITEM)
39323932
t_delimiter = ctxt . GetItemDelimiter();
39333933
else
@@ -3938,7 +3938,7 @@ bool MCInterfaceExecSortContainer(MCExecContext &ctxt, MCStringRef p_data, int p
39383938

39393939
MCAutoStringRefArray t_chunks;
39403940

3941-
extern bool MCStringsSplit(MCStringRef p_string, codepoint_t p_separator, MCStringRef*&r_strings, uindex_t& r_count);
3941+
extern bool MCStringsSplit(MCStringRef p_string, MCStringRef p_separator, MCStringRef*&r_strings, uindex_t& r_count);
39423942

39433943
if (!MCStringsSplit(p_data, t_delimiter, t_chunks . PtrRef(), t_chunks . CountRef()))
39443944
return false;

engine/src/exec-strings-chunk.cpp

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,14 @@ void MCStringsSkipWord(MCExecContext& ctxt, MCStringRef p_string, bool p_skip_sp
6464
// then bump the offset up to the next quotation mark + 1, or the beginning of the next line
6565
// if neither of these are present then set offset to string length.
6666
MCStringFirstIndexOfChar(p_string, '"', x_offset + 1, kMCCompareExact, t_end_quote_offset);
67-
MCStringFirstIndexOfChar(p_string, ctxt . GetLineDelimiter(), x_offset + 1, kMCCompareExact, t_end_line_offset);
67+
MCStringFirstIndexOf(p_string, ctxt . GetLineDelimiter(), x_offset + 1, kMCCompareExact, t_end_line_offset);
6868

69-
if (t_end_quote_offset == t_length && t_end_line_offset == t_length)
70-
x_offset = t_length;
69+
if (t_end_quote_offset < t_end_line_offset)
70+
x_offset = t_end_quote_offset + 1;
71+
else if (t_end_line_offset < t_end_quote_offset)
72+
x_offset = t_end_line_offset + MCStringGetLength(ctxt . GetLineDelimiter());
7173
else
72-
x_offset = MCU_min(t_end_quote_offset, t_end_line_offset) + 1;
74+
x_offset = t_length;
7375
}
7476
else
7577
{
@@ -245,16 +247,16 @@ void MCStringsMarkTextChunk(MCExecContext& ctxt, MCStringRef p_string, Chunk_ter
245247
case CT_LINE:
246248
case CT_ITEM:
247249
{
248-
char_t t_line_delimiter = ctxt . GetLineDelimiter();
249-
char_t t_item_delimiter = ctxt . GetItemDelimiter();
250+
MCStringRef t_line_delimiter = ctxt . GetLineDelimiter();
251+
MCStringRef t_item_delimiter = ctxt . GetItemDelimiter();
250252

251-
char_t t_delimiter = (p_chunk_type == CT_LINE) ? t_line_delimiter : t_item_delimiter;
253+
MCStringRef t_delimiter = (p_chunk_type == CT_LINE) ? t_line_delimiter : t_item_delimiter;
252254

253255
// calculate the start of the (p_first)th line or item
254-
while (p_first && MCStringFirstIndexOfChar(p_string, t_delimiter, t_offset, kMCCompareExact, t_offset))
256+
while (p_first && MCStringFirstIndexOf(p_string, t_delimiter, t_offset, kMCCompareExact, t_offset))
255257
{
256258
p_first--;
257-
t_offset++;
259+
t_offset += MCStringGetLength(t_delimiter);
258260
}
259261

260262
// if we couldn't find enough delimiters, set r_add to the number of
@@ -270,15 +272,15 @@ void MCStringsMarkTextChunk(MCExecContext& ctxt, MCStringRef p_string, Chunk_ter
270272
// calculate the length of the next p_count lines / items
271273
while (p_count--)
272274
{
273-
if (t_offset > t_end_index || !MCStringFirstIndexOfChar(p_string, t_delimiter, t_offset, kMCCompareExact, t_offset))
275+
if (t_offset > t_end_index || !MCStringFirstIndexOf(p_string, t_delimiter, t_offset, kMCCompareExact, t_offset))
274276
{
275277
r_end = t_length;
276278
break;
277279
}
278280
if (p_count == 0)
279281
r_end = t_offset;
280282
else
281-
t_offset++;
283+
t_offset += MCStringGetLength(t_delimiter);
282284
}
283285

284286
if (p_whole_chunk && !p_further_chunks)
@@ -497,11 +499,11 @@ void MCStringsSetTextChunk(MCExecContext& ctxt, MCStringRef p_source, Prepositio
497499

498500
if (t_add && (p_chunk_type == CT_ITEM || p_chunk_type == CT_LINE))
499501
{
500-
char_t t_delimiter;
502+
MCStringRef t_delimiter;
501503
t_delimiter = p_chunk_type == CT_LINE ? ctxt . GetLineDelimiter() : ctxt . GetItemDelimiter();
502504
while (t_add--)
503505
{
504-
MCStringPrependNativeChar(x_target, t_delimiter);
506+
MCStringPrepend(x_target, t_delimiter);
505507
}
506508
}
507509

@@ -845,14 +847,14 @@ void MCStringsAddChunks(MCExecContext& ctxt, Chunk_term p_chunk_type, uindex_t p
845847
if ((p_chunk_type != CT_ITEM && p_chunk_type != CT_LINE) || !p_to_add)
846848
return;
847849

848-
char_t t_delimiter;
850+
MCStringRef t_delimiter;
849851
MCAutoStringRef t_string;
850852
/* UNCHECKED */ MCStringMutableCopyAndRelease((MCStringRef)x_text . text, &t_string);
851853
t_delimiter = p_chunk_type == CT_LINE ? ctxt . GetLineDelimiter() : ctxt . GetItemDelimiter();
852854
uindex_t t_count = p_to_add;
853855

854856
while (t_count--)
855-
/* UNCHECKED */ MCStringInsertNativeChar(*t_string, x_text . finish, t_delimiter);
857+
/* UNCHECKED */ MCStringInsert(*t_string, x_text . finish, t_delimiter);
856858

857859
/* UNCHECKED */ MCStringCopy(*t_string, (MCStringRef&)x_text . text);
858860

0 commit comments

Comments
 (0)