Skip to content

Commit aa92589

Browse files
committed
svnmerge duplicated contents of Objects/stringlib/split.h
1 parent cbfdee3 commit aa92589

1 file changed

Lines changed: 0 additions & 394 deletions

File tree

Objects/stringlib/split.h

Lines changed: 0 additions & 394 deletions
Original file line number | Diff line number | Diff line change
@@ -392,397 +392,3 @@ stringlib_splitlines(PyObject* str_obj,
392392
}
393393

394394
#endif
395-
/* stringlib: split implementation */
396-
397-
#ifndef STRINGLIB_SPLIT_H
398-
#define STRINGLIB_SPLIT_H
399-
400-
#ifndef STRINGLIB_FASTSEARCH_H
401-
#error must include "stringlib/fastsearch.h" before including this module
402-
#endif
403-
404-
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  E.g. "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for
   human text (roughly 11 words per line) and field-delimited data (usually
   1-10 fields).  For large strings the split algorithms are bandwidth
   limited, so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for a split limited to `maxsplit`
   separators: maxsplit + 1 (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The argument is parenthesized so that any expression can be
   passed safely; it is evaluated more than once, so it must not have side
   effects. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
416-
417-
/* Create a new object from data[left:right] with STRINGLIB_NEW and append it
   to `list` with PyList_Append.  The reference held in `sub` is released
   whether or not the append succeeds.

   Relies on the expanding function providing the locals `list` and `sub`
   and an `onError` label, which is jumped to when object creation or the
   append fails.  `left` is evaluated twice.

   Wrapped in do { } while (0) so the macro behaves as a single statement,
   including under an unbraced if/else. */
#define SPLIT_APPEND(data, left, right)                 \
    do {                                                \
        sub = STRINGLIB_NEW((data) + (left),            \
                            (right) - (left));          \
        if (sub == NULL)                                \
            goto onError;                               \
        if (PyList_Append(list, sub)) {                 \
            Py_DECREF(sub);                             \
            goto onError;                               \
        }                                               \
        Py_DECREF(sub);                                 \
    } while (0)
428-
429-
/* Create a new object from data[left:right] with STRINGLIB_NEW and store it
   as the next element of `list`, then increment `count`.

   While `count` is below MAX_PREALLOC the object is written directly into
   slot `count` with PyList_SET_ITEM (the reference in `sub` is handed to the
   list); after that it is appended with PyList_Append and the local
   reference is released.

   Relies on the expanding function providing the locals `list`, `sub` and
   `count` and an `onError` label, which is jumped to when object creation
   or the append fails.  `left` is evaluated twice.

   Wrapped in do { } while (0) so the macro behaves as a single statement,
   including under an unbraced if/else. */
#define SPLIT_ADD(data, left, right)                    \
    do {                                                \
        sub = STRINGLIB_NEW((data) + (left),            \
                            (right) - (left));          \
        if (sub == NULL)                                \
            goto onError;                               \
        if (count < MAX_PREALLOC) {                     \
            PyList_SET_ITEM(list, count, sub);          \
        }                                               \
        else {                                          \
            if (PyList_Append(list, sub)) {             \
                Py_DECREF(sub);                         \
                goto onError;                           \
            }                                           \
            Py_DECREF(sub);                             \
        }                                               \
        count++;                                        \
    } while (0)
445-
446-
447-
/* Always force the list to the expected size: overwrite the list's size
   field with the caller's local `count`, i.e. the number of elements that
   were actually stored.  The expansion is parenthesized so it stays a single
   expression wherever it is used. */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)
449-
450-
/* Split str[0:str_len] on runs of whitespace (as defined by
   STRINGLIB_ISSPACE), scanning left to right and producing at most
   `maxcount` words plus one trailing remainder.

   str_obj  - the object that owns `str`; when STRINGLIB_MUTABLE is not
              defined and the whole buffer is one word, this object itself
              is stored in the result instead of a copy.
   maxcount - maximum number of words to split off before the rest of the
              buffer is kept as a single element.

   Returns a new list, or NULL if the list or any element could not be
   created. */
Py_LOCAL_INLINE(PyObject *)
stringlib_split_whitespace(PyObject* str_obj,
                           const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                           Py_ssize_t maxcount)
{
    Py_ssize_t pos = 0;         /* scan cursor */
    Py_ssize_t word_start = 0;  /* first character of the current word */
    Py_ssize_t count = 0;       /* elements stored so far (used by SPLIT_ADD) */
    PyObject *sub;
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL) {
        return NULL;
    }

    for (; maxcount > 0; maxcount--) {
        /* Step over the whitespace in front of the next word. */
        for (; pos < str_len && STRINGLIB_ISSPACE(str[pos]); ++pos) {
        }
        if (pos == str_len) {
            break;
        }

        /* Consume the word itself. */
        word_start = pos;
        for (++pos; pos < str_len && !STRINGLIB_ISSPACE(str[pos]); ++pos) {
        }

#ifndef STRINGLIB_MUTABLE
        if (word_start == 0 && pos == str_len
            && STRINGLIB_CHECK_EXACT(str_obj)) {
            /* The buffer is a single word with no whitespace at all, so the
               original object can serve as list[0]. */
            Py_INCREF(str_obj);
            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
            count++;
            break;
        }
#endif
        SPLIT_ADD(str, word_start, pos);
    }

    if (pos < str_len) {
        /* Only reached when maxcount ran out: drop the whitespace that
           follows the last word and keep everything after it as one
           element. */
        for (; pos < str_len && STRINGLIB_ISSPACE(str[pos]); ++pos) {
        }
        if (pos != str_len) {
            SPLIT_ADD(str, pos, str_len);
        }
    }

    FIX_PREALLOC_SIZE(list);
    return list;

onError:
    Py_DECREF(list);
    return NULL;
}
497-
498-
/* Split str[0:str_len] on the single character `ch`, scanning left to
   right and splitting at most `maxcount` times.  The text after the last
   split point is always added as the final element.

   str_obj  - the object that owns `str`; when STRINGLIB_MUTABLE is not
              defined and no split happened, this object itself is stored in
              the result instead of a copy.

   Returns a new list, or NULL if the list or any element could not be
   created. */
Py_LOCAL_INLINE(PyObject *)
stringlib_split_char(PyObject* str_obj,
                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                     const STRINGLIB_CHAR ch,
                     Py_ssize_t maxcount)
{
    Py_ssize_t seg_start = 0;   /* start of the segment being collected */
    Py_ssize_t scan = 0;        /* scan cursor */
    Py_ssize_t count = 0;       /* elements stored so far (used by SPLIT_ADD) */
    PyObject *sub;
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL) {
        return NULL;
    }

    while (scan < str_len && maxcount > 0) {
        maxcount--;

        /* Advance to the next occurrence of ch, or to the end. */
        while (scan < str_len && str[scan] != ch) {
            scan++;
        }
        if (scan < str_len) {
            SPLIT_ADD(str, seg_start, scan);
            scan++;
            seg_start = scan;
        }
    }

#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* Nothing was split off, so the original object becomes list[0]. */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    }
    else
#endif
    if (seg_start <= str_len) {
        /* Tail after the last separator (possibly empty). */
        SPLIT_ADD(str, seg_start, str_len);
    }

    FIX_PREALLOC_SIZE(list);
    return list;

onError:
    Py_DECREF(list);
    return NULL;
}
540-
541-
/* Split str[0:str_len] on the separator sep[0:sep_len], scanning left to
   right and splitting at most `maxcount` times.  The text after the last
   split point is always added as the final element.

   An empty separator sets ValueError("empty separator") and returns NULL.
   A one-character separator is delegated to stringlib_split_char().
   Longer separators are located with fastsearch() in FAST_SEARCH mode.

   str_obj  - the object that owns `str`; when STRINGLIB_MUTABLE is not
              defined and no separator was found, this object itself is
              stored in the result instead of a copy.

   Returns a new list, or NULL on error. */
Py_LOCAL_INLINE(PyObject *)
stringlib_split(PyObject* str_obj,
                const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
                Py_ssize_t maxcount)
{
    Py_ssize_t offset = 0;      /* start of the unsplit remainder */
    Py_ssize_t rel;             /* match position relative to offset */
    Py_ssize_t count = 0;       /* elements stored so far (used by SPLIT_ADD) */
    PyObject *list, *sub;

    if (sep_len == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }
    if (sep_len == 1) {
        return stringlib_split_char(str_obj, str, str_len, sep[0], maxcount);
    }

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL) {
        return NULL;
    }

    for (; maxcount > 0; maxcount--) {
        rel = fastsearch(str + offset, str_len - offset,
                         sep, sep_len, -1, FAST_SEARCH);
        if (rel < 0) {
            break;
        }
        SPLIT_ADD(str, offset, offset + rel);
        /* Resume just past the separator that was found. */
        offset += rel + sep_len;
    }

#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* No separator found, so the original object becomes list[0]. */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    }
    else
#endif
    {
        /* Tail after the last separator (possibly empty). */
        SPLIT_ADD(str, offset, str_len);
    }

    FIX_PREALLOC_SIZE(list);
    return list;

onError:
    Py_DECREF(list);
    return NULL;
}
588-
589-
/* Split str[0:str_len] on runs of whitespace (as defined by
   STRINGLIB_ISSPACE), scanning right to left and producing at most
   `maxcount` words plus one leading remainder.  Elements are collected in
   reverse and the list is reversed before it is returned.

   str_obj  - the object that owns `str`; when STRINGLIB_MUTABLE is not
              defined and the whole buffer is one word, this object itself
              is stored in the result instead of a copy.

   Returns a new list, or NULL if the list or any element could not be
   created or the final reversal fails. */
Py_LOCAL_INLINE(PyObject *)
stringlib_rsplit_whitespace(PyObject* str_obj,
                            const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                            Py_ssize_t maxcount)
{
    Py_ssize_t pos = str_len - 1;       /* scan cursor, moving leftwards */
    Py_ssize_t word_end = str_len - 1;  /* last character of current word */
    Py_ssize_t count = 0;       /* elements stored so far (used by SPLIT_ADD) */
    PyObject *sub;
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL) {
        return NULL;
    }

    for (; maxcount > 0; maxcount--) {
        /* Step over the whitespace that trails the next word. */
        for (; pos >= 0 && STRINGLIB_ISSPACE(str[pos]); --pos) {
        }
        if (pos < 0) {
            break;
        }

        /* Consume the word itself, right to left. */
        word_end = pos;
        for (--pos; pos >= 0 && !STRINGLIB_ISSPACE(str[pos]); --pos) {
        }

#ifndef STRINGLIB_MUTABLE
        if (word_end == str_len - 1 && pos < 0
            && STRINGLIB_CHECK_EXACT(str_obj)) {
            /* The buffer is a single word with no whitespace at all, so the
               original object can serve as list[0]. */
            Py_INCREF(str_obj);
            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
            count++;
            break;
        }
#endif
        SPLIT_ADD(str, pos + 1, word_end + 1);
    }

    if (pos >= 0) {
        /* Only reached when maxcount ran out: drop the whitespace that
           precedes the last word taken and keep everything before it as
           one element. */
        for (; pos >= 0 && STRINGLIB_ISSPACE(str[pos]); --pos) {
        }
        if (pos >= 0) {
            SPLIT_ADD(str, 0, pos + 1);
        }
    }

    FIX_PREALLOC_SIZE(list);
    /* Elements were gathered back to front. */
    if (PyList_Reverse(list) < 0) {
        goto onError;
    }
    return list;

onError:
    Py_DECREF(list);
    return NULL;
}
638-
639-
/* Split str[0:str_len] on the single character `ch`, scanning right to
   left and splitting at most `maxcount` times.  The text before the
   leftmost split point is always added as the last collected element, and
   the list is reversed before it is returned.

   str_obj  - the object that owns `str`; when STRINGLIB_MUTABLE is not
              defined and no split happened, this object itself is stored in
              the result instead of a copy.

   Returns a new list, or NULL if the list or any element could not be
   created or the final reversal fails. */
Py_LOCAL_INLINE(PyObject *)
stringlib_rsplit_char(PyObject* str_obj,
                      const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                      const STRINGLIB_CHAR ch,
                      Py_ssize_t maxcount)
{
    Py_ssize_t scan = str_len - 1;      /* scan cursor, moving leftwards */
    Py_ssize_t seg_end = str_len - 1;   /* last index of current segment */
    Py_ssize_t count = 0;       /* elements stored so far (used by SPLIT_ADD) */
    PyObject *sub;
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL) {
        return NULL;
    }

    while (scan >= 0 && maxcount > 0) {
        maxcount--;

        /* Walk back to the previous occurrence of ch, or off the front. */
        while (scan >= 0 && str[scan] != ch) {
            scan--;
        }
        if (scan >= 0) {
            SPLIT_ADD(str, scan + 1, seg_end + 1);
            scan--;
            seg_end = scan;
        }
    }

#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* Nothing was split off, so the original object becomes list[0]. */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    }
    else
#endif
    if (seg_end >= -1) {
        /* Head before the leftmost separator (possibly empty). */
        SPLIT_ADD(str, 0, seg_end + 1);
    }

    FIX_PREALLOC_SIZE(list);
    /* Elements were gathered back to front. */
    if (PyList_Reverse(list) < 0) {
        goto onError;
    }
    return list;

onError:
    Py_DECREF(list);
    return NULL;
}
682-
683-
/* Split str[0:str_len] on the separator sep[0:sep_len], scanning right to
   left and splitting at most `maxcount` times.  The text before the
   leftmost split point is always added as the last collected element, and
   the list is reversed before it is returned.

   An empty separator sets ValueError("empty separator") and returns NULL.
   A one-character separator is delegated to stringlib_rsplit_char().
   Longer separators are located with fastsearch() in FAST_RSEARCH mode.

   str_obj  - the object that owns `str`; when STRINGLIB_MUTABLE is not
              defined and no separator was found, this object itself is
              stored in the result instead of a copy.

   Returns a new list, or NULL on error. */
Py_LOCAL_INLINE(PyObject *)
stringlib_rsplit(PyObject* str_obj,
                 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
                 Py_ssize_t maxcount)
{
    Py_ssize_t tail_end = str_len;  /* end (exclusive) of unsplit prefix */
    Py_ssize_t hit;                 /* index of the separator just found */
    Py_ssize_t count = 0;       /* elements stored so far (used by SPLIT_ADD) */
    PyObject *list, *sub;

    if (sep_len == 0) {
        PyErr_SetString(PyExc_ValueError, "empty separator");
        return NULL;
    }
    if (sep_len == 1) {
        return stringlib_rsplit_char(str_obj, str, str_len, sep[0], maxcount);
    }

    list = PyList_New(PREALLOC_SIZE(maxcount));
    if (list == NULL) {
        return NULL;
    }

    for (; maxcount > 0; maxcount--) {
        hit = fastsearch(str, tail_end, sep, sep_len, -1, FAST_RSEARCH);
        if (hit < 0) {
            break;
        }
        SPLIT_ADD(str, hit + sep_len, tail_end);
        /* Continue searching in the part before this separator. */
        tail_end = hit;
    }

#ifndef STRINGLIB_MUTABLE
    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
        /* No separator found, so the original object becomes list[0]. */
        Py_INCREF(str_obj);
        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
        count++;
    }
    else
#endif
    {
        /* Head before the leftmost separator (possibly empty). */
        SPLIT_ADD(str, 0, tail_end);
    }

    FIX_PREALLOC_SIZE(list);
    /* Elements were gathered back to front. */
    if (PyList_Reverse(list) < 0) {
        goto onError;
    }
    return list;

onError:
    Py_DECREF(list);
    return NULL;
}
731-
732-
/* Split str[0:str_len] into lines at every character for which
   STRINGLIB_ISLINEBREAK is true, treating "\r\n" as a single line break.

   str_obj  - the object that owns `str`; when STRINGLIB_MUTABLE is not
              defined and the first line already spans the whole buffer,
              this object itself is appended instead of a copy.
   keepends - when non-zero, each line keeps its terminating line break.

   Returns a new list (empty when str_len is 0), or NULL if the list or any
   element could not be created. */
Py_LOCAL_INLINE(PyObject *)
stringlib_splitlines(PyObject* str_obj,
                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
                     int keepends)
{
    /* The preallocated-list scheme is deliberately not used here:
       splitlines is usually run with hundreds of newlines, and the
       overhead of switching between PyList_SET_ITEM and append causes
       about a 2-3% slowdown for that common case.  A smarter
       implementation could hoist the check so the SET_ITEMs are done first
       and appends only happen once the prealloc buffer is full, but that
       is too much work for little gain. */

    Py_ssize_t pos = 0;         /* scan cursor */
    Py_ssize_t line_start = 0;  /* first character of the current line */
    PyObject *sub;
    PyObject *list;

    list = PyList_New(0);
    if (list == NULL) {
        return NULL;
    }

    while (pos < str_len) {
        Py_ssize_t line_end;

        /* Find the end of the current line. */
        for (; pos < str_len && !STRINGLIB_ISLINEBREAK(str[pos]); ++pos) {
        }

        /* Step past the line break, reading CRLF as one line break. */
        line_end = pos;
        if (pos < str_len) {
            if (str[pos] == '\r' && pos + 1 < str_len && str[pos+1] == '\n') {
                pos += 2;
            }
            else {
                pos += 1;
            }
            if (keepends) {
                line_end = pos;
            }
        }

#ifndef STRINGLIB_MUTABLE
        if (line_start == 0 && line_end == str_len
            && STRINGLIB_CHECK_EXACT(str_obj)) {
            /* The first line covers the whole buffer, so the original
               object can be appended as the only element. */
            if (PyList_Append(list, str_obj)) {
                goto onError;
            }
            break;
        }
#endif
        SPLIT_APPEND(str, line_start, line_end);
        line_start = pos;
    }
    return list;

onError:
    Py_DECREF(list);
    return NULL;
}
787-
788-
#endif

0 commit comments

Comments
 (0)