Skip to content

Commit 618d5fb

Browse files
thisisnic and AlenkaF authored
GH-47389: [Python] CSV and JSON options lack a nice repr/str (#47397)
### Rationale for this change

CSV and JSON options lack nice `__repr__`/`__str__` dunder methods.

### What changes are included in this PR?

Add both of these methods.

### Are these changes tested?

Will be once it's ready for review.

### Are there any user-facing changes?

No

* GitHub Issue: #47389

Lead-authored-by: Nic Crane <thisisnic@gmail.com>
Co-authored-by: AlenkaF <frim.alenka@gmail.com>
Co-authored-by: Alenka Frim <AlenkaF@users.noreply.github.com>
Signed-off-by: AlenkaF <frim.alenka@gmail.com>
1 parent f9315d4 commit 618d5fb

4 files changed

Lines changed: 155 additions & 0 deletions

File tree

python/pyarrow/_csv.pyx

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,22 @@ cdef class ReadOptions(_Weakrefable):
332332
except TypeError:
333333
return False
334334

335+
def _repr_base(self):
336+
return (f"""
337+
use_threads={self.use_threads},
338+
block_size={self.block_size},
339+
skip_rows={self.skip_rows},
340+
skip_rows_after_names={self.skip_rows_after_names},
341+
column_names={self.column_names},
342+
autogenerate_column_names={self.autogenerate_column_names},
343+
encoding={self.encoding!r}""")
344+
345+
def __repr__(self):
346+
return (f"<pyarrow.csv.ReadOptions>({self._repr_base()})")
347+
348+
def __str__(self):
349+
return (f"ReadOptions({self._repr_base()})")
350+
335351

336352
cdef class ParseOptions(_Weakrefable):
337353
"""
@@ -585,6 +601,23 @@ cdef class ParseOptions(_Weakrefable):
585601
except TypeError:
586602
return False
587603

604+
def _repr_base(self):
605+
return (f"""
606+
delimiter={self.delimiter!r},
607+
quote_char={self.quote_char!r},
608+
double_quote={self.double_quote},
609+
escape_char={self.escape_char!r},
610+
newlines_in_values={self.newlines_in_values},
611+
ignore_empty_lines={self.ignore_empty_lines},
612+
invalid_row_handler={getattr(self.invalid_row_handler, '__name__',
613+
self.invalid_row_handler)}""")
614+
615+
def __repr__(self):
616+
return (f"<pyarrow.csv.ParseOptions>({self._repr_base()})")
617+
618+
def __str__(self):
619+
return (f"ParseOptions({self._repr_base()})")
620+
588621

589622
cdef class _ISO8601(_Weakrefable):
590623
"""
@@ -1108,6 +1141,28 @@ cdef class ConvertOptions(_Weakrefable):
11081141
except TypeError:
11091142
return False
11101143

1144+
def _repr_base(self):
1145+
return (f"""
1146+
check_utf8={self.check_utf8},
1147+
column_types={self.column_types},
1148+
null_values={self.null_values},
1149+
true_values={self.true_values},
1150+
false_values={self.false_values},
1151+
decimal_point={self.decimal_point!r},
1152+
strings_can_be_null={self.strings_can_be_null},
1153+
quoted_strings_can_be_null={self.quoted_strings_can_be_null},
1154+
include_columns={self.include_columns},
1155+
include_missing_columns={self.include_missing_columns},
1156+
auto_dict_encode={self.auto_dict_encode},
1157+
auto_dict_max_cardinality={self.auto_dict_max_cardinality},
1158+
timestamp_parsers={[str(i) for i in self.timestamp_parsers]}""")
1159+
1160+
def __repr__(self):
1161+
return (f"<pyarrow.csv.ConvertOptions>({self._repr_base()})")
1162+
1163+
def __str__(self):
1164+
return (f"ConvertOptions({self._repr_base()})")
1165+
11111166

11121167
cdef _get_reader(input_file, ReadOptions read_options,
11131168
shared_ptr[CInputStream]* out):
@@ -1459,6 +1514,19 @@ cdef class WriteOptions(_Weakrefable):
14591514
def validate(self):
14601515
check_status(self.options.get().Validate())
14611516

1517+
def _repr_base(self):
1518+
return (f"""
1519+
include_header={self.include_header},
1520+
batch_size={self.batch_size},
1521+
delimiter={self.delimiter!r},
1522+
quoting_style={self.quoting_style!r}""")
1523+
1524+
def __repr__(self):
1525+
return (f"<pyarrow.csv.WriteOptions>({self._repr_base()})")
1526+
1527+
def __str__(self):
1528+
return (f"WriteOptions({self._repr_base()})")
1529+
14621530

14631531
cdef _get_write_options(WriteOptions write_options, CCSVWriteOptions* out):
14641532
if write_options is None:

python/pyarrow/_json.pyx

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,16 @@ cdef class ReadOptions(_Weakrefable):
105105
except TypeError:
106106
return False
107107

108+
def __repr__(self):
109+
return (f"""<pyarrow.json.ReadOptions>(
110+
use_threads={self.use_threads},
111+
block_size={self.block_size})""")
112+
113+
def __str__(self):
114+
return (f"""ReadOptions(
115+
use_threads={self.use_threads},
116+
block_size={self.block_size})""")
117+
108118
@staticmethod
109119
cdef ReadOptions wrap(CJSONReadOptions options):
110120
out = ReadOptions()
@@ -244,6 +254,18 @@ cdef class ParseOptions(_Weakrefable):
244254
except TypeError:
245255
return False
246256

257+
def _repr_base(self):
258+
return (f"""
259+
explicit_schema={self.explicit_schema},
260+
newlines_in_values={self.newlines_in_values},
261+
unexpected_field_behavior={self.unexpected_field_behavior!r}""")
262+
263+
def __repr__(self):
264+
return (f"<pyarrow.json.ParseOptions>({self._repr_base()})")
265+
266+
def __str__(self):
267+
return (f"ParseOptions({self._repr_base()})")
268+
247269
@staticmethod
248270
cdef ParseOptions wrap(CJSONParseOptions options):
249271
out = ParseOptions()

python/pyarrow/tests/test_csv.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,18 @@ def test_read_options(pickle_module):
213213
opts.column_names = ('a', 'b')
214214
opts.validate()
215215

216+
expected_repr_inner = """
217+
use_threads=True,
218+
block_size=1048576,
219+
skip_rows=0,
220+
skip_rows_after_names=0,
221+
column_names=['a', 'b'],
222+
autogenerate_column_names=True,
223+
encoding='utf8'"""
224+
225+
assert repr(opts) == f"<pyarrow.csv.ReadOptions>({expected_repr_inner})"
226+
assert str(opts) == f"ReadOptions({expected_repr_inner})"
227+
216228

217229
def test_parse_options(pickle_module):
218230
cls = ParseOptions
@@ -273,6 +285,18 @@ def test_parse_options(pickle_module):
273285
opts.escape_char = "\r"
274286
opts.validate()
275287

288+
expected_repr_inner = r"""
289+
delimiter=',',
290+
quote_char='"',
291+
double_quote=True,
292+
escape_char='\r',
293+
newlines_in_values=False,
294+
ignore_empty_lines=True,
295+
invalid_row_handler=None"""
296+
297+
assert repr(opts) == f"<pyarrow.csv.ParseOptions>({expected_repr_inner})"
298+
assert str(opts) == f"ParseOptions({expected_repr_inner})"
299+
276300

277301
def test_convert_options(pickle_module):
278302
cls = ConvertOptions
@@ -354,6 +378,23 @@ def test_convert_options(pickle_module):
354378
assert opts.auto_dict_max_cardinality == 999
355379
assert opts.timestamp_parsers == [ISO8601, '%Y-%m-%d']
356380

381+
expected_repr_inner = ("""
382+
check_utf8=True,
383+
column_types={'a': DataType(null)},
384+
null_values=['N', 'nn'],
385+
true_values=['T', 'tt'],
386+
false_values=['F', 'ff'],
387+
decimal_point='.',
388+
strings_can_be_null=False,
389+
quoted_strings_can_be_null=True,
390+
include_columns=[],
391+
include_missing_columns=False,
392+
auto_dict_encode=False,
393+
auto_dict_max_cardinality=999,
394+
timestamp_parsers=['ISO8601', '%Y-%m-%d']""")
395+
assert repr(opts) == f"<pyarrow.csv.ConvertOptions>({expected_repr_inner})"
396+
assert str(opts) == f"ConvertOptions({expected_repr_inner})"
397+
357398

358399
def test_write_options():
359400
cls = WriteOptions
@@ -378,6 +419,15 @@ def test_write_options():
378419
opts.batch_size = 0
379420
opts.validate()
380421

422+
expected_repr_inner = """
423+
include_header=True,
424+
batch_size=0,
425+
delimiter=',',
426+
quoting_style='needed'"""
427+
428+
assert repr(opts) == f"<pyarrow.csv.WriteOptions>({expected_repr_inner})"
429+
assert str(opts) == f"WriteOptions({expected_repr_inner})"
430+
381431

382432
class BaseTestCSV(abc.ABC):
383433
"""Common tests which are shared by streaming and non streaming readers"""

python/pyarrow/tests/test_json.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,13 @@ def test_read_options(pickle_module):
8080
assert opts.block_size == 1234
8181
assert opts.use_threads is False
8282

83+
expected_repr_inner = """
84+
use_threads=False,
85+
block_size=1234"""
86+
87+
assert repr(opts) == f"<pyarrow.json.ReadOptions>({expected_repr_inner})"
88+
assert str(opts) == f"ReadOptions({expected_repr_inner})"
89+
8390
check_options_class_pickling(cls, pickler=pickle_module,
8491
block_size=1234,
8592
use_threads=False)
@@ -94,6 +101,14 @@ def test_parse_options(pickle_module):
94101
opts.newlines_in_values = True
95102
assert opts.newlines_in_values is True
96103

104+
expected_repr_inner = """
105+
explicit_schema=None,
106+
newlines_in_values=True,
107+
unexpected_field_behavior='infer'"""
108+
109+
assert repr(opts) == f"<pyarrow.json.ParseOptions>({expected_repr_inner})"
110+
assert str(opts) == f"ParseOptions({expected_repr_inner})"
111+
97112
schema = pa.schema([pa.field('foo', pa.int32())])
98113
opts.explicit_schema = schema
99114
assert opts.explicit_schema == schema

0 commit comments

Comments
 (0)