Skip to content

Commit 5647eda

Browse files
authored
GH-49349: [Doc][Python] Simplify doctests in table.pxi and types.pxi (#49350)
### Rationale for this change

Closes #49349

### What changes are included in this PR?

a) Changing the expected output in docstring examples from the pandas-version-agnostic form (`...string`, which matches both pandas 2.3.3 and pandas 3) to the pandas 3-specific form (`large_string`).
b) Replacing `pa.Table.from_arrays([...], names=[...])` with `pa.table({...})`.

### Are these changes tested?

Yes, tests pass locally.

### Are there any user-facing changes?

No.

* GitHub Issue: #49349

Authored-by: Tadeja Kadunc <tadeja.kadunc@gmail.com>
Signed-off-by: Rok Mihevc <rok@mihevc.org>
1 parent b095098 commit 5647eda

2 files changed

Lines changed: 57 additions & 80 deletions

File tree

python/pyarrow/table.pxi

Lines changed: 56 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1809,9 +1809,8 @@ cdef class _Tabular(_PandasConvertible):
18091809
Table (works similarly for RecordBatch)
18101810
18111811
>>> import pyarrow as pa
1812-
>>> table = pa.Table.from_arrays([[2, 4, 5, 100],
1813-
... ["Flamingo", "Horse", "Brittle stars", "Centipede"]],
1814-
... names=['n_legs', 'animals'])
1812+
>>> table = pa.table({'n_legs': [2, 4, 5, 100],
1813+
... 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
18151814
>>> table.column_names
18161815
['n_legs', 'animals']
18171816
"""
@@ -1873,13 +1872,9 @@ cdef class _Tabular(_PandasConvertible):
18731872
Table (works similarly for RecordBatch)
18741873
18751874
>>> import pyarrow as pa
1876-
>>> import pandas as pd
1877-
>>> df = pd.DataFrame({'year': [None, 2022, 2019, 2021],
1875+
>>> table = pa.table({'year': [None, 2022, 2019, 2021],
18781876
... 'n_legs': [2, 4, 5, 100],
18791877
... 'animals': ["Flamingo", "Horse", None, "Centipede"]})
1880-
>>> table = pa.Table.from_arrays(
1881-
... [[None, 2022, 2019, 2021], [2, 4, 5, 100], ["Flamingo", "Horse", None, "Centipede"]],
1882-
... names=['year', 'n_legs', 'animals'])
18831878
>>> table.drop_null()
18841879
pyarrow.Table
18851880
year: int64
@@ -1911,9 +1906,8 @@ cdef class _Tabular(_PandasConvertible):
19111906
Table (works similarly for RecordBatch)
19121907
19131908
>>> import pyarrow as pa
1914-
>>> table = pa.Table.from_arrays(
1915-
... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]],
1916-
... names=['n_legs', 'animals'])
1909+
>>> table = pa.table({'n_legs': [2, 4, 5, 100],
1910+
... 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
19171911
>>> table.field(0)
19181912
pyarrow.Field<n_legs: int64>
19191913
>>> table.field(1)
@@ -2065,9 +2059,8 @@ cdef class _Tabular(_PandasConvertible):
20652059
Table (works similarly for RecordBatch)
20662060
20672061
>>> import pyarrow as pa
2068-
>>> table = pa.Table.from_arrays(
2069-
... [[None, 4, 5, None], ["Flamingo", "Horse", None, "Centipede"]],
2070-
... names=['n_legs', 'animals'])
2062+
>>> table = pa.table({'n_legs': [None, 4, 5, None],
2063+
... 'animals': ["Flamingo", "Horse", None, "Centipede"]})
20712064
>>> for i in table.itercolumns():
20722065
... print(i.null_count)
20732066
...
@@ -2134,11 +2127,10 @@ cdef class _Tabular(_PandasConvertible):
21342127
Table (works similarly for RecordBatch)
21352128
21362129
>>> import pyarrow as pa
2137-
>>> table = pa.Table.from_arrays(
2138-
... [[2020, 2022, 2021, 2022, 2019, 2021],
2139-
... [2, 2, 4, 4, 5, 100],
2140-
... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"]],
2141-
... names=['year', 'n_legs', 'animal'])
2130+
>>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021],
2131+
... 'n_legs': [2, 2, 4, 4, 5, 100],
2132+
... 'animal': ["Flamingo", "Parrot", "Dog", "Horse",
2133+
... "Brittle stars", "Centipede"]})
21422134
>>> table.sort_by('animal')
21432135
pyarrow.Table
21442136
year: int64
@@ -2180,10 +2172,9 @@ cdef class _Tabular(_PandasConvertible):
21802172
Table (works similarly for RecordBatch)
21812173
21822174
>>> import pyarrow as pa
2183-
>>> table = pa.Table.from_arrays(
2184-
... [[2020, 2022, 2019, 2021], [2, 4, 5, 100],
2185-
... ["Flamingo", "Horse", "Brittle stars", "Centipede"]],
2186-
... names=['year', 'n_legs', 'animals'])
2175+
>>> table = pa.table({'year': [2020, 2022, 2019, 2021],
2176+
... 'n_legs': [2, 4, 5, 100],
2177+
... 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
21872178
>>> table.take([1,3])
21882179
pyarrow.Table
21892180
year: int64
@@ -2471,9 +2462,8 @@ cdef class _Tabular(_PandasConvertible):
24712462
Examples
24722463
--------
24732464
>>> import pyarrow as pa
2474-
>>> table = pa.Table.from_arrays(
2475-
... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]],
2476-
... names=['n_legs', 'animals'])
2465+
>>> table = pa.table({'n_legs': [2, 4, 5, 100],
2466+
... 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
24772467
24782468
Append column at the end:
24792469
@@ -2542,7 +2532,7 @@ cdef class RecordBatch(_Tabular):
25422532
month: int64
25432533
day: int64
25442534
n_legs: int64
2545-
animals: ...string
2535+
animals: large_string
25462536
----
25472537
year: [2020,2022,2021,2022]
25482538
month: [3,5,7,9]
@@ -2582,7 +2572,7 @@ cdef class RecordBatch(_Tabular):
25822572
month: int64
25832573
day: int64
25842574
n_legs: int64
2585-
animals: ...string
2575+
animals: large_string
25862576
----
25872577
year: [2020,2022,2021,2022]
25882578
month: [3,5,7,9]
@@ -3406,7 +3396,7 @@ cdef class RecordBatch(_Tabular):
34063396
month: int64
34073397
day: int64
34083398
n_legs: int64
3409-
animals: ...string
3399+
animals: large_string
34103400
----
34113401
year: [2020,2022,2021,2022]
34123402
month: [3,5,7,9]
@@ -4146,7 +4136,7 @@ cdef class Table(_Tabular):
41464136
pyarrow.Table
41474137
year: int64
41484138
n_legs: int64
4149-
animals: ...string
4139+
animals: large_string
41504140
----
41514141
year: [[2020,2022,2019,2021]]
41524142
n_legs: [[2,4,5,100]]
@@ -4272,10 +4262,9 @@ cdef class Table(_Tabular):
42724262
Examples
42734263
--------
42744264
>>> import pyarrow as pa
4275-
>>> table = pa.Table.from_arrays(
4276-
... [[2020, 2022, 2019, 2021], [2, 4, 5, 100],
4277-
... ["Flamingo", "Horse", "Brittle stars", "Centipede"]],
4278-
... names=['year', 'n_legs', 'animals'])
4265+
>>> table = pa.table({'year': [2020, 2022, 2019, 2021],
4266+
... 'n_legs': [2, 4, 5, 100],
4267+
... 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
42794268
>>> table.slice(length=3)
42804269
pyarrow.Table
42814270
year: int64
@@ -4336,10 +4325,9 @@ cdef class Table(_Tabular):
43364325
Examples
43374326
--------
43384327
>>> import pyarrow as pa
4339-
>>> table = pa.Table.from_arrays(
4340-
... [[2020, 2022, 2019, 2021], [2, 4, 5, 100],
4341-
... ["Flamingo", "Horse", "Brittle stars", "Centipede"]],
4342-
... names=['year', 'n_legs', 'animals'])
4328+
>>> table = pa.table({'year': [2020, 2022, 2019, 2021],
4329+
... 'n_legs': [2, 4, 5, 100],
4330+
... 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
43434331
>>> table.select([0,1])
43444332
pyarrow.Table
43454333
year: int64
@@ -4675,9 +4663,8 @@ cdef class Table(_Tabular):
46754663
Examples
46764664
--------
46774665
>>> import pyarrow as pa
4678-
>>> table = pa.Table.from_arrays(
4679-
... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]],
4680-
... names=['n_legs', 'animals'])
4666+
>>> table = pa.table({'n_legs': [2, 4, 5, 100],
4667+
... 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
46814668
>>> table.schema
46824669
n_legs: int64
46834670
animals: string
@@ -4772,7 +4759,7 @@ cdef class Table(_Tabular):
47724759
>>> pa.Table.from_pandas(df)
47734760
pyarrow.Table
47744761
n_legs: int64
4775-
animals: ...string
4762+
animals: large_string
47764763
----
47774764
n_legs: [[2,4,5,100]]
47784765
animals: [["Flamingo","Horse","Brittle stars","Centipede"]]
@@ -5117,9 +5104,8 @@ cdef class Table(_Tabular):
51175104
Examples
51185105
--------
51195106
>>> import pyarrow as pa
5120-
>>> table = pa.Table.from_arrays(
5121-
... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]],
5122-
... names=['n_legs', 'animals'])
5107+
>>> table = pa.table({'n_legs': [2, 4, 5, 100],
5108+
... 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
51235109
51245110
Convert a Table to a RecordBatchReader:
51255111
@@ -5175,9 +5161,8 @@ cdef class Table(_Tabular):
51755161
Examples
51765162
--------
51775163
>>> import pyarrow as pa
5178-
>>> table = pa.Table.from_arrays(
5179-
... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]],
5180-
... names=['n_legs', 'animals'])
5164+
>>> table = pa.table({'n_legs': [2, 4, 5, 100],
5165+
... 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
51815166
>>> table.schema
51825167
n_legs: int64
51835168
animals: string
@@ -5267,9 +5252,8 @@ cdef class Table(_Tabular):
52675252
Examples
52685253
--------
52695254
>>> import pyarrow as pa
5270-
>>> table = pa.Table.from_arrays(
5271-
... [[None, 4, 5, None], ["Flamingo", "Horse", None, "Centipede"]],
5272-
... names=['n_legs', 'animals'])
5255+
>>> table = pa.table({'n_legs': [None, 4, 5, None],
5256+
... 'animals': ["Flamingo", "Horse", None, "Centipede"]})
52735257
>>> table.nbytes
52745258
72
52755259
"""
@@ -5296,9 +5280,8 @@ cdef class Table(_Tabular):
52965280
Examples
52975281
--------
52985282
>>> import pyarrow as pa
5299-
>>> table = pa.Table.from_arrays(
5300-
... [[None, 4, 5, None], ["Flamingo", "Horse", None, "Centipede"]],
5301-
... names=['n_legs', 'animals'])
5283+
>>> table = pa.table({'n_legs': [None, 4, 5, None],
5284+
... 'animals': ["Flamingo", "Horse", None, "Centipede"]})
53025285
>>> table.get_total_buffer_size()
53035286
76
53045287
"""
@@ -5337,9 +5320,8 @@ cdef class Table(_Tabular):
53375320
Examples
53385321
--------
53395322
>>> import pyarrow as pa
5340-
>>> table = pa.Table.from_arrays(
5341-
... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]],
5342-
... names=['n_legs', 'animals'])
5323+
>>> table = pa.table({'n_legs': [2, 4, 5, 100],
5324+
... 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
53435325
53445326
Add column:
53455327
@@ -5402,9 +5384,8 @@ cdef class Table(_Tabular):
54025384
Examples
54035385
--------
54045386
>>> import pyarrow as pa
5405-
>>> table = pa.Table.from_arrays(
5406-
... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]],
5407-
... names=['n_legs', 'animals'])
5387+
>>> table = pa.table({'n_legs': [2, 4, 5, 100],
5388+
... 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
54085389
>>> table.remove_column(1)
54095390
pyarrow.Table
54105391
n_legs: int64
@@ -5440,9 +5421,8 @@ cdef class Table(_Tabular):
54405421
Examples
54415422
--------
54425423
>>> import pyarrow as pa
5443-
>>> table = pa.Table.from_arrays(
5444-
... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]],
5445-
... names=['n_legs', 'animals'])
5424+
>>> table = pa.table({'n_legs': [2, 4, 5, 100],
5425+
... 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
54465426
54475427
Replace a column:
54485428
@@ -5501,9 +5481,8 @@ cdef class Table(_Tabular):
55015481
Examples
55025482
--------
55035483
>>> import pyarrow as pa
5504-
>>> table = pa.Table.from_arrays(
5505-
... [[2, 4, 5, 100], ["Flamingo", "Horse", "Brittle stars", "Centipede"]],
5506-
... names=['n_legs', 'animals'])
5484+
>>> table = pa.table({'n_legs': [2, 4, 5, 100],
5485+
... 'animals': ["Flamingo", "Horse", "Brittle stars", "Centipede"]})
55075486
>>> new_names = ["n", "name"]
55085487
>>> table.rename_columns(new_names)
55095488
pyarrow.Table
@@ -5593,11 +5572,10 @@ cdef class Table(_Tabular):
55935572
Examples
55945573
--------
55955574
>>> import pyarrow as pa
5596-
>>> table = pa.Table.from_arrays(
5597-
... [[2020, 2022, 2021, 2022, 2019, 2021],
5598-
... [2, 2, 4, 4, 5, 100],
5599-
... ["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"]],
5600-
... names=['year', 'n_legs', 'animal'])
5575+
>>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021],
5576+
... 'n_legs': [2, 2, 4, 4, 5, 100],
5577+
... 'animal': ["Flamingo", "Parrot", "Dog", "Horse",
5578+
... "Brittle stars", "Centipede"]})
56015579
>>> table.group_by('year').aggregate([('n_legs', 'sum')])
56025580
pyarrow.Table
56035581
year: int64
@@ -5656,12 +5634,11 @@ cdef class Table(_Tabular):
56565634
--------
56575635
>>> import pyarrow as pa
56585636
>>> import pyarrow.compute as pc
5659-
>>> t1 = pa.Table.from_arrays(
5660-
... [[1, 2, 3], [2020, 2022, 2019]],
5661-
... names=['id', 'year'])
5662-
>>> t2 = pa.Table.from_arrays(
5663-
... [[3, 4], [5, 100], ["Brittle stars", "Centipede"]],
5664-
... names=['id', 'n_legs', 'animal'])
5637+
>>> t1 = pa.table({'id': [1, 2, 3],
5638+
... 'year': [2020, 2022, 2019]})
5639+
>>> t2 = pa.table({'id': [3, 4],
5640+
... 'n_legs': [5, 100],
5641+
... 'animal': ["Brittle stars", "Centipede"]})
56655642
56665643
Left outer join:
56675644
@@ -5973,7 +5950,7 @@ def record_batch(data, names=None, schema=None, metadata=None):
59735950
month: int64
59745951
day: int64
59755952
n_legs: int64
5976-
animals: ...string
5953+
animals: large_string
59775954
----
59785955
year: [2020,2022,2021,2022]
59795956
month: [3,5,7,9]
@@ -6134,7 +6111,7 @@ def table(data, names=None, schema=None, metadata=None, nthreads=None):
61346111
pyarrow.Table
61356112
year: int64
61366113
n_legs: int64
6137-
animals: ...string
6114+
animals: large_string
61386115
----
61396116
year: [[2020,2022,2019,2021]]
61406117
n_legs: [[2,4,5,100]]

python/pyarrow/types.pxi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3140,7 +3140,7 @@ cdef class Schema(_Weakrefable):
31403140
31413141
>>> pa.Schema.from_pandas(df)
31423142
int: int64
3143-
str: ...string
3143+
str: large_string
31443144
-- schema metadata --
31453145
pandas: '{"index_columns": [{"kind": "range", "name": null, ...
31463146
"""

0 commit comments

Comments
 (0)