2323import warnings
2424from typing import Any , Union
2525
26- from packaging import version
27-
2826from google .cloud .bigquery import _helpers
27+ from google .cloud .bigquery import _pyarrow_helpers
28+ from google .cloud .bigquery import _versions_helpers
2929from google .cloud .bigquery import schema
3030
3131try :
4949 db_dtypes_import_exception = exc
5050 date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype
5151
52- pyarrow = _helpers .PYARROW_VERSIONS .try_import ()
52+ pyarrow = _versions_helpers .PYARROW_VERSIONS .try_import ()
53+
54+ _BIGNUMERIC_SUPPORT = False
55+ if pyarrow is not None :
56+ _BIGNUMERIC_SUPPORT = True
5357
5458try :
 5559 # _BaseGeometry is used to detect shapely objects in `bq_to_arrow_array`
@@ -119,87 +123,6 @@ def __init__(self):
119123 self .done = False
120124
121125
122- def pyarrow_datetime ():
123- return pyarrow .timestamp ("us" , tz = None )
124-
125-
126- def pyarrow_numeric ():
127- return pyarrow .decimal128 (38 , 9 )
128-
129-
130- def pyarrow_bignumeric ():
131- # 77th digit is partial.
132- # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types
133- return pyarrow .decimal256 (76 , 38 )
134-
135-
136- def pyarrow_time ():
137- return pyarrow .time64 ("us" )
138-
139-
140- def pyarrow_timestamp ():
141- return pyarrow .timestamp ("us" , tz = "UTC" )
142-
143-
144- if pyarrow :
 145- # This dictionary is duplicated in bigquery_storage/test/unit/test_reader.py
146- # When modifying it be sure to update it there as well.
147- BQ_TO_ARROW_SCALARS = {
148- "BOOL" : pyarrow .bool_ ,
149- "BOOLEAN" : pyarrow .bool_ ,
150- "BYTES" : pyarrow .binary ,
151- "DATE" : pyarrow .date32 ,
152- "DATETIME" : pyarrow_datetime ,
153- "FLOAT" : pyarrow .float64 ,
154- "FLOAT64" : pyarrow .float64 ,
155- "GEOGRAPHY" : pyarrow .string ,
156- "INT64" : pyarrow .int64 ,
157- "INTEGER" : pyarrow .int64 ,
158- "NUMERIC" : pyarrow_numeric ,
159- "STRING" : pyarrow .string ,
160- "TIME" : pyarrow_time ,
161- "TIMESTAMP" : pyarrow_timestamp ,
162- }
163- ARROW_SCALAR_IDS_TO_BQ = {
164- # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes
165- pyarrow .bool_ ().id : "BOOL" ,
166- pyarrow .int8 ().id : "INT64" ,
167- pyarrow .int16 ().id : "INT64" ,
168- pyarrow .int32 ().id : "INT64" ,
169- pyarrow .int64 ().id : "INT64" ,
170- pyarrow .uint8 ().id : "INT64" ,
171- pyarrow .uint16 ().id : "INT64" ,
172- pyarrow .uint32 ().id : "INT64" ,
173- pyarrow .uint64 ().id : "INT64" ,
174- pyarrow .float16 ().id : "FLOAT64" ,
175- pyarrow .float32 ().id : "FLOAT64" ,
176- pyarrow .float64 ().id : "FLOAT64" ,
177- pyarrow .time32 ("ms" ).id : "TIME" ,
178- pyarrow .time64 ("ns" ).id : "TIME" ,
179- pyarrow .timestamp ("ns" ).id : "TIMESTAMP" ,
180- pyarrow .date32 ().id : "DATE" ,
181- pyarrow .date64 ().id : "DATETIME" , # because millisecond resolution
182- pyarrow .binary ().id : "BYTES" ,
183- pyarrow .string ().id : "STRING" , # also alias for pyarrow.utf8()
184- # The exact scale and precision don't matter, see below.
185- pyarrow .decimal128 (38 , scale = 9 ).id : "NUMERIC" ,
186- }
187-
188- if version .parse (pyarrow .__version__ ) >= version .parse ("3.0.0" ):
189- BQ_TO_ARROW_SCALARS ["BIGNUMERIC" ] = pyarrow_bignumeric
190- # The exact decimal's scale and precision are not important, as only
191- # the type ID matters, and it's the same for all decimal256 instances.
192- ARROW_SCALAR_IDS_TO_BQ [pyarrow .decimal256 (76 , scale = 38 ).id ] = "BIGNUMERIC"
193- _BIGNUMERIC_SUPPORT = True
194- else :
195- _BIGNUMERIC_SUPPORT = False # pragma: NO COVER
196-
197- else : # pragma: NO COVER
198- BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER
199- ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER
200- _BIGNUMERIC_SUPPORT = False # pragma: NO COVER
201-
202-
203126BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = {
204127 "GEOGRAPHY" : {
205128 b"ARROW:extension:name" : b"google:sqlType:geography" ,
@@ -240,7 +163,7 @@ def bq_to_arrow_data_type(field):
240163 if field_type_upper in schema ._STRUCT_TYPES :
241164 return bq_to_arrow_struct_data_type (field )
242165
243- data_type_constructor = BQ_TO_ARROW_SCALARS . get (field_type_upper )
166+ data_type_constructor = _pyarrow_helpers . bq_to_arrow_scalars (field_type_upper )
244167 if data_type_constructor is None :
245168 return None
246169 return data_type_constructor ()
@@ -568,7 +491,9 @@ def augment_schema(dataframe, current_bq_schema):
568491 if pyarrow .types .is_list (arrow_table .type ):
569492 # `pyarrow.ListType`
570493 detected_mode = "REPEATED"
571- detected_type = ARROW_SCALAR_IDS_TO_BQ .get (arrow_table .values .type .id )
494+ detected_type = _pyarrow_helpers .arrow_scalar_ids_to_bq (
495+ arrow_table .values .type .id
496+ )
572497
573498 # For timezone-naive datetimes, pyarrow assumes the UTC timezone and adds
574499 # it to such datetimes, causing them to be recognized as TIMESTAMP type.
@@ -584,7 +509,7 @@ def augment_schema(dataframe, current_bq_schema):
584509 detected_type = "DATETIME"
585510 else :
586511 detected_mode = field .mode
587- detected_type = ARROW_SCALAR_IDS_TO_BQ . get (arrow_table .type .id )
512+ detected_type = _pyarrow_helpers . arrow_scalar_ids_to_bq (arrow_table .type .id )
588513
589514 if detected_type is None :
590515 unknown_type_fields .append (field )
@@ -705,13 +630,13 @@ def dataframe_to_parquet(
705630
706631 This argument is ignored for ``pyarrow`` versions earlier than ``4.0.0``.
707632 """
708- pyarrow = _helpers .PYARROW_VERSIONS .try_import (raise_if_error = True )
633+ pyarrow = _versions_helpers .PYARROW_VERSIONS .try_import (raise_if_error = True )
709634
710635 import pyarrow .parquet # type: ignore
711636
712637 kwargs = (
713638 {"use_compliant_nested_type" : parquet_use_compliant_nested_type }
714- if _helpers .PYARROW_VERSIONS .use_compliant_nested_type
639+ if _versions_helpers .PYARROW_VERSIONS .use_compliant_nested_type
715640 else {}
716641 )
717642
0 commit comments