Skip to content
This repository was archived by the owner on Mar 6, 2026. It is now read-only.

Commit 98e568f

Browse files
committed
fix: load_table_from_dataframe does not error out when nan in a required column
1 parent 5ceed05 commit 98e568f

File tree

2 files changed

+40
-0
lines changed

2 files changed

+40
-0
lines changed

google/cloud/bigquery/_pandas_helpers.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,19 @@ def bq_to_arrow_array(series, bq_field):
380380
return pyarrow.Array.from_pandas(series, type=arrow_type)
381381

382382

383+
def _check_nullability(arrow_fields, dataframe):
384+
"""Throws error if dataframe has null values and column doesn't allow nullable"""
385+
if dataframe.index.name:
386+
dataframe[dataframe.index.name] = dataframe.index
387+
for arrow_field in arrow_fields:
388+
col_name = arrow_field.name
389+
if (
390+
not arrow_field.nullable
391+
and dataframe[arrow_field.name].isnull().values.any()
392+
):
393+
raise ValueError(f"required field {col_name} can not be nulls")
394+
395+
383396
def get_column_or_index(dataframe, name):
384397
"""Return a column or index as a pandas series."""
385398
if name in dataframe.columns:
@@ -663,6 +676,7 @@ def dataframe_to_arrow(dataframe, bq_schema):
663676
)
664677
arrow_fields.append(bq_to_arrow_field(bq_field, arrow_arrays[-1].type))
665678

679+
_check_nullability(arrow_fields, dataframe)
666680
if all((field is not None for field in arrow_fields)):
667681
return pyarrow.Table.from_arrays(
668682
arrow_arrays, schema=pyarrow.schema(arrow_fields)

tests/unit/test_client.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8689,6 +8689,32 @@ def test_load_table_from_dataframe_w_nulls(self):
86898689
assert sent_config.schema == schema
86908690
assert sent_config.source_format == job.SourceFormat.PARQUET
86918691

8692+
@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_w_nulls_for_required_cols(self):
    """Test that a DataFrame with null columns should throw error if
    corresponding field in bigquery schema is required.

    See: https://github.com/googleapis/python-bigquery/issues/1692
    """
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery import job

    client = self._make_client()
    # Both columns are entirely null, but only "age" is REQUIRED in the
    # schema -- so "age" (not "name") is what must trigger the error.
    records = [{"name": None, "age": None}, {"name": None, "age": None}]
    dataframe = pandas.DataFrame(records, columns=["name", "age"])
    schema = [
        SchemaField("name", "STRING"),
        SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    job_config = job.LoadJobConfig(schema=schema)
    # The nullability check runs during dataframe->arrow conversion, so
    # the error surfaces before any upload is attempted.
    with pytest.raises(ValueError) as e:
        client.load_table_from_dataframe(
            dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION
        )

    assert str(e.value) == "required field age can not be nulls"
8717+
86928718
@unittest.skipIf(pandas is None, "Requires `pandas`")
86938719
def test_load_table_from_dataframe_w_invaild_job_config(self):
86948720
from google.cloud.bigquery import job

0 commit comments

Comments
 (0)