Skip to content
This repository was archived by the owner on Mar 6, 2026. It is now read-only.

Commit ed452fb

Browse files
committed
fix: load_table_from_dataframe does not error out when NaN is present in a required column
1 parent 5ceed05 commit ed452fb

2 files changed

Lines changed: 42 additions & 0 deletions

File tree

google/cloud/bigquery/_pandas_helpers.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,16 @@ def bq_to_arrow_array(series, bq_field):
380380
return pyarrow.Array.from_pandas(series, type=arrow_type)
381381

382382

383+
def _check_nullability(arrow_fields, dataframe):
384+
"""Throws error if dataframe has null values and column doesn't allow nullable"""
385+
if dataframe.index.name:
386+
dataframe[dataframe.index.name] = dataframe.index
387+
for arrow_field in arrow_fields:
388+
col_name = arrow_field.name
389+
if not arrow_field.nullable and dataframe[col_name].isnull().values.any():
390+
raise ValueError(f"required field {col_name} can not be nulls")
391+
392+
383393
def get_column_or_index(dataframe, name):
384394
"""Return a column or index as a pandas series."""
385395
if name in dataframe.columns:
@@ -663,6 +673,7 @@ def dataframe_to_arrow(dataframe, bq_schema):
663673
)
664674
arrow_fields.append(bq_to_arrow_field(bq_field, arrow_arrays[-1].type))
665675

676+
_check_nullability(arrow_fields, dataframe)
666677
if all((field is not None for field in arrow_fields)):
667678
return pyarrow.Table.from_arrays(
668679
arrow_arrays, schema=pyarrow.schema(arrow_fields)

tests/unit/test_client.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8689,6 +8689,37 @@ def test_load_table_from_dataframe_w_nulls(self):
86898689
assert sent_config.schema == schema
86908690
assert sent_config.source_format == job.SourceFormat.PARQUET
86918691

8692+
@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_w_nulls_for_required_cols(self):
    """A DataFrame whose column contains nulls must raise ``ValueError``
    when the matching BigQuery schema field is REQUIRED.

    See: https://github.com/googleapis/python-bigquery/issues/1692
    """
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
    from google.cloud.bigquery import job

    client = self._make_client()
    # Both rows are entirely null; "age" is the REQUIRED field under test.
    dataframe = pandas.DataFrame(
        [{"name": None, "age": None}, {"name": None, "age": None}],
        columns=["name", "age"],
    )
    job_config = job.LoadJobConfig(
        schema=[
            SchemaField("name", "STRING"),
            SchemaField("age", "INTEGER", mode="REQUIRED"),
        ]
    )

    load_patch = mock.patch(
        "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
    )
    # The upload must be rejected before load_table_from_file is reached.
    with load_patch as load_table_from_file, pytest.raises(ValueError) as exc_info:
        client.load_table_from_dataframe(
            dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION
        )

    assert str(exc_info.value) == "required field age can not be nulls"
86928723
@unittest.skipIf(pandas is None, "Requires `pandas`")
86938724
def test_load_table_from_dataframe_w_invaild_job_config(self):
86948725
from google.cloud.bigquery import job

0 commit comments

Comments
 (0)