@@ -187,6 +187,49 @@ def bq_to_arrow_array(series, bq_field):
187187 return pyarrow .array (series , type = arrow_type )
188188
189189
def get_column_or_index(dataframe, name):
    """Look up ``name`` among the columns, then the index, of a DataFrame.

    Columns take precedence: an index (or index level) is consulted only
    when no column is called ``name``.

    Args:
        dataframe (pandas.DataFrame): Frame to read the values from.
        name: Label of the column, index, or MultiIndex level to fetch.

    Returns:
        pandas.Series:
            The matching values, re-labelled with a fresh positional index
            (``reset_index(drop=True)``).

    Raises:
        ValueError: If ``name`` matches neither a column nor an index.
    """
    index = dataframe.index

    if name in dataframe.columns:
        found = dataframe[name]
    elif isinstance(index, pandas.MultiIndex):
        # Only named levels of a MultiIndex can be addressed.
        if name in index.names:
            found = index.get_level_values(name).to_series()
        else:
            found = None
    elif name == index.name:
        found = index.to_series()
    else:
        found = None

    if found is None:
        raise ValueError("column or index '{}' not found.".format(name))

    # Drop the original labels so every result is aligned positionally.
    return found.reset_index(drop=True)
207+
208+
def list_columns_and_indexes(dataframe):
    """List the named indexes and the columns of a DataFrame with dtypes.

    Args:
        dataframe (pandas.DataFrame): Frame whose schema is inspected.

    Returns:
        List[Tuple[str, dtype]]:
            ``(name, dtype)`` pairs: usable index levels first (in level
            order), followed by every column in the frame's column order.
            An index (or MultiIndex level) is left out when its name is
            falsy (for example, unnamed) or when a column has the same name.
    """
    index = dataframe.index
    taken = frozenset(dataframe.columns)

    if isinstance(index, pandas.MultiIndex):
        pairs = [
            (level_name, index.get_level_values(level_name).dtype)
            for level_name in index.names
            if level_name and level_name not in taken
        ]
    elif index.name and index.name not in taken:
        pairs = [(index.name, index.dtype)]
    else:
        pairs = []

    pairs.extend(zip(dataframe.columns, dataframe.dtypes))
    return pairs
231+
232+
190233def dataframe_to_bq_schema (dataframe , bq_schema ):
191234 """Convert a pandas DataFrame schema to a BigQuery schema.
192235
@@ -217,7 +260,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
217260 bq_schema_unused = set ()
218261
219262 bq_schema_out = []
220- for column , dtype in zip (dataframe . columns , dataframe . dtypes ):
263+ for column , dtype in list_columns_and_indexes (dataframe ):
221264 # Use provided type from schema, if present.
222265 bq_field = bq_schema_index .get (column )
223266 if bq_field :
@@ -229,7 +272,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
229272 # pandas dtype.
230273 bq_type = _PANDAS_DTYPE_TO_BQ .get (dtype .name )
231274 if not bq_type :
232- warnings .warn ("Unable to determine type of column '{}'." .format (column ))
275+ warnings .warn (u "Unable to determine type of column '{}'." .format (column ))
233276 return None
234277 bq_field = schema .SchemaField (column , bq_type )
235278 bq_schema_out .append (bq_field )
@@ -238,7 +281,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
238281 # column, but it was not found.
239282 if bq_schema_unused :
240283 raise ValueError (
241- "bq_schema contains fields not present in dataframe: {}" .format (
284+ u "bq_schema contains fields not present in dataframe: {}" .format (
242285 bq_schema_unused
243286 )
244287 )
@@ -261,20 +304,25 @@ def dataframe_to_arrow(dataframe, bq_schema):
261304 BigQuery schema.
262305 """
263306 column_names = set (dataframe .columns )
307+ column_and_index_names = set (
308+ name for name , _ in list_columns_and_indexes (dataframe )
309+ )
264310 bq_field_names = set (field .name for field in bq_schema )
265311
266- extra_fields = bq_field_names - column_names
312+ extra_fields = bq_field_names - column_and_index_names
267313 if extra_fields :
268314 raise ValueError (
269- "bq_schema contains fields not present in dataframe: {}" .format (
315+ u "bq_schema contains fields not present in dataframe: {}" .format (
270316 extra_fields
271317 )
272318 )
273319
320+ # It's okay for indexes to be missing from bq_schema, but it's not okay to
321+ # be missing columns.
274322 missing_fields = column_names - bq_field_names
275323 if missing_fields :
276324 raise ValueError (
277- "bq_schema is missing fields from dataframe: {}" .format (missing_fields )
325+ u "bq_schema is missing fields from dataframe: {}" .format (missing_fields )
278326 )
279327
280328 arrow_arrays = []
@@ -283,7 +331,9 @@ def dataframe_to_arrow(dataframe, bq_schema):
283331 for bq_field in bq_schema :
284332 arrow_fields .append (bq_to_arrow_field (bq_field ))
285333 arrow_names .append (bq_field .name )
286- arrow_arrays .append (bq_to_arrow_array (dataframe [bq_field .name ], bq_field ))
334+ arrow_arrays .append (
335+ bq_to_arrow_array (get_column_or_index (dataframe , bq_field .name ), bq_field )
336+ )
287337
288338 if all ((field is not None for field in arrow_fields )):
289339 return pyarrow .Table .from_arrays (
0 commit comments