Skip to content

Commit 8fdd2a4

Browse files
authored
fix(automl): fix uploading pandas dataframe to AutoML Tables (#9647)
By default, pandas.DataFrame.to_csv() exports the data index as a column with an empty column name. This causes uploading the exported CSV file to fail, because AutoML Tables does not allow empty column names. Since the data index is not useful for training the model, this PR fixes the problem by setting the index argument to False so that the index is not exported.
1 parent cb9cdc9 commit 8fdd2a4

File tree

2 files changed

+8
-3
lines changed

2 files changed

+8
-3
lines changed

automl/google/cloud/automl_v1beta1/tables/gcs_client.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,12 @@ def upload_pandas_dataframe(self, dataframe, uploaded_csv_name=None):
132132
uploaded_csv_name = "automl-tables-dataframe-{}.csv".format(
133133
int(time.time())
134134
)
135-
csv_string = dataframe.to_csv()
135+
136+
# Setting index to False to ignore exporting the data index:
137+
# 1. The resulting column name for the index column is empty, AutoML
138+
# Tables does not allow empty column names
139+
# 2. The index is not useful training information
140+
csv_string = dataframe.to_csv(index=False)
136141

137142
bucket = self.client.get_bucket(self.bucket_name)
138143
blob = bucket.blob(uploaded_csv_name)

automl/tests/unit/gapic/v1beta1/test_gcs_client_v1beta1.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ def test_upload_pandas_dataframe(self):
139139

140140
gcs_client.client.get_bucket.assert_called_with("my-bucket")
141141
mock_bucket.blob.assert_called_with("my-file.csv")
142-
mock_blob.upload_from_string.assert_called_with(",col1,col2\n0,1,3\n1,2,4\n")
142+
mock_blob.upload_from_string.assert_called_with("col1,col2\n1,3\n2,4\n")
143143
assert gcs_uri == "gs://my-bucket/my-file.csv"
144144

145145
def test_upload_pandas_dataframe_no_csv_name(self):
@@ -156,7 +156,7 @@ def test_upload_pandas_dataframe_no_csv_name(self):
156156

157157
gcs_client.client.get_bucket.assert_called_with("my-bucket")
158158
mock_bucket.blob.assert_called_with(generated_csv_name)
159-
mock_blob.upload_from_string.assert_called_with(",col1,col2\n0,1,3\n1,2,4\n")
159+
mock_blob.upload_from_string.assert_called_with("col1,col2\n1,3\n2,4\n")
160160
assert re.match("^gs://my-bucket/automl-tables-dataframe-[0-9]*.csv$", gcs_uri)
161161

162162
def test_upload_pandas_dataframe_not_type_dataframe(self):

0 commit comments

Comments
 (0)