Skip to content

Commit 2e7eb8a

Browse files
sudohainguyen authored and tqtensor committed
feat: Pandas v2 compatibility (feast-dev#3957)
* feat: Support pandas v2
  Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>
* fix: Prune dependencies
  Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>
* chore: Re-compile reqs py310
  Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>
* fix: Mark test skip with conditions
  Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>
* chore: Re-compile reqs py39
  Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>
* chore: Update skip reason
  Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>
* chore: Re-compile reqs py38
  Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>
* chore: Bump snowflake connector
  Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>
* chore: Remove test skip
  Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>
---------
Signed-off-by: Hai Nguyen <quanghai.ng1512@gmail.com>
1 parent bccead5 commit 2e7eb8a

10 files changed

Lines changed: 27 additions & 78 deletions

File tree

sdk/python/feast/infra/offline_stores/file.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
from pathlib import Path
55
from typing import Any, Callable, List, Literal, Optional, Tuple, Union
66

7+
import dask
78
import dask.dataframe as dd
89
import pandas as pd
910
import pyarrow
@@ -42,6 +43,11 @@
4243
_run_dask_field_mapping,
4344
)
4445

46+
# Since dask version 2023.7.1, FileRetrievalJob casts string objects to string[pyarrow]
47+
# This is not the desired behavior for our use case, so we set the convert-string option to False
48+
# See (https://github.com/dask/dask/issues/10881#issuecomment-1923327936)
49+
dask.config.set({"dataframe.convert-string": False})
50+
4551

4652
class FileOfflineStoreConfig(FeastConfigBaseModel):
4753
"""Offline store config for local (file-based) store"""
@@ -366,8 +372,6 @@ def evaluate_offline_job():
366372
source_df[DUMMY_ENTITY_ID] = DUMMY_ENTITY_VAL
367373
columns_to_extract.add(DUMMY_ENTITY_ID)
368374

369-
source_df = source_df.persist()
370-
371375
return source_df[list(columns_to_extract)].persist()
372376

373377
# When materializing a single feature view, we don't need full feature names. On demand transforms aren't materialized

sdk/python/requirements/py3.10-ci-requirements.txt

Lines changed: 3 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -179,10 +179,6 @@ executing==2.0.1
179179
# via stack-data
180180
fastapi==0.109.2
181181
# via feast (setup.py)
182-
fastavro==1.9.4
183-
# via
184-
# feast (setup.py)
185-
# pandavro
186182
fastjsonschema==2.19.1
187183
# via nbformat
188184
filelock==3.13.1
@@ -515,7 +511,6 @@ numpy==1.24.4
515511
# great-expectations
516512
# ibis-framework
517513
# pandas
518-
# pandavro
519514
# pyarrow
520515
# scipy
521516
oauthlib==3.2.2
@@ -543,18 +538,14 @@ packaging==23.2
543538
# pytest
544539
# snowflake-connector-python
545540
# sphinx
546-
pandas==1.5.3
541+
pandas==2.2.0 ; python_version >= "3.9"
547542
# via
548543
# altair
549544
# db-dtypes
550545
# feast (setup.py)
551546
# google-cloud-bigquery
552547
# great-expectations
553-
# ibis-framework
554-
# pandavro
555548
# snowflake-connector-python
556-
pandavro==1.5.2
557-
# via feast (setup.py)
558549
pandocfilters==1.5.1
559550
# via nbconvert
560551
parso==0.8.3
@@ -824,7 +815,6 @@ six==1.16.0
824815
# isodate
825816
# kubernetes
826817
# mock
827-
# pandavro
828818
# python-dateutil
829819
# rfc3339-validator
830820
# thriftpy2
@@ -975,6 +965,8 @@ typing-extensions==4.9.0
975965
# sqlalchemy2-stubs
976966
# typeguard
977967
# uvicorn
968+
tzdata==2024.1
969+
# via pandas
978970
tzlocal==5.2
979971
# via
980972
# great-expectations

sdk/python/requirements/py3.10-requirements.txt

Lines changed: 2 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -46,10 +46,6 @@ exceptiongroup==1.2.0
4646
# via anyio
4747
fastapi==0.109.2
4848
# via feast (setup.py)
49-
fastavro==1.9.4
50-
# via
51-
# feast (setup.py)
52-
# pandavro
5349
fissix==21.11.13
5450
# via bowler
5551
fsspec==2024.2.0
@@ -115,17 +111,12 @@ numpy==1.24.4
115111
# via
116112
# feast (setup.py)
117113
# pandas
118-
# pandavro
119114
# pyarrow
120115
packaging==23.2
121116
# via
122117
# dask
123118
# gunicorn
124-
pandas==1.5.3
125-
# via
126-
# feast (setup.py)
127-
# pandavro
128-
pandavro==1.5.2
119+
pandas==2.2.0
129120
# via feast (setup.py)
130121
partd==1.4.1
131122
# via dask
@@ -171,9 +162,7 @@ rpds-py==0.18.0
171162
# jsonschema
172163
# referencing
173164
six==1.16.0
174-
# via
175-
# pandavro
176-
# python-dateutil
165+
# via python-dateutil
177166
sniffio==1.3.0
178167
# via
179168
# anyio

sdk/python/requirements/py3.8-ci-requirements.txt

Lines changed: 1 addition & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -182,10 +182,6 @@ executing==2.0.1
182182
# via stack-data
183183
fastapi==0.109.2
184184
# via feast (setup.py)
185-
fastavro==1.9.4
186-
# via
187-
# feast (setup.py)
188-
# pandavro
189185
fastjsonschema==2.19.1
190186
# via nbformat
191187
filelock==3.13.1
@@ -530,7 +526,6 @@ numpy==1.24.4
530526
# great-expectations
531527
# ibis-framework
532528
# pandas
533-
# pandavro
534529
# pyarrow
535530
# scipy
536531
oauthlib==3.2.2
@@ -558,18 +553,14 @@ packaging==23.2
558553
# pytest
559554
# snowflake-connector-python
560555
# sphinx
561-
pandas==1.5.3
556+
pandas==1.5.3 ; python_version < "3.9"
562557
# via
563558
# altair
564559
# db-dtypes
565560
# feast (setup.py)
566561
# google-cloud-bigquery
567562
# great-expectations
568-
# ibis-framework
569-
# pandavro
570563
# snowflake-connector-python
571-
pandavro==1.5.2
572-
# via feast (setup.py)
573564
pandocfilters==1.5.1
574565
# via nbconvert
575566
parso==0.8.3

sdk/python/requirements/py3.8-requirements.txt

Lines changed: 2 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -46,10 +46,6 @@ exceptiongroup==1.2.0
4646
# via anyio
4747
fastapi==0.109.2
4848
# via feast (setup.py)
49-
fastavro==1.9.4
50-
# via
51-
# feast (setup.py)
52-
# pandavro
5349
fissix==21.11.13
5450
# via bowler
5551
fsspec==2024.2.0
@@ -119,17 +115,12 @@ numpy==1.24.4
119115
# via
120116
# feast (setup.py)
121117
# pandas
122-
# pandavro
123118
# pyarrow
124119
packaging==23.2
125120
# via
126121
# dask
127122
# gunicorn
128-
pandas==1.5.3
129-
# via
130-
# feast (setup.py)
131-
# pandavro
132-
pandavro==1.5.2
123+
pandas==2.0.3
133124
# via feast (setup.py)
134125
partd==1.4.1
135126
# via dask
@@ -177,9 +168,7 @@ rpds-py==0.18.0
177168
# jsonschema
178169
# referencing
179170
six==1.16.0
180-
# via
181-
# pandavro
182-
# python-dateutil
171+
# via python-dateutil
183172
sniffio==1.3.0
184173
# via
185174
# anyio

sdk/python/requirements/py3.9-ci-requirements.txt

Lines changed: 3 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -179,10 +179,6 @@ executing==2.0.1
179179
# via stack-data
180180
fastapi==0.109.2
181181
# via feast (setup.py)
182-
fastavro==1.9.4
183-
# via
184-
# feast (setup.py)
185-
# pandavro
186182
fastjsonschema==2.19.1
187183
# via nbformat
188184
filelock==3.13.1
@@ -523,7 +519,6 @@ numpy==1.24.4
523519
# great-expectations
524520
# ibis-framework
525521
# pandas
526-
# pandavro
527522
# pyarrow
528523
# scipy
529524
oauthlib==3.2.2
@@ -551,18 +546,14 @@ packaging==23.2
551546
# pytest
552547
# snowflake-connector-python
553548
# sphinx
554-
pandas==1.5.3
549+
pandas==2.2.0
555550
# via
556551
# altair
557552
# db-dtypes
558553
# feast (setup.py)
559554
# google-cloud-bigquery
560555
# great-expectations
561-
# ibis-framework
562-
# pandavro
563556
# snowflake-connector-python
564-
pandavro==1.5.2
565-
# via feast (setup.py)
566557
pandocfilters==1.5.1
567558
# via nbconvert
568559
parso==0.8.3
@@ -834,7 +825,6 @@ six==1.16.0
834825
# isodate
835826
# kubernetes
836827
# mock
837-
# pandavro
838828
# python-dateutil
839829
# rfc3339-validator
840830
# thriftpy2
@@ -988,6 +978,8 @@ typing-extensions==4.9.0
988978
# starlette
989979
# typeguard
990980
# uvicorn
981+
tzdata==2024.1
982+
# via pandas
991983
tzlocal==5.2
992984
# via
993985
# great-expectations

sdk/python/requirements/py3.9-requirements.txt

Lines changed: 2 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -46,10 +46,6 @@ exceptiongroup==1.2.0
4646
# via anyio
4747
fastapi==0.109.2
4848
# via feast (setup.py)
49-
fastavro==1.9.4
50-
# via
51-
# feast (setup.py)
52-
# pandavro
5349
fissix==21.11.13
5450
# via bowler
5551
fsspec==2024.2.0
@@ -116,17 +112,12 @@ numpy==1.24.4
116112
# via
117113
# feast (setup.py)
118114
# pandas
119-
# pandavro
120115
# pyarrow
121116
packaging==23.2
122117
# via
123118
# dask
124119
# gunicorn
125-
pandas==1.5.3
126-
# via
127-
# feast (setup.py)
128-
# pandavro
129-
pandavro==1.5.2
120+
pandas==2.2.0
130121
# via feast (setup.py)
131122
partd==1.4.1
132123
# via dask
@@ -172,9 +163,7 @@ rpds-py==0.18.0
172163
# jsonschema
173164
# referencing
174165
six==1.16.0
175-
# via
176-
# pandavro
177-
# python-dateutil
166+
# via python-dateutil
178167
sniffio==1.3.0
179168
# via
180169
# anyio

sdk/python/tests/integration/e2e/test_validation.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -167,7 +167,7 @@ def test_logged_features_validation(environment, universal_data_sources):
167167
{
168168
"customer_id": 2000 + i,
169169
"driver_id": 6000 + i,
170-
"event_timestamp": datetime.datetime.now(),
170+
"event_timestamp": make_tzaware(datetime.datetime.now()),
171171
}
172172
]
173173
),

sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -340,6 +340,11 @@ def test_historical_features_with_entities_from_query(
340340

341341
table_from_sql_entities = job_from_sql.to_arrow().to_pandas()
342342
for col in table_from_sql_entities.columns:
343+
# check if col dtype is timezone naive
344+
if pd.api.types.is_datetime64_dtype(table_from_sql_entities[col]):
345+
table_from_sql_entities[col] = table_from_sql_entities[col].dt.tz_localize(
346+
"UTC"
347+
)
343348
expected_df_query[col] = expected_df_query[col].astype(
344349
table_from_sql_entities[col].dtype
345350
)

setup.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,6 @@
4444
"click>=7.0.0,<9.0.0",
4545
"colorama>=0.3.9,<1",
4646
"dill~=0.3.0",
47-
"fastavro>=1.1.0,<2",
4847
"grpcio>=1.56.2,<2",
4948
"grpcio-tools>=1.56.2,<2",
5049
"grpcio-reflection>=1.56.2,<2",
@@ -54,9 +53,7 @@
5453
"jsonschema",
5554
"mmh3",
5655
"numpy>=1.22,<1.25",
57-
"pandas>=1.4.3,<2",
58-
# For some reason pandavro higher than 1.5.* only support pandas less than 1.3.
59-
"pandavro~=1.5.0",
56+
"pandas>=1.4.3,<3",
6057
# Higher than 4.23.4 seems to cause a seg fault
6158
"protobuf<4.23.4,>3.20",
6259
"proto-plus>=1.20.0,<2",
@@ -190,6 +187,7 @@
190187
"types-setuptools",
191188
"types-tabulate",
192189
"virtualenv<20.24.2",
190+
"pandas>=1.4.3,<2; python_version < '3.9'",
193191
]
194192
+ GCP_REQUIRED
195193
+ REDIS_REQUIRED

0 commit comments

Comments
 (0)