Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 36 additions & 12 deletions packages/pandas-gbq/noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,19 @@

from __future__ import absolute_import

from functools import wraps
import os
import pathlib
import re
import shutil
import time
import warnings
from functools import wraps

import nox

BLACK_VERSION = "black==23.7.0"
ISORT_VERSION = "isort==5.10.1"
RUFF_VERSION = "ruff==0.14.14"
Comment thread
chalmerlowe marked this conversation as resolved.
Comment thread
chalmerlowe marked this conversation as resolved.
LINT_PATHS = ["docs", "pandas_gbq", "tests", "noxfile.py", "setup.py"]

DEFAULT_PYTHON_VERSION = "3.14"
Expand Down Expand Up @@ -147,19 +148,29 @@ def blacken(session):
@_calculate_duration
def format(session):
    """
    Run ruff to sort imports and format code.

    Replaces the previous isort + black pipeline with a single tool:
    one ruff invocation fixes import ordering, a second formats the code.
    """
    # 1. Install ruff (skipped automatically if you run with --no-venv)
    session.install(RUFF_VERSION)

    # 2. Run Ruff to fix imports ("--select I" limits checks to import
    # sorting; "--fix" applies the changes in place).
    session.run(
        "ruff",
        "check",
        "--select",
        "I",
        "--fix",
        # Target the oldest supported Python so fixes stay compatible,
        # e.g. "3.9" -> "py39".
        f"--target-version=py{UNIT_TEST_PYTHON_VERSIONS[0].replace('.', '')}",
        "--line-length=88",
        *LINT_PATHS,
    )

    # 3. Run Ruff to format code (black-compatible 88-column style).
    session.run(
        "ruff",
        "format",
        f"--target-version=py{UNIT_TEST_PYTHON_VERSIONS[0].replace('.', '')}",
        "--line-length=88",
        *LINT_PATHS,
    )

Expand Down Expand Up @@ -519,11 +530,24 @@ def docfx(session):


@nox.session(python=DEFAULT_PYTHON_VERSION)
@_calculate_duration
def mypy(session):
    """Run the mypy type checker over the pandas_gbq package.

    Installs mypy plus third-party stub packages for dependencies that
    do not ship their own type information, then checks the package
    including function bodies without annotations
    (``--check-untyped-defs``).
    """
    session.install(
        # Pinned below 1.16.0; newer releases have not been validated
        # against this codebase yet.
        "mypy<1.16.0",
        "types-requests",
        "types-protobuf",
        "pandas-stubs",
        "types-tqdm",
        "types-psutil",
    )
    # Install the package itself so mypy can resolve first-party imports.
    session.install(".")
    session.run(
        "mypy",
        "pandas_gbq",
        "--check-untyped-defs",
        # Allow callers to append extra mypy flags, e.g. --strict.
        *session.posargs,
    )


@nox.session(python=DEFAULT_PYTHON_VERSION)
Expand Down
6 changes: 4 additions & 2 deletions packages/pandas-gbq/pandas_gbq/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ def get_credentials(
client_id=None,
client_secret=None,
):
import pydata_google_auth
# pydata-google-auth does not have type hints nor stubs that mypy uses for type checking.
import pydata_google_auth # type: ignore[import-untyped]

if private_key:
raise NotImplementedError(
Expand All @@ -48,7 +49,8 @@ def get_credentials(


def get_credentials_cache(reauth):
import pydata_google_auth.cache
# pydata-google-auth does not have type hints nor stubs that mypy uses for type checking.
import pydata_google_auth.cache # type: ignore[import-untyped]

if reauth:
return pydata_google_auth.cache.WriteOnlyCredentialsCache(
Expand Down
5 changes: 4 additions & 1 deletion packages/pandas-gbq/pandas_gbq/core/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import itertools

import pandas
import typing


def list_columns_and_indexes(dataframe, index=True):
Expand All @@ -22,7 +23,9 @@ def list_columns_and_indexes(dataframe, index=True):
if isinstance(dataframe.index, pandas.MultiIndex):
for name in dataframe.index.names:
if name and name not in column_names:
values = dataframe.index.get_level_values(name)
values = dataframe.index.get_level_values(
typing.cast(typing.Union[str, int], name)
)
columns_and_indexes.append((name, values.dtype))
else:
if dataframe.index.name and dataframe.index.name not in column_names:
Expand Down
8 changes: 5 additions & 3 deletions packages/pandas-gbq/pandas_gbq/core/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from __future__ import annotations

import typing
from typing import Any, Dict, Optional, Sequence
import warnings
from typing import Any, Dict, Optional, Sequence

import google.cloud.bigquery
import google.cloud.bigquery.table
Expand Down Expand Up @@ -34,7 +34,8 @@ def _bqschema_to_nullsafe_dtypes(schema_fields):
See: http://pandas.pydata.org/pandas-docs/dev/missing_data.html
#missing-data-casting-rules-and-indexing
"""
import db_dtypes
# db-dtypes does not have type hints or stubs that mypy can use for type checking.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you include a link to a tracking bug in google-cloud-python to follow up on this? This applies throughout the PR to every package that lacks type hints.

import db_dtypes # type: ignore[import-untyped]

# If you update this mapping, also update the table at
# `docs/reading.rst`.
Expand Down Expand Up @@ -79,7 +80,8 @@ def _finalize_dtypes(
1970. See:
https://github.com/googleapis/python-bigquery-pandas/issues/365
"""
import db_dtypes
# db-dtypes does not have type hints nor stubs that mypy uses for type checking.
import db_dtypes # type: ignore[import-untyped]
import pandas.api.types

# If you update this mapping, also update the table at
Expand Down
2 changes: 1 addition & 1 deletion packages/pandas-gbq/pandas_gbq/dry_runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
import copy
from typing import Any, List

from google.cloud import bigquery
import pandas
from google.cloud import bigquery


def get_query_stats(
Expand Down
44 changes: 26 additions & 18 deletions packages/pandas-gbq/pandas_gbq/gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,36 @@
# license that can be found in the LICENSE file.

import copy
from datetime import datetime
import logging
import re
import typing
import warnings
from datetime import datetime

import pandas

from pandas_gbq.contexts import Context # noqa - backward compatible export
from pandas_gbq.contexts import context
from pandas_gbq.exceptions import ( # noqa - backward compatible export
import pandas_gbq.schema
import pandas_gbq.schema.pandas_to_bigquery
from pandas_gbq.contexts import ( # noqa: F401
Context, # noqa: F401 - imported solely to support a backwards compatible export
context,
)
from pandas_gbq.exceptions import ( # noqa: F401 - imported solely to support a backwards compatible export
DatasetCreationError,
GenericGBQException,
InvalidColumnOrder,
InvalidIndexColumn,
InvalidPageToken, # noqa: F401 - imported solely to support a backwards compatible export
InvalidSchema, # noqa: F401 - imported solely to support a backwards compatible export
NotFoundException,
QueryTimeout, # noqa: F401 - imported solely to support a backwards compatible export
TableCreationError,
)
from pandas_gbq.exceptions import InvalidPageToken # noqa - backward compatible export
from pandas_gbq.exceptions import InvalidSchema # noqa - backward compatible export
from pandas_gbq.exceptions import QueryTimeout # noqa - backward compatible export
from pandas_gbq.features import FEATURES
from pandas_gbq.gbq_connector import GbqConnector # noqa - backward compatible export
from pandas_gbq.gbq_connector import _get_client # noqa - backward compatible export
import pandas_gbq.schema
import pandas_gbq.schema.pandas_to_bigquery
from pandas_gbq.gbq_connector import ( # noqa: F401
GbqConnector, # noqa: F401 - imported solely to support a backwards compatible export
_get_client, # noqa: F401 - imported solely to support a backwards compatible export
)

logger = logging.getLogger(__name__)

Expand All @@ -40,17 +44,23 @@ def _test_google_api_imports():
raise ImportError("pandas-gbq requires db-dtypes") from ex

try:
import db_dtypes # noqa
# db-dtypes does not have type hints nor stubs that mypy uses for type checking.
# This import is solely to test if the package is installed, so we ignore the "unused import" warning.
import db_dtypes # type: ignore[import-untyped] # noqa: F401
except ImportError as ex: # pragma: NO COVER
raise ImportError("pandas-gbq requires db-dtypes") from ex

try:
import pydata_google_auth # noqa
# pydata-google-auth does not have type hints nor stubs that mypy uses for type checking.
# This import is solely to test if the package is installed, so we ignore the "unused import" warning.
import pydata_google_auth # type: ignore[import-untyped] # noqa: F401
except ImportError as ex: # pragma: NO COVER
raise ImportError("pandas-gbq requires pydata-google-auth") from ex

try:
from google_auth_oauthlib.flow import InstalledAppFlow # noqa
# google-auth-oauthlib does not have type hints nor stubs that mypy uses for type checking.
# This import is solely to test if the package is installed, so we ignore the "unused import" warning.
from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore[import-untyped] # noqa: F401
except ImportError as ex: # pragma: NO COVER
raise ImportError("pandas-gbq requires google-auth-oauthlib") from ex

Expand Down Expand Up @@ -686,7 +696,7 @@ def generate_bq_schema(df, default_type="STRING"):
"""
# deprecation TimeSeries, #11121
warnings.warn(
"generate_bq_schema is deprecated and will be removed in " "a future version",
"generate_bq_schema is deprecated and will be removed in a future version",
FutureWarning,
stacklevel=2,
)
Expand Down Expand Up @@ -927,9 +937,7 @@ def create(self, dataset_id):
from google.cloud.bigquery import Dataset

if self.exists(dataset_id):
raise DatasetCreationError(
"Dataset {0} already " "exists".format(dataset_id)
)
raise DatasetCreationError("Dataset {0} already exists".format(dataset_id))

dataset = Dataset(self._dataset_ref(dataset_id))

Expand Down
14 changes: 9 additions & 5 deletions packages/pandas-gbq/pandas_gbq/gbq_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,30 @@
import logging
import time
import typing
from typing import Any, Dict, Optional, Union
import warnings
from typing import Any, Dict, Optional, Union

# Only import at module-level at type checking time to avoid circular
# dependencies in the pandas package, which has an optional dependency on
# pandas-gbq.
if typing.TYPE_CHECKING: # pragma: NO COVER
import pandas

from pandas_gbq import dry_runs
import pandas_gbq.constants
from pandas_gbq.contexts import context
import pandas_gbq.core.read
import pandas_gbq.environment as environment
import pandas_gbq.exceptions
import pandas_gbq.query
from pandas_gbq import dry_runs
from pandas_gbq.contexts import context
from pandas_gbq.exceptions import QueryTimeout
from pandas_gbq.features import FEATURES
import pandas_gbq.query

tqdm: Any = None
try:
import tqdm # noqa
except ImportError:
tqdm = None
pass

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -204,6 +205,8 @@ def run_query(
].get("timeoutMs")

if timeout_ms:
if not isinstance(timeout_ms, (str, int, float)):
raise TypeError(f"Expected str, int or float, got {type(timeout_ms)}")
timeout_ms = int(timeout_ms)
# Having too small a timeout_ms results in individual
# API calls timing out before they can finish.
Expand All @@ -220,6 +223,7 @@ def run_query(

self._start_timer()
job_config = bigquery.QueryJobConfig.from_api_repr(job_config_dict)
job_config = typing.cast(bigquery.QueryJobConfig, job_config)
job_config.dry_run = dry_run

if FEATURES.bigquery_has_query_and_wait:
Expand Down
19 changes: 11 additions & 8 deletions packages/pandas-gbq/pandas_gbq/load/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,16 @@
import io
from typing import Any, Callable, Dict, List, Optional

import db_dtypes
from google.cloud import bigquery
# db-dtypes does not have type hints nor stubs that mypy uses for type checking.
import db_dtypes # type: ignore[import-untyped]
import pandas
import pyarrow.lib
from google.cloud import bigquery

from pandas_gbq import exceptions
import pandas_gbq.schema
import pandas_gbq.schema.bigquery
import pandas_gbq.schema.pandas_to_bigquery
from pandas_gbq import exceptions


def encode_chunk(dataframe):
Expand All @@ -38,8 +39,8 @@ def encode_chunk(dataframe):
# Convert to a BytesIO buffer so that unicode text is properly handled.
# See: https://github.com/pydata/pandas-gbq/issues/106
body = csv_buffer.getvalue()
body = body.encode("utf-8")
return io.BytesIO(body)
body_bytes = body.encode("utf-8")
return io.BytesIO(body_bytes)


def split_dataframe(dataframe, chunksize=None):
Expand Down Expand Up @@ -68,7 +69,7 @@ def cast_dataframe_for_parquet(
See: https://github.com/googleapis/python-bigquery-pandas/issues/421
"""

columns = schema.get("fields", [])
columns = schema.get("fields", []) if schema is not None else []

# Protect against an explicit None in the dictionary.
columns = columns if columns is not None else []
Expand Down Expand Up @@ -130,7 +131,7 @@ def cast_dataframe_for_csv(
) -> pandas.DataFrame:
"""Cast columns to needed dtype when writing CSV files."""

columns = schema.get("fields", [])
columns = schema.get("fields", []) if schema is not None else []

# Protect against an explicit None in the dictionary.
columns = columns if columns is not None else []
Expand Down Expand Up @@ -280,7 +281,9 @@ def load_chunk(chunk, job_config):
finally:
chunk_buffer.close()

return load_csv(dataframe, write_disposition, chunksize, bq_schema, load_chunk)
return load_csv(
dataframe, write_disposition, chunksize, list(bq_schema), load_chunk
)


def load_chunks(
Expand Down
Loading
Loading