diff --git a/.gitignore b/.gitignore index b87e1ed58..b9daa52f1 100644 --- a/.gitignore +++ b/.gitignore @@ -46,6 +46,7 @@ pip-log.txt # Built documentation docs/_build bigquery/docs/generated +docs.metadata # Virtual environment env/ @@ -57,4 +58,4 @@ system_tests/local_test_setup # Make sure a generated file isn't accidentally committed. pylintrc -pylintrc.test \ No newline at end of file +pylintrc.test diff --git a/.kokoro/build.sh b/.kokoro/build.sh index d3749e290..0e71e2aca 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -36,4 +36,10 @@ python3.6 -m pip uninstall --yes --quiet nox-automation python3.6 -m pip install --upgrade --quiet nox python3.6 -m nox --version -python3.6 -m nox +# If NOX_SESSION is set, it only runs the specified session, +# otherwise run all the sessions. +if [[ -n "${NOX_SESSION:-}" ]]; then + python3.6 -m nox -s "${NOX_SESSION:-}" +else + python3.6 -m nox +fi diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile new file mode 100644 index 000000000..412b0b56a --- /dev/null +++ b/.kokoro/docker/docs/Dockerfile @@ -0,0 +1,98 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ubuntu:20.04 + +ENV DEBIAN_FRONTEND noninteractive + +# Ensure local Python is preferred over distribution Python. +ENV PATH /usr/local/bin:$PATH + +# Install dependencies. +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + apt-transport-https \ + build-essential \ + ca-certificates \ + curl \ + dirmngr \ + git \ + gpg-agent \ + graphviz \ + libbz2-dev \ + libdb5.3-dev \ + libexpat1-dev \ + libffi-dev \ + liblzma-dev \ + libreadline-dev \ + libsnappy-dev \ + libssl-dev \ + libsqlite3-dev \ + portaudio19-dev \ + redis-server \ + software-properties-common \ + ssh \ + sudo \ + tcl \ + tcl-dev \ + tk \ + tk-dev \ + uuid-dev \ + wget \ + zlib1g-dev \ + && add-apt-repository universe \ + && apt-get update \ + && apt-get -y install jq \ + && apt-get clean autoclean \ + && apt-get autoremove -y \ + && rm -rf /var/lib/apt/lists/* \ + && rm -f /var/cache/apt/archives/*.deb + + +COPY fetch_gpg_keys.sh /tmp +# Install the desired versions of Python. 
+RUN set -ex \ + && export GNUPGHOME="$(mktemp -d)" \ + && echo "disable-ipv6" >> "${GNUPGHOME}/dirmngr.conf" \ + && /tmp/fetch_gpg_keys.sh \ + && for PYTHON_VERSION in 3.7.8 3.8.5; do \ + wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" \ + && wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" \ + && gpg --batch --verify python-${PYTHON_VERSION}.tar.xz.asc python-${PYTHON_VERSION}.tar.xz \ + && rm -r python-${PYTHON_VERSION}.tar.xz.asc \ + && mkdir -p /usr/src/python-${PYTHON_VERSION} \ + && tar -xJC /usr/src/python-${PYTHON_VERSION} --strip-components=1 -f python-${PYTHON_VERSION}.tar.xz \ + && rm python-${PYTHON_VERSION}.tar.xz \ + && cd /usr/src/python-${PYTHON_VERSION} \ + && ./configure \ + --enable-shared \ + # This works only on Python 2.7 and throws a warning on every other + # version, but seems otherwise harmless. + --enable-unicode=ucs4 \ + --with-system-ffi \ + --without-ensurepip \ + && make -j$(nproc) \ + && make install \ + && ldconfig \ + ; done \ + && rm -rf "${GNUPGHOME}" \ + && rm -rf /usr/src/python* \ + && rm -rf ~/.cache/ + +RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ + && python3.7 /tmp/get-pip.py \ + && python3.8 /tmp/get-pip.py \ + && rm /tmp/get-pip.py + +CMD ["python3.7"] diff --git a/.kokoro/docker/docs/fetch_gpg_keys.sh b/.kokoro/docker/docs/fetch_gpg_keys.sh new file mode 100755 index 000000000..d653dd868 --- /dev/null +++ b/.kokoro/docker/docs/fetch_gpg_keys.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A script to fetch gpg keys with retry. +# Avoid jinja parsing the file. +# + +function retry { + if [[ "${#}" -le 1 ]]; then + echo "Usage: ${0} retry_count commands.." + exit 1 + fi + local retries=${1} + local command="${@:2}" + until [[ "${retries}" -le 0 ]]; do + $command && return 0 + if [[ $? -ne 0 ]]; then + echo "command failed, retrying" + ((retries--)) + fi + done + return 1 +} + +# 3.6.9, 3.7.5 (Ned Deily) +retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \ + 0D96DF4D4110E5C43FBFB17F2D347EA6AA65421D + +# 3.8.0 (Ɓukasz Langa) +retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \ + E3FF2839C048B25C084DEBE9B26995E310250568 + +# diff --git a/.kokoro/docs/common.cfg b/.kokoro/docs/common.cfg index 229abf075..8f9807f72 100644 --- a/.kokoro/docs/common.cfg +++ b/.kokoro/docs/common.cfg @@ -11,12 +11,12 @@ action { gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" # Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline.sh" +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" # Configure the docker image for kokoro-trampoline. 
env_vars: { key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-multi" + value: "gcr.io/cloud-devrel-kokoro-resources/python-lib-docs" } env_vars: { key: "TRAMPOLINE_BUILD_FILE" @@ -28,6 +28,23 @@ env_vars: { value: "docs-staging" } +env_vars: { + key: "V2_STAGING_BUCKET" + value: "docs-staging-v2-staging" +} + +# It will upload the docker image after successful builds. +env_vars: { + key: "TRAMPOLINE_IMAGE_UPLOAD" + value: "true" +} + +# It will always build the docker image. +env_vars: { + key: "TRAMPOLINE_DOCKERFILE" + value: ".kokoro/docker/docs/Dockerfile" +} + # Fetch the token needed for reporting release status to GitHub before_action { fetch_keystore { diff --git a/.kokoro/docs/docs-presubmit.cfg b/.kokoro/docs/docs-presubmit.cfg new file mode 100644 index 000000000..111810782 --- /dev/null +++ b/.kokoro/docs/docs-presubmit.cfg @@ -0,0 +1,17 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "STAGING_BUCKET" + value: "gcloud-python-test" +} + +env_vars: { + key: "V2_STAGING_BUCKET" + value: "gcloud-python-test" +} + +# We only upload the image in the main `docs` build. +env_vars: { + key: "TRAMPOLINE_IMAGE_UPLOAD" + value: "false" +} diff --git a/.kokoro/presubmit/presubmit.cfg b/.kokoro/presubmit/presubmit.cfg index 8f43917d9..b158096f0 100644 --- a/.kokoro/presubmit/presubmit.cfg +++ b/.kokoro/presubmit/presubmit.cfg @@ -1 +1,7 @@ -# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file +# Format: //devtools/kokoro/config/proto/build.proto + +# Disable system tests. +env_vars: { + key: "RUN_SYSTEM_TESTS" + value: "false" +} diff --git a/.kokoro/presubmit/system-2.7.cfg b/.kokoro/presubmit/system-2.7.cfg new file mode 100644 index 000000000..3b6523a19 --- /dev/null +++ b/.kokoro/presubmit/system-2.7.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "system-2.7" +} \ No newline at end of file diff --git a/.kokoro/presubmit/system-3.8.cfg b/.kokoro/presubmit/system-3.8.cfg new file mode 100644 index 000000000..f4bcee3db --- /dev/null +++ b/.kokoro/presubmit/system-3.8.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "system-3.8" +} \ No newline at end of file diff --git a/.kokoro/publish-docs.sh b/.kokoro/publish-docs.sh index 309212789..8acb14e80 100755 --- a/.kokoro/publish-docs.sh +++ b/.kokoro/publish-docs.sh @@ -18,26 +18,16 @@ set -eo pipefail # Disable buffering, so that the logs stream through. 
export PYTHONUNBUFFERED=1 -cd github/python-bigquery - -# Remove old nox -python3.6 -m pip uninstall --yes --quiet nox-automation +export PATH="${HOME}/.local/bin:${PATH}" # Install nox -python3.6 -m pip install --upgrade --quiet nox -python3.6 -m nox --version +python3 -m pip install --user --upgrade --quiet nox +python3 -m nox --version # build docs nox -s docs -python3 -m pip install gcp-docuploader - -# install a json parser -sudo apt-get update -sudo apt-get -y install software-properties-common -sudo add-apt-repository universe -sudo apt-get update -sudo apt-get -y install jq +python3 -m pip install --user gcp-docuploader # create metadata python3 -m docuploader create-metadata \ @@ -52,4 +42,23 @@ python3 -m docuploader create-metadata \ cat docs.metadata # upload docs -python3 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket docs-staging +python3 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket "${STAGING_BUCKET}" + + +# docfx yaml files +nox -s docfx + +# create metadata. +python3 -m docuploader create-metadata \ + --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ + --version=$(python3 setup.py --version) \ + --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \ + --distribution-name=$(python3 setup.py --name) \ + --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \ + --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \ + --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json) + +cat docs.metadata + +# upload docs +python3 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}" diff --git a/.kokoro/trampoline_v2.sh b/.kokoro/trampoline_v2.sh new file mode 100755 index 000000000..719bcd5ba --- /dev/null +++ b/.kokoro/trampoline_v2.sh @@ -0,0 +1,487 @@ +#!/usr/bin/env bash +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# trampoline_v2.sh +# +# This script does 3 things. +# +# 1. Prepare the Docker image for the test +# 2. Run the Docker with appropriate flags to run the test +# 3. Upload the newly built Docker image +# +# in a way that is somewhat compatible with trampoline_v1. +# +# To run this script, first download few files from gcs to /dev/shm. +# (/dev/shm is passed into the container as KOKORO_GFILE_DIR). +# +# gsutil cp gs://cloud-devrel-kokoro-resources/python-docs-samples/secrets_viewer_service_account.json /dev/shm +# gsutil cp gs://cloud-devrel-kokoro-resources/python-docs-samples/automl_secrets.txt /dev/shm +# +# Then run the script. +# .kokoro/trampoline_v2.sh +# +# These environment variables are required: +# TRAMPOLINE_IMAGE: The docker image to use. +# TRAMPOLINE_DOCKERFILE: The location of the Dockerfile. 
+# +# You can optionally change these environment variables: +# TRAMPOLINE_IMAGE_UPLOAD: +# (true|false): Whether to upload the Docker image after the +# successful builds. +# TRAMPOLINE_BUILD_FILE: The script to run in the docker container. +# TRAMPOLINE_WORKSPACE: The workspace path in the docker container. +# Defaults to /workspace. +# Potentially there are some repo specific envvars in .trampolinerc in +# the project root. + + +set -euo pipefail + +TRAMPOLINE_VERSION="2.0.5" + +if command -v tput >/dev/null && [[ -n "${TERM:-}" ]]; then + readonly IO_COLOR_RED="$(tput setaf 1)" + readonly IO_COLOR_GREEN="$(tput setaf 2)" + readonly IO_COLOR_YELLOW="$(tput setaf 3)" + readonly IO_COLOR_RESET="$(tput sgr0)" +else + readonly IO_COLOR_RED="" + readonly IO_COLOR_GREEN="" + readonly IO_COLOR_YELLOW="" + readonly IO_COLOR_RESET="" +fi + +function function_exists { + [ $(LC_ALL=C type -t $1)"" == "function" ] +} + +# Logs a message using the given color. The first argument must be one +# of the IO_COLOR_* variables defined above, such as +# "${IO_COLOR_YELLOW}". The remaining arguments will be logged in the +# given color. The log message will also have an RFC-3339 timestamp +# prepended (in UTC). You can disable the color output by setting +# TERM=vt100. +function log_impl() { + local color="$1" + shift + local timestamp="$(date -u "+%Y-%m-%dT%H:%M:%SZ")" + echo "================================================================" + echo "${color}${timestamp}:" "$@" "${IO_COLOR_RESET}" + echo "================================================================" +} + +# Logs the given message with normal coloring and a timestamp. +function log() { + log_impl "${IO_COLOR_RESET}" "$@" +} + +# Logs the given message in green with a timestamp. +function log_green() { + log_impl "${IO_COLOR_GREEN}" "$@" +} + +# Logs the given message in yellow with a timestamp. +function log_yellow() { + log_impl "${IO_COLOR_YELLOW}" "$@" +} + +# Logs the given message in red with a timestamp. +function log_red() { + log_impl "${IO_COLOR_RED}" "$@" +} + +readonly tmpdir=$(mktemp -d -t ci-XXXXXXXX) +readonly tmphome="${tmpdir}/h" +mkdir -p "${tmphome}" + +function cleanup() { + rm -rf "${tmpdir}" +} +trap cleanup EXIT + +RUNNING_IN_CI="${RUNNING_IN_CI:-false}" + +# The workspace in the container, defaults to /workspace. +TRAMPOLINE_WORKSPACE="${TRAMPOLINE_WORKSPACE:-/workspace}" + +pass_down_envvars=( + # TRAMPOLINE_V2 variables. + # Tells scripts whether they are running as part of CI or not. + "RUNNING_IN_CI" + # Indicates which CI system we're in. + "TRAMPOLINE_CI" + # Indicates the version of the script. + "TRAMPOLINE_VERSION" +) + +log_yellow "Building with Trampoline ${TRAMPOLINE_VERSION}" + +# Detect which CI systems we're in. If we're in any of the CI systems +# we support, `RUNNING_IN_CI` will be true and `TRAMPOLINE_CI` will be +# the name of the CI system. Both envvars will be passing down to the +# container for telling which CI system we're in. +if [[ -n "${KOKORO_BUILD_ID:-}" ]]; then + # descriptive env var for indicating it's on CI. + RUNNING_IN_CI="true" + TRAMPOLINE_CI="kokoro" + if [[ "${TRAMPOLINE_USE_LEGACY_SERVICE_ACCOUNT:-}" == "true" ]]; then + if [[ ! -f "${KOKORO_GFILE_DIR}/kokoro-trampoline.service-account.json" ]]; then + log_red "${KOKORO_GFILE_DIR}/kokoro-trampoline.service-account.json does not exist. Did you forget to mount cloud-devrel-kokoro-resources/trampoline? Aborting." + exit 1 + fi + # This service account will be activated later. 
+ TRAMPOLINE_SERVICE_ACCOUNT="${KOKORO_GFILE_DIR}/kokoro-trampoline.service-account.json" + else + if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then + gcloud auth list + fi + log_yellow "Configuring Container Registry access" + gcloud auth configure-docker --quiet + fi + pass_down_envvars+=( + # KOKORO dynamic variables. + "KOKORO_BUILD_NUMBER" + "KOKORO_BUILD_ID" + "KOKORO_JOB_NAME" + "KOKORO_GIT_COMMIT" + "KOKORO_GITHUB_COMMIT" + "KOKORO_GITHUB_PULL_REQUEST_NUMBER" + "KOKORO_GITHUB_PULL_REQUEST_COMMIT" + # For Build Cop Bot + "KOKORO_GITHUB_COMMIT_URL" + "KOKORO_GITHUB_PULL_REQUEST_URL" + ) +elif [[ "${TRAVIS:-}" == "true" ]]; then + RUNNING_IN_CI="true" + TRAMPOLINE_CI="travis" + pass_down_envvars+=( + "TRAVIS_BRANCH" + "TRAVIS_BUILD_ID" + "TRAVIS_BUILD_NUMBER" + "TRAVIS_BUILD_WEB_URL" + "TRAVIS_COMMIT" + "TRAVIS_COMMIT_MESSAGE" + "TRAVIS_COMMIT_RANGE" + "TRAVIS_JOB_NAME" + "TRAVIS_JOB_NUMBER" + "TRAVIS_JOB_WEB_URL" + "TRAVIS_PULL_REQUEST" + "TRAVIS_PULL_REQUEST_BRANCH" + "TRAVIS_PULL_REQUEST_SHA" + "TRAVIS_PULL_REQUEST_SLUG" + "TRAVIS_REPO_SLUG" + "TRAVIS_SECURE_ENV_VARS" + "TRAVIS_TAG" + ) +elif [[ -n "${GITHUB_RUN_ID:-}" ]]; then + RUNNING_IN_CI="true" + TRAMPOLINE_CI="github-workflow" + pass_down_envvars+=( + "GITHUB_WORKFLOW" + "GITHUB_RUN_ID" + "GITHUB_RUN_NUMBER" + "GITHUB_ACTION" + "GITHUB_ACTIONS" + "GITHUB_ACTOR" + "GITHUB_REPOSITORY" + "GITHUB_EVENT_NAME" + "GITHUB_EVENT_PATH" + "GITHUB_SHA" + "GITHUB_REF" + "GITHUB_HEAD_REF" + "GITHUB_BASE_REF" + ) +elif [[ "${CIRCLECI:-}" == "true" ]]; then + RUNNING_IN_CI="true" + TRAMPOLINE_CI="circleci" + pass_down_envvars+=( + "CIRCLE_BRANCH" + "CIRCLE_BUILD_NUM" + "CIRCLE_BUILD_URL" + "CIRCLE_COMPARE_URL" + "CIRCLE_JOB" + "CIRCLE_NODE_INDEX" + "CIRCLE_NODE_TOTAL" + "CIRCLE_PREVIOUS_BUILD_NUM" + "CIRCLE_PROJECT_REPONAME" + "CIRCLE_PROJECT_USERNAME" + "CIRCLE_REPOSITORY_URL" + "CIRCLE_SHA1" + "CIRCLE_STAGE" + "CIRCLE_USERNAME" + "CIRCLE_WORKFLOW_ID" + "CIRCLE_WORKFLOW_JOB_ID" + "CIRCLE_WORKFLOW_UPSTREAM_JOB_IDS" + "CIRCLE_WORKFLOW_WORKSPACE_ID" + ) +fi + +# Configure the service account for pulling the docker image. +function repo_root() { + local dir="$1" + while [[ ! -d "${dir}/.git" ]]; do + dir="$(dirname "$dir")" + done + echo "${dir}" +} + +# Detect the project root. In CI builds, we assume the script is in +# the git tree and traverse from there, otherwise, traverse from `pwd` +# to find `.git` directory. +if [[ "${RUNNING_IN_CI:-}" == "true" ]]; then + PROGRAM_PATH="$(realpath "$0")" + PROGRAM_DIR="$(dirname "${PROGRAM_PATH}")" + PROJECT_ROOT="$(repo_root "${PROGRAM_DIR}")" +else + PROJECT_ROOT="$(repo_root $(pwd))" +fi + +log_yellow "Changing to the project root: ${PROJECT_ROOT}." +cd "${PROJECT_ROOT}" + +# To support relative path for `TRAMPOLINE_SERVICE_ACCOUNT`, we need +# to use this environment variable in `PROJECT_ROOT`. +if [[ -n "${TRAMPOLINE_SERVICE_ACCOUNT:-}" ]]; then + + mkdir -p "${tmpdir}/gcloud" + gcloud_config_dir="${tmpdir}/gcloud" + + log_yellow "Using isolated gcloud config: ${gcloud_config_dir}." + export CLOUDSDK_CONFIG="${gcloud_config_dir}" + + log_yellow "Using ${TRAMPOLINE_SERVICE_ACCOUNT} for authentication." + gcloud auth activate-service-account \ + --key-file "${TRAMPOLINE_SERVICE_ACCOUNT}" + log_yellow "Configuring Container Registry access" + gcloud auth configure-docker --quiet +fi + +required_envvars=( + # The basic trampoline configurations. 
+ "TRAMPOLINE_IMAGE" + "TRAMPOLINE_BUILD_FILE" +) + +if [[ -f "${PROJECT_ROOT}/.trampolinerc" ]]; then + source "${PROJECT_ROOT}/.trampolinerc" +fi + +log_yellow "Checking environment variables." +for e in "${required_envvars[@]}" +do + if [[ -z "${!e:-}" ]]; then + log "Missing ${e} env var. Aborting." + exit 1 + fi +done + +# We want to support legacy style TRAMPOLINE_BUILD_FILE used with V1 +# script: e.g. "github/repo-name/.kokoro/run_tests.sh" +TRAMPOLINE_BUILD_FILE="${TRAMPOLINE_BUILD_FILE#github/*/}" +log_yellow "Using TRAMPOLINE_BUILD_FILE: ${TRAMPOLINE_BUILD_FILE}" + +# ignore error on docker operations and test execution +set +e + +log_yellow "Preparing Docker image." +# We only download the docker image in CI builds. +if [[ "${RUNNING_IN_CI:-}" == "true" ]]; then + # Download the docker image specified by `TRAMPOLINE_IMAGE` + + # We may want to add --max-concurrent-downloads flag. + + log_yellow "Start pulling the Docker image: ${TRAMPOLINE_IMAGE}." + if docker pull "${TRAMPOLINE_IMAGE}"; then + log_green "Finished pulling the Docker image: ${TRAMPOLINE_IMAGE}." + has_image="true" + else + log_red "Failed pulling the Docker image: ${TRAMPOLINE_IMAGE}." + has_image="false" + fi +else + # For local run, check if we have the image. + if docker images "${TRAMPOLINE_IMAGE}:latest" | grep "${TRAMPOLINE_IMAGE}"; then + has_image="true" + else + has_image="false" + fi +fi + + +# The default user for a Docker container has uid 0 (root). To avoid +# creating root-owned files in the build directory we tell docker to +# use the current user ID. +user_uid="$(id -u)" +user_gid="$(id -g)" +user_name="$(id -un)" + +# To allow docker in docker, we add the user to the docker group in +# the host os. +docker_gid=$(cut -d: -f3 < <(getent group docker)) + +update_cache="false" +if [[ "${TRAMPOLINE_DOCKERFILE:-none}" != "none" ]]; then + # Build the Docker image from the source. + context_dir=$(dirname "${TRAMPOLINE_DOCKERFILE}") + docker_build_flags=( + "-f" "${TRAMPOLINE_DOCKERFILE}" + "-t" "${TRAMPOLINE_IMAGE}" + "--build-arg" "UID=${user_uid}" + "--build-arg" "USERNAME=${user_name}" + ) + if [[ "${has_image}" == "true" ]]; then + docker_build_flags+=("--cache-from" "${TRAMPOLINE_IMAGE}") + fi + + log_yellow "Start building the docker image." + if [[ "${TRAMPOLINE_VERBOSE:-false}" == "true" ]]; then + echo "docker build" "${docker_build_flags[@]}" "${context_dir}" + fi + + # ON CI systems, we want to suppress docker build logs, only + # output the logs when it fails. + if [[ "${RUNNING_IN_CI:-}" == "true" ]]; then + if docker build "${docker_build_flags[@]}" "${context_dir}" \ + > "${tmpdir}/docker_build.log" 2>&1; then + if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then + cat "${tmpdir}/docker_build.log" + fi + + log_green "Finished building the docker image." + update_cache="true" + else + log_red "Failed to build the Docker image, aborting." + log_yellow "Dumping the build logs:" + cat "${tmpdir}/docker_build.log" + exit 1 + fi + else + if docker build "${docker_build_flags[@]}" "${context_dir}"; then + log_green "Finished building the docker image." + update_cache="true" + else + log_red "Failed to build the Docker image, aborting." + exit 1 + fi + fi +else + if [[ "${has_image}" != "true" ]]; then + log_red "We do not have ${TRAMPOLINE_IMAGE} locally, aborting." + exit 1 + fi +fi + +# We use an array for the flags so they are easier to document. +docker_flags=( + # Remove the container after it exists. + "--rm" + + # Use the host network. + "--network=host" + + # Run in priviledged mode. 
We are not using docker for sandboxing or + # isolation, just for packaging our dev tools. + "--privileged" + + # Run the docker script with the user id. Because the docker image gets to + # write in ${PWD} you typically want this to be your user id. + # To allow docker in docker, we need to use docker gid on the host. + "--user" "${user_uid}:${docker_gid}" + + # Pass down the USER. + "--env" "USER=${user_name}" + + # Mount the project directory inside the Docker container. + "--volume" "${PROJECT_ROOT}:${TRAMPOLINE_WORKSPACE}" + "--workdir" "${TRAMPOLINE_WORKSPACE}" + "--env" "PROJECT_ROOT=${TRAMPOLINE_WORKSPACE}" + + # Mount the temporary home directory. + "--volume" "${tmphome}:/h" + "--env" "HOME=/h" + + # Allow docker in docker. + "--volume" "/var/run/docker.sock:/var/run/docker.sock" + + # Mount the /tmp so that docker in docker can mount the files + # there correctly. + "--volume" "/tmp:/tmp" + # Pass down the KOKORO_GFILE_DIR and KOKORO_KEYSTORE_DIR + # TODO(tmatsuo): This part is not portable. + "--env" "TRAMPOLINE_SECRET_DIR=/secrets" + "--volume" "${KOKORO_GFILE_DIR:-/dev/shm}:/secrets/gfile" + "--env" "KOKORO_GFILE_DIR=/secrets/gfile" + "--volume" "${KOKORO_KEYSTORE_DIR:-/dev/shm}:/secrets/keystore" + "--env" "KOKORO_KEYSTORE_DIR=/secrets/keystore" +) + +# Add an option for nicer output if the build gets a tty. +if [[ -t 0 ]]; then + docker_flags+=("-it") +fi + +# Passing down env vars +for e in "${pass_down_envvars[@]}" +do + if [[ -n "${!e:-}" ]]; then + docker_flags+=("--env" "${e}=${!e}") + fi +done + +# If arguments are given, all arguments will become the commands run +# in the container, otherwise run TRAMPOLINE_BUILD_FILE. +if [[ $# -ge 1 ]]; then + log_yellow "Running the given commands '" "${@:1}" "' in the container." + readonly commands=("${@:1}") + if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then + echo docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" "${commands[@]}" + fi + docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" "${commands[@]}" +else + log_yellow "Running the tests in a Docker container." + docker_flags+=("--entrypoint=${TRAMPOLINE_BUILD_FILE}") + if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then + echo docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" + fi + docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" +fi + + +test_retval=$? + +if [[ ${test_retval} -eq 0 ]]; then + log_green "Build finished with ${test_retval}" +else + log_red "Build finished with ${test_retval}" +fi + +# Only upload it when the test passes. +if [[ "${update_cache}" == "true" ]] && \ + [[ $test_retval == 0 ]] && \ + [[ "${TRAMPOLINE_IMAGE_UPLOAD:-false}" == "true" ]]; then + log_yellow "Uploading the Docker image." + if docker push "${TRAMPOLINE_IMAGE}"; then + log_green "Finished uploading the Docker image." + else + log_red "Failed uploading the Docker image." + fi + # Call trampoline_after_upload_hook if it's defined. + if function_exists trampoline_after_upload_hook; then + trampoline_after_upload_hook + fi + +fi + +exit "${test_retval}" diff --git a/.trampolinerc b/.trampolinerc new file mode 100644 index 000000000..995ee2911 --- /dev/null +++ b/.trampolinerc @@ -0,0 +1,51 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Template for .trampolinerc + +# Add required env vars here. +required_envvars+=( + "STAGING_BUCKET" + "V2_STAGING_BUCKET" +) + +# Add env vars which are passed down into the container here. +pass_down_envvars+=( + "STAGING_BUCKET" + "V2_STAGING_BUCKET" +) + +# Prevent unintentional override on the default image. +if [[ "${TRAMPOLINE_IMAGE_UPLOAD:-false}" == "true" ]] && \ + [[ -z "${TRAMPOLINE_IMAGE:-}" ]]; then + echo "Please set TRAMPOLINE_IMAGE if you want to upload the Docker image." + exit 1 +fi + +# Define the default value if it makes sense. +if [[ -z "${TRAMPOLINE_IMAGE_UPLOAD:-}" ]]; then + TRAMPOLINE_IMAGE_UPLOAD="" +fi + +if [[ -z "${TRAMPOLINE_IMAGE:-}" ]]; then + TRAMPOLINE_IMAGE="" +fi + +if [[ -z "${TRAMPOLINE_DOCKERFILE:-}" ]]; then + TRAMPOLINE_DOCKERFILE="" +fi + +if [[ -z "${TRAMPOLINE_BUILD_FILE:-}" ]]; then + TRAMPOLINE_BUILD_FILE="" +fi diff --git a/CHANGELOG.md b/CHANGELOG.md index f7286e9bf..5ef22e8d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,35 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [1.27.2](https://www.github.com/googleapis/python-bigquery/compare/v1.27.1...v1.27.2) (2020-08-18) + + +### Bug Fixes + +* rationalize platform constraints for 'pyarrow' extra ([#235](https://www.github.com/googleapis/python-bigquery/issues/235)) ([c9a0567](https://www.github.com/googleapis/python-bigquery/commit/c9a0567f59491b769a9e2fd535430423e39d4fa8)) + +### [1.27.1](https://www.github.com/googleapis/python-bigquery/compare/v1.27.0...v1.27.1) (2020-08-18) + + +### Bug Fixes + +* tweak pyarrow extra to soothe PyPI ([#230](https://www.github.com/googleapis/python-bigquery/issues/230)) ([c15efbd](https://www.github.com/googleapis/python-bigquery/commit/c15efbd1ee4488898fc862768eef701443f492f6)) + +## [1.27.0](https://www.github.com/googleapis/python-bigquery/compare/v1.26.1...v1.27.0) (2020-08-15) + + +### Features + +* add support and tests for struct fields ([#146](https://www.github.com/googleapis/python-bigquery/issues/146)) ([fee2ba8](https://www.github.com/googleapis/python-bigquery/commit/fee2ba80e338d093ee61565359268da91a5c9913)) +* add support for getting and setting table IAM policy ([#144](https://www.github.com/googleapis/python-bigquery/issues/144)) ([f59fc9a](https://www.github.com/googleapis/python-bigquery/commit/f59fc9a482d9f9ae63e2b2bfc80b9a3481d09bde)) +* **bigquery:** add client_options to base class ([#216](https://www.github.com/googleapis/python-bigquery/issues/216)) ([478597a](https://www.github.com/googleapis/python-bigquery/commit/478597a38167fa57b60ae7f65b581f3fe75ddc7c)) + + +### Bug Fixes + +* converting to dataframe with out of bounds timestamps ([#209](https://www.github.com/googleapis/python-bigquery/issues/209)) ([8209203](https://www.github.com/googleapis/python-bigquery/commit/8209203e967f0624ad306166c0af6f6f1027c550)), closes [#168](https://www.github.com/googleapis/python-bigquery/issues/168) +* raise error if inserting rows with unknown fields ([#163](https://www.github.com/googleapis/python-bigquery/issues/163)) 
([8fe7254](https://www.github.com/googleapis/python-bigquery/commit/8fe725429541eed34ddc01cffc8b1ee846c14162)) + ### [1.26.1](https://www.github.com/googleapis/python-bigquery/compare/v1.26.0...v1.26.1) (2020-07-25) ### Documentation @@ -482,7 +511,7 @@ ### New Features -- Add options to ignore errors when creating/deleting datasets/tables. ([#7491](https://github.com/googleapis/google-cloud-python/pull/7491)) +- Add `exists_ok` and `not_found_ok` options to ignore errors when creating/deleting datasets/tables. ([#7491](https://github.com/googleapis/google-cloud-python/pull/7491)) - Accept a string in Table and Dataset constructors. ([#7483](https://github.com/googleapis/google-cloud-python/pull/7483)) ### Documentation diff --git a/docs/conf.py b/docs/conf.py index 251e1f4ca..155606c97 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,6 +20,10 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath("..")) +# For plugins that can not read conf.py. +# See also: https://github.com/docascode/sphinx-docfx-yaml/issues/85 +sys.path.insert(0, os.path.abspath(".")) + __version__ = "" # -- General configuration ------------------------------------------------ diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index d814eec8c..47851d42c 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -19,6 +19,7 @@ import datetime import decimal import re +import six from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date @@ -419,9 +420,23 @@ def _record_field_to_json(fields, row_value): Returns: Mapping[str, Any]: A JSON-serializable dictionary. """ - record = {} isdict = isinstance(row_value, dict) + # If row is passed as a tuple, make the length sanity check to avoid either + # uninformative index errors a few lines below or silently omitting some of + # the values from the result (we cannot know exactly which fields are missing + # or redundant, since we don't have their names). + if not isdict and len(row_value) != len(fields): + msg = "The number of row fields ({}) does not match schema length ({}).".format( + len(row_value), len(fields) + ) + raise ValueError(msg) + + record = {} + + if isdict: + processed_fields = set() + for subindex, subfield in enumerate(fields): subname = subfield.name subvalue = row_value.get(subname) if isdict else row_value[subindex] @@ -430,6 +445,20 @@ def _record_field_to_json(fields, row_value): if subvalue is not None: record[subname] = _field_to_json(subfield, subvalue) + if isdict: + processed_fields.add(subname) + + # Unknown fields should not be silently dropped, include them. Since there + # is no schema information available for them, include them as strings + # to make them JSON-serializable. 
+ if isdict: + not_processed = set(row_value.keys()) - processed_fields + + for field_name in not_processed: + value = row_value[field_name] + if value is not None: + record[field_name] = six.text_type(value) + return record diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index ff6525399..953b7d0fe 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -287,13 +287,14 @@ def dataframe_to_bq_schema(dataframe, bq_schema): """ if bq_schema: bq_schema = schema._to_schema_fields(bq_schema) - for field in bq_schema: - if field.field_type in schema._STRUCT_TYPES: - raise ValueError( - "Uploading dataframes with struct (record) column types " - "is not supported. See: " - "https://github.com/googleapis/google-cloud-python/issues/8191" - ) + if six.PY2: + for field in bq_schema: + if field.field_type in schema._STRUCT_TYPES: + raise ValueError( + "Uploading dataframes with struct (record) column types " + "is not supported under Python2. See: " + "https://github.com/googleapis/python-bigquery/issues/21" + ) bq_schema_index = {field.name: field for field in bq_schema} bq_schema_unused = set(bq_schema_index.keys()) else: diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index a3d1b8846..52ddffe7d 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -46,6 +46,7 @@ import google.api_core.client_options import google.api_core.exceptions +from google.api_core.iam import Policy from google.api_core import page_iterator import google.cloud._helpers from google.cloud import exceptions @@ -176,7 +177,10 @@ def __init__( client_options=None, ): super(Client, self).__init__( - project=project, credentials=credentials, _http=_http + project=project, + credentials=credentials, + client_options=client_options, + _http=_http, ) kw_args = {"client_info": client_info} @@ -605,6 +609,63 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None): ) return Dataset.from_api_repr(api_response) + def get_iam_policy( + self, table, requested_policy_version=1, retry=DEFAULT_RETRY, timeout=None, + ): + if not isinstance(table, (Table, TableReference)): + raise TypeError("table must be a Table or TableReference") + + if requested_policy_version != 1: + raise ValueError("only IAM policy version 1 is supported") + + body = {"options": {"requestedPolicyVersion": 1}} + + path = "{}:getIamPolicy".format(table.path) + + response = self._call_api( + retry, method="POST", path=path, data=body, timeout=timeout, + ) + + return Policy.from_api_repr(response) + + def set_iam_policy( + self, table, policy, updateMask=None, retry=DEFAULT_RETRY, timeout=None, + ): + if not isinstance(table, (Table, TableReference)): + raise TypeError("table must be a Table or TableReference") + + if not isinstance(policy, (Policy)): + raise TypeError("policy must be a Policy") + + body = {"policy": policy.to_api_repr()} + + if updateMask is not None: + body["updateMask"] = updateMask + + path = "{}:setIamPolicy".format(table.path) + + response = self._call_api( + retry, method="POST", path=path, data=body, timeout=timeout, + ) + + return Policy.from_api_repr(response) + + def test_iam_permissions( + self, table, permissions, retry=DEFAULT_RETRY, timeout=None, + ): + if not isinstance(table, (Table, TableReference)): + raise TypeError("table must be a Table or TableReference") + + body = {"permissions": permissions} + + path = "{}:testIamPermissions".format(table.path) + + response = 
self._call_api( + retry, method="POST", path=path, data=body, timeout=timeout, + ) + + return response + def get_model(self, model_ref, retry=DEFAULT_RETRY, timeout=None): """[Beta] Fetch the model referenced by ``model_ref``. diff --git a/google/cloud/bigquery/iam.py b/google/cloud/bigquery/iam.py new file mode 100644 index 000000000..df9db36b7 --- /dev/null +++ b/google/cloud/bigquery/iam.py @@ -0,0 +1,38 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""BigQuery API IAM policy definitions + +For all allowed roles and permissions, see: + +https://cloud.google.com/bigquery/docs/access-control +""" + +# BigQuery-specific IAM roles available for tables and views + +BIGQUERY_DATA_EDITOR_ROLE = "roles/bigquery.dataEditor" +"""When applied to a table or view, this role provides permissions to +read and update data and metadata for the table or view.""" + +BIGQUERY_DATA_OWNER_ROLE = "roles/bigquery.dataOwner" +"""When applied to a table or view, this role provides permissions to +read and update data and metadata for the table or view, share the +table/view, and delete the table/view.""" + +BIGQUERY_DATA_VIEWER_ROLE = "roles/bigquery.dataViewer" +"""When applied to a table or view, this role provides permissions to +read data and metadata from the table or view.""" + +BIGQUERY_METADATA_VIEWER_ROLE = "roles/bigquery.metadataViewer" +"""When applied to a table or view, this role provides persmissions to +read metadata from the table or view.""" diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 0f4c80686..f2ed6337e 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -86,8 +86,8 @@ class ScalarQueryParameter(_AbstractQueryParameter): 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. - value (Union[str, int, float, decimal.Decimal, bool, - datetime.datetime, datetime.date]): The scalar parameter value. + value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): + The scalar parameter value. """ def __init__(self, name, type_, value): @@ -105,9 +105,8 @@ def positional(cls, type_, value): 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. - value (Union[str, int, float, decimal.Decimal, bool, - datetime.datetime, - datetime.date]): The scalar parameter value. + value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): + The scalar parameter value. 
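For context on the IAM support added above, the new role constants in `google.cloud.bigquery.iam` and the `get_iam_policy` / `set_iam_policy` / `test_iam_permissions` client methods combine into a read-modify-write flow similar to the one exercised by the system tests later in this change. A minimal sketch (project, table, and member values are illustrative):

```python
from google.cloud import bigquery
from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE

client = bigquery.Client()
table_ref = bigquery.TableReference.from_string("my-project.my_dataset.my_table")

# Read the current policy, grant the viewer role to one member, write it back.
policy = client.get_iam_policy(table_ref)
policy.bindings.append(
    {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": {"user:someone@example.com"}}
)
updated_policy = client.set_iam_policy(table_ref, policy)

# Check which of the requested permissions the caller actually holds.
response = client.test_iam_permissions(
    table_ref, ["bigquery.tables.get", "bigquery.tables.getData"]
)
print(response.get("permissions", []))
```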
Returns: google.cloud.bigquery.query.ScalarQueryParameter: Instance without name diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 10b4198d3..d9e5f7773 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -21,6 +21,7 @@ import functools import logging import operator +import pytz import warnings import six @@ -1726,7 +1727,35 @@ def to_dataframe( bqstorage_client=bqstorage_client, create_bqstorage_client=create_bqstorage_client, ) - df = record_batch.to_pandas(date_as_object=date_as_object) + + # When converting timestamp values to nanosecond precision, the result + # can be out of pyarrow bounds. To avoid the error when converting to + # Pandas, we set the timestamp_as_object parameter to True, if necessary. + # + # NOTE: Python 3+ only, as timestamp_as_object parameter is only supported + # in pyarrow>=1.0, but the latter is not compatible with Python 2. + if six.PY2: + extra_kwargs = {} + else: + types_to_check = { + pyarrow.timestamp("us"), + pyarrow.timestamp("us", tz=pytz.UTC), + } + + for column in record_batch: + if column.type in types_to_check: + try: + column.cast("timestamp[ns]") + except pyarrow.lib.ArrowInvalid: + timestamp_as_object = True + break + else: + timestamp_as_object = False + + extra_kwargs = {"timestamp_as_object": timestamp_as_object} + + df = record_batch.to_pandas(date_as_object=date_as_object, **extra_kwargs) + for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column]) return df diff --git a/google/cloud/bigquery_v2/proto/encryption_config_pb2.py b/google/cloud/bigquery_v2/proto/encryption_config_pb2.py index 5ae21ea6f..5147743b6 100644 --- a/google/cloud/bigquery_v2/proto/encryption_config_pb2.py +++ b/google/cloud/bigquery_v2/proto/encryption_config_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/encryption_config.proto -"""Generated protocol buffer code.""" + from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/google/cloud/bigquery_v2/proto/model_pb2.py b/google/cloud/bigquery_v2/proto/model_pb2.py index 7b66be8f7..f485c4568 100644 --- a/google/cloud/bigquery_v2/proto/model_pb2.py +++ b/google/cloud/bigquery_v2/proto/model_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/model.proto -"""Generated protocol buffer code.""" + from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/google/cloud/bigquery_v2/proto/model_reference_pb2.py b/google/cloud/bigquery_v2/proto/model_reference_pb2.py index 2411c4863..07d7e4c4b 100644 --- a/google/cloud/bigquery_v2/proto/model_reference_pb2.py +++ b/google/cloud/bigquery_v2/proto/model_reference_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
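For context on the `to_dataframe` change above: microsecond timestamps can exceed pandas' nanosecond range (pandas caps out in the year 2262), and casting them to `timestamp[ns]` raises in pyarrow, which is what the new `timestamp_as_object` fallback guards against. A small illustration of the underlying behavior (values are illustrative; assumes pyarrow >= 1.0 and pandas installed):

```python
import datetime

import pyarrow

# One microsecond-precision timestamp far beyond pandas.Timestamp.max.
batch = pyarrow.RecordBatch.from_arrays(
    [
        pyarrow.array(
            [datetime.datetime(9999, 12, 31, tzinfo=datetime.timezone.utc)],
            type=pyarrow.timestamp("us", tz="UTC"),
        )
    ],
    names=["ts"],
)

try:
    batch.column(0).cast("timestamp[ns]")
except pyarrow.lib.ArrowInvalid:
    # Out of bounds for nanosecond precision, so keep the values as
    # Python datetime objects instead of pandas Timestamps.
    df = batch.to_pandas(timestamp_as_object=True)
    print(df["ts"].dtype)  # object, holding datetime.datetime values
```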
# source: google/cloud/bigquery_v2/proto/model_reference.proto -"""Generated protocol buffer code.""" + from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/google/cloud/bigquery_v2/proto/standard_sql_pb2.py b/google/cloud/bigquery_v2/proto/standard_sql_pb2.py index bfe77f934..15f6715a2 100644 --- a/google/cloud/bigquery_v2/proto/standard_sql_pb2.py +++ b/google/cloud/bigquery_v2/proto/standard_sql_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/standard_sql.proto -"""Generated protocol buffer code.""" + from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/noxfile.py b/noxfile.py index bb6a10e1e..4664278f1 100644 --- a/noxfile.py +++ b/noxfile.py @@ -73,6 +73,10 @@ def unit(session): def system(session): """Run the system test suite.""" + # Check the value of `RUN_SYSTEM_TESTS` env var. It defaults to true. + if os.environ.get("RUN_SYSTEM_TESTS", "true") == "false": + session.skip("RUN_SYSTEM_TESTS is set to false, skipping") + # Sanity check: Only run system tests if the environment variable is set. if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): session.skip("Credentials must be set via environment variable.") @@ -190,3 +194,36 @@ def docs(session): os.path.join("docs", ""), os.path.join("docs", "_build", "html", ""), ) + + +@nox.session(python="3.8") +def docfx(session): + """Build the docfx yaml files for this library.""" + + session.install("-e", ".") + session.install("sphinx", "alabaster", "recommonmark", "sphinx-docfx-yaml") + + shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) + session.run( + "sphinx-build", + "-T", # show full traceback on exception + "-N", # no colors + "-D", + ( + "extensions=sphinx.ext.autodoc," + "sphinx.ext.autosummary," + "docfx_yaml.extension," + "sphinx.ext.intersphinx," + "sphinx.ext.coverage," + "sphinx.ext.napoleon," + "sphinx.ext.todo," + "sphinx.ext.viewcode," + "recommonmark" + ), + "-b", + "html", + "-d", + os.path.join("docs", "_build", "doctrees", ""), + os.path.join("docs", ""), + os.path.join("docs", "_build", "html", ""), + ) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 5de21f7e9..7fe839119 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,6 @@ -google-cloud-bigquery[pandas,bqstorage,pyarrow]==1.25.0 +google-cloud-bigquery[pandas,bqstorage,pyarrow]==1.26.1 google-auth-oauthlib==0.4.1 -ipython==7.16.1 -matplotlib==3.3.0 +ipython==7.16.1; python_version < '3.7' +ipython==7.17.0; python_version >= '3.7' +matplotlib==3.3.1 pytz==2020.1 diff --git a/setup.py b/setup.py index 61e836a73..18bb78926 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.26.1" +version = "1.27.2" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' @@ -31,7 +31,7 @@ dependencies = [ 'enum34; python_version < "3.4"', "google-api-core >= 1.21.0, < 2.0dev", - "google-cloud-core >= 1.1.0, < 2.0dev", + "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.5.0, < 2.0dev", "six >=1.13.0,< 2.0.0dev", ] @@ -47,10 +47,10 @@ ], "pandas": ["pandas>=0.17.1"], # Exclude PyArrow dependency from 
Windows Python 2.7. - 'pyarrow: platform_system != "Windows" or python_version >= "3.4"': [ - # Bad Linux release for 0.14.0. - # https://issues.apache.org/jira/browse/ARROW-5868 - "pyarrow>=0.4.1, != 0.14.0" + "pyarrow": [ + "pyarrow >= 1.0.0, < 2.0dev; python_version >= '3.5'", + # Pyarrow >= 0.17.0 is not compatible with Python 2 anymore. + "pyarrow < 0.17.0; python_version < '3.0' and platform_system != 'Windows'", ], "tqdm": ["tqdm >= 4.0.0, <5.0.0dev"], "fastparquet": [ @@ -59,7 +59,8 @@ # llvmlite >= 0.32.0 cannot be installed on Python 3.5 and below # (building the wheel fails), thus needs to be restricted. # See: https://github.com/googleapis/python-bigquery/issues/78 - "llvmlite <= 0.31.0", + "llvmlite<=0.34.0;python_version>='3.6'", + "llvmlite<=0.31.0;python_version<'3.6'", ], } diff --git a/synth.metadata b/synth.metadata index b7e46157b..46c63367d 100644 --- a/synth.metadata +++ b/synth.metadata @@ -3,22 +3,16 @@ { "git": { "name": ".", - "remote": "git@github.com:googleapis/python-bigquery.git", - "sha": "416c0daf40e481c80fb5327b48baa915f0e7aa2f" + "remote": "git@github.com:tmatsuo/python-bigquery.git", + "sha": "5ed817523a85a6f332951e10c0bf7dbb86d7e1cf" } }, { "git": { - "name": "synthtool", - "remote": "https://github.com/googleapis/synthtool.git", - "sha": "b8ff6a41e195cda91bbfb20d9f11d5e58b7cc218" - } - }, - { - "git": { - "name": "synthtool", - "remote": "https://github.com/googleapis/synthtool.git", - "sha": "b8ff6a41e195cda91bbfb20d9f11d5e58b7cc218" + "name": "googleapis", + "remote": "https://github.com/googleapis/googleapis.git", + "sha": "868615a5c1c1059c636bb3d82a555edb1d5a251e", + "internalRef": "324294521" } } ], diff --git a/synth.py b/synth.py index 5125c398e..ac20c9aec 100644 --- a/synth.py +++ b/synth.py @@ -59,7 +59,7 @@ # ---------------------------------------------------------------------------- # Add templated files # ---------------------------------------------------------------------------- -templated_files = common.py_library(cov_level=100, samples=True) +templated_files = common.py_library(cov_level=100, samples=True, split_system_tests=True) # BigQuery has a custom multiprocessing note s.move(templated_files, excludes=["noxfile.py", "docs/multiprocessing.rst"]) diff --git a/tests/system.py b/tests/system.py index cd5454a87..be79a6d20 100644 --- a/tests/system.py +++ b/tests/system.py @@ -71,6 +71,7 @@ from google.api_core.exceptions import InternalServerError from google.api_core.exceptions import ServiceUnavailable from google.api_core.exceptions import TooManyRequests +from google.api_core.iam import Policy from google.cloud import bigquery from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import Dataset @@ -130,6 +131,8 @@ PANDAS_MINIMUM_VERSION = pkg_resources.parse_version("1.0.0") PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +PYARROW_MINIMUM_VERSION = pkg_resources.parse_version("0.17.0") +PYARROW_INSTALLED_VERSION = pkg_resources.get_distribution("pyarrow").parsed_version def _has_rows(result): @@ -1074,6 +1077,48 @@ def test_load_table_from_dataframe_w_explicit_schema(self): self.assertEqual(tuple(table.schema), table_schema) self.assertEqual(table.num_rows, 3) + @unittest.skipIf( + pyarrow is None or PYARROW_INSTALLED_VERSION < PYARROW_MINIMUM_VERSION, + "Only `pyarrow version >=0.17.0` is supported", + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_load_table_from_dataframe_w_struct_datatype(self): + """Test that a DataFrame with struct datatype can be 
uploaded if a + BigQuery schema is specified. + + https://github.com/googleapis/python-bigquery/issues/21 + """ + dataset_id = _make_dataset_id("bq_load_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_dataframe_w_struct_datatype".format( + Config.CLIENT.project, dataset_id + ) + table_schema = [ + bigquery.SchemaField( + "bar", + "RECORD", + fields=[ + bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ], + mode="REQUIRED", + ), + ] + table = retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + df_data = [{"id": 1, "age": 21}, {"id": 2, "age": 22}, {"id": 2, "age": 23}] + dataframe = pandas.DataFrame(data={"bar": df_data}, columns=["bar"]) + + load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) + load_job.result() + + table = Config.CLIENT.get_table(table_id) + self.assertEqual(table.schema, table_schema) + self.assertEqual(table.num_rows, 3) + def test_load_table_from_json_basic_use(self): table_schema = ( bigquery.SchemaField("name", "STRING", mode="REQUIRED"), @@ -1407,6 +1452,54 @@ def test_copy_table(self): got_rows = self._fetch_single_page(dest_table) self.assertTrue(len(got_rows) > 0) + def test_get_set_iam_policy(self): + from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE + + dataset = self.temp_dataset(_make_dataset_id("create_table")) + table_id = "test_table" + table_ref = Table(dataset.table(table_id)) + self.assertFalse(_table_exists(table_ref)) + + table = retry_403(Config.CLIENT.create_table)(table_ref) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + + member = "serviceAccount:{}".format(Config.CLIENT.get_service_account_email()) + BINDING = { + "role": BIGQUERY_DATA_VIEWER_ROLE, + "members": {member}, + } + + policy = Config.CLIENT.get_iam_policy(table) + self.assertIsInstance(policy, Policy) + self.assertEqual(policy.bindings, []) + + policy.bindings.append(BINDING) + returned_policy = Config.CLIENT.set_iam_policy(table, policy) + self.assertEqual(returned_policy.bindings, policy.bindings) + + def test_test_iam_permissions(self): + dataset = self.temp_dataset(_make_dataset_id("create_table")) + table_id = "test_table" + table_ref = Table(dataset.table(table_id)) + self.assertFalse(_table_exists(table_ref)) + + table = retry_403(Config.CLIENT.create_table)(table_ref) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + + # Test some default permissions. 
+ permissions = [ + "bigquery.tables.get", + "bigquery.tables.getData", + "bigquery.tables.update", + ] + + response = Config.CLIENT.test_iam_permissions(table, [permissions]) + self.assertEqual(set(response["permissions"]), set(permissions)) + def test_job_cancel(self): DATASET_ID = _make_dataset_id("job_cancel") JOB_ID_PREFIX = "fetch_" + DATASET_ID diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index fa6d27c98..28ebe8144 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -18,6 +18,7 @@ import unittest import mock +import six class Test_not_null(unittest.TestCase): @@ -847,6 +848,26 @@ def test_w_non_empty_list(self): converted = self._call_fut(fields, original) self.assertEqual(converted, {"one": "42", "two": "two"}) + def test_w_list_missing_fields(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = [42] + + with six.assertRaisesRegex(self, ValueError, r".*not match schema length.*"): + self._call_fut(fields, original) + + def test_w_list_too_many_fields(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = [42, "two", "three"] + + with six.assertRaisesRegex(self, ValueError, r".*not match schema length.*"): + self._call_fut(fields, original) + def test_w_non_empty_dict(self): fields = [ _make_field("INT64", name="one", mode="NULLABLE"), @@ -890,6 +911,25 @@ def test_w_explicit_none_value(self): # None values should be dropped regardless of the field type self.assertEqual(converted, {"one": "42"}) + def test_w_dict_unknown_fields(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = { + "whoami": datetime.date(2020, 7, 20), + "one": 111, + "two": "222", + "void": None, + } + + converted = self._call_fut(fields, original) + + # Unknown fields should be included (if not None), but converted as strings. + self.assertEqual( + converted, {"whoami": "2020-07-20", "one": "111", "two": "222"}, + ) + class Test_field_to_json(unittest.TestCase): def _call_fut(self, field, value): diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 4f4b5f447..e229e04a2 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -20,6 +20,7 @@ import warnings import mock +import six try: import pandas @@ -299,7 +300,10 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): ) ) assert pyarrow.types.is_struct(actual) - assert actual.num_children == len(fields) + try: + assert actual.num_fields == len(fields) + except AttributeError: # py27 + assert actual.num_children == len(fields) assert actual.equals(expected) @@ -344,7 +348,10 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): ) assert pyarrow.types.is_list(actual) assert pyarrow.types.is_struct(actual.value_type) - assert actual.value_type.num_children == len(fields) + try: + assert actual.value_type.num_fields == len(fields) + except AttributeError: # py27 + assert actual.value_type.num_children == len(fields) assert actual.value_type.equals(expected_value_type) @@ -542,9 +549,17 @@ def test_bq_to_arrow_schema_w_unknown_type(module_under_test): # instead. 
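The `_record_field_to_json` tests added above pin down the new behavior for sequence rows and for dict rows with unknown keys. Informally, and noting that `_record_field_to_json` is a private helper so this is only an illustration of the behavior the tests describe (field names are hypothetical):

```python
from google.cloud.bigquery._helpers import _record_field_to_json
from google.cloud.bigquery.schema import SchemaField

fields = [SchemaField("one", "INT64"), SchemaField("two", "STRING")]

# Sequence rows are converted positionally, with values rendered as strings.
print(_record_field_to_json(fields, (42, "two")))  # {'one': '42', 'two': 'two'}

# Unknown keys in dict rows are now kept and stringified rather than dropped.
print(_record_field_to_json(fields, {"one": 1, "extra": 3.25}))
# {'one': '1', 'extra': '3.25'}

# A sequence whose length does not match the schema raises ValueError.
try:
    _record_field_to_json(fields, (42,))
except ValueError as exc:
    print(exc)
```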
schema.SchemaField("field3", "UNKNOWN_TYPE"), ) - actual = module_under_test.bq_to_arrow_schema(fields) + with warnings.catch_warnings(record=True) as warned: + actual = module_under_test.bq_to_arrow_schema(fields) assert actual is None + if six.PY3: + assert len(warned) == 1 + warning = warned[0] + assert "field3" in str(warning) + else: + assert len(warned) == 0 + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_get_column_or_index_not_found(module_under_test): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 2c4c1342c..8b63f7e57 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -221,7 +221,8 @@ def test__call_api_applying_custom_retry_on_timeout(self): from concurrent.futures import TimeoutError from google.cloud.bigquery.retry import DEFAULT_RETRY - client = self._make_one() + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) api_request_patcher = mock.patch.object( client._connection, "api_request", side_effect=[TimeoutError, "result"], @@ -674,7 +675,8 @@ def test_create_bqstorage_client(self): mock_client.assert_called_once_with(credentials=creds) def test_create_bqstorage_client_missing_dependency(self): - client = self._make_one() + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) def fail_bqstorage_import(name, globals, locals, fromlist, level): # NOTE: *very* simplified, assuming a straightforward absolute import @@ -1748,6 +1750,216 @@ def test_get_table_sets_user_agent(self): ) self.assertIn("my-application/1.2.3", expected_user_agent) + def test_get_iam_policy(self): + from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE + from google.api_core.iam import Policy + + PATH = "/projects/{}/datasets/{}/tables/{}:getIamPolicy".format( + self.PROJECT, self.DS_ID, self.TABLE_ID, + ) + BODY = {"options": {"requestedPolicyVersion": 1}} + ETAG = "CARDI" + VERSION = 1 + OWNER1 = "user:phred@example.com" + OWNER2 = "group:cloud-logs@google.com" + EDITOR1 = "domain:google.com" + EDITOR2 = "user:phred@example.com" + VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" + VIEWER2 = "user:phred@example.com" + RETURNED = { + "resourceId": PATH, + "etag": ETAG, + "version": VERSION, + "bindings": [ + {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, + {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, + {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, + ], + } + EXPECTED = { + binding["role"]: set(binding["members"]) for binding in RETURNED["bindings"] + } + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) + + policy = client.get_iam_policy(self.TABLE_REF, timeout=7.5) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 + ) + + self.assertIsInstance(policy, Policy) + self.assertEqual(policy.etag, RETURNED["etag"]) + self.assertEqual(policy.version, RETURNED["version"]) + self.assertEqual(dict(policy), EXPECTED) + + def test_get_iam_policy_w_invalid_table(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + table_resource_string = "projects/{}/datasets/{}/tables/{}".format( + 
self.PROJECT, self.DS_ID, self.TABLE_ID, + ) + + with self.assertRaises(TypeError): + client.get_iam_policy(table_resource_string) + + def test_get_iam_policy_w_invalid_version(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with self.assertRaises(ValueError): + client.get_iam_policy(self.TABLE_REF, requested_policy_version=2) + + def test_set_iam_policy(self): + from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE + from google.api_core.iam import Policy + + PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + ETAG = "foo" + VERSION = 1 + OWNER1 = "user:phred@example.com" + OWNER2 = "group:cloud-logs@google.com" + EDITOR1 = "domain:google.com" + EDITOR2 = "user:phred@example.com" + VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" + VIEWER2 = "user:phred@example.com" + BINDINGS = [ + {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, + {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, + {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, + ] + MASK = "bindings,etag" + RETURNED = {"etag": ETAG, "version": VERSION, "bindings": BINDINGS} + + policy = Policy() + for binding in BINDINGS: + policy[binding["role"]] = binding["members"] + + BODY = {"policy": policy.to_api_repr(), "updateMask": MASK} + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) + + returned_policy = client.set_iam_policy( + self.TABLE_REF, policy, updateMask=MASK, timeout=7.5 + ) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 + ) + self.assertEqual(returned_policy.etag, ETAG) + self.assertEqual(returned_policy.version, VERSION) + self.assertEqual(dict(returned_policy), dict(policy)) + + def test_set_iam_policy_no_mask(self): + from google.api_core.iam import Policy + + PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + RETURNED = {"etag": "foo", "version": 1, "bindings": []} + + policy = Policy() + BODY = {"policy": policy.to_api_repr()} + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) + + client.set_iam_policy(self.TABLE_REF, policy, timeout=7.5) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 + ) + + def test_set_iam_policy_invalid_policy(self): + from google.api_core.iam import Policy + + policy = Policy() + invalid_policy_repr = policy.to_api_repr() + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with self.assertRaises(TypeError): + client.set_iam_policy(self.TABLE_REF, invalid_policy_repr) + + def test_set_iam_policy_w_invalid_table(self): + from google.api_core.iam import Policy + + policy = Policy() + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + + with 
self.assertRaises(TypeError): + client.set_iam_policy(table_resource_string, policy) + + def test_test_iam_permissions(self): + PATH = "/projects/%s/datasets/%s/tables/%s:testIamPermissions" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + + PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] + BODY = {"permissions": PERMISSIONS} + RETURNED = {"permissions": PERMISSIONS} + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) + + client.test_iam_permissions(self.TABLE_REF, PERMISSIONS, timeout=7.5) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 + ) + + def test_test_iam_permissions_w_invalid_table(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + + PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] + + with self.assertRaises(TypeError): + client.test_iam_permissions(table_resource_string, PERMISSIONS) + def test_update_dataset_w_invalid_field(self): from google.cloud.bigquery.dataset import Dataset @@ -7161,19 +7373,22 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_struct_fields_error(self): + def test_load_table_from_dataframe_struct_fields(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField client = self._make_client() - records = [{"float_column": 3.14, "struct_column": [{"foo": 1}, {"bar": -1}]}] - dataframe = pandas.DataFrame(data=records) + records = [(3.14, {"foo": 1, "bar": 1})] + dataframe = pandas.DataFrame( + data=records, columns=["float_column", "struct_column"] + ) schema = [ SchemaField("float_column", "FLOAT"), SchemaField( - "agg_col", + "struct_column", "RECORD", fields=[SchemaField("foo", "INTEGER"), SchemaField("bar", "INTEGER")], ), @@ -7184,14 +7399,49 @@ def test_load_table_from_dataframe_struct_fields_error(self): "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - with pytest.raises(ValueError) as exc_info, load_patch: - client.load_table_from_dataframe( - dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION + if six.PY2: + with pytest.raises(ValueError) as exc_info, load_patch: + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + job_config=job_config, + location=self.LOCATION, + ) + + err_msg = str(exc_info.value) + assert "struct" in err_msg + assert "not support" in err_msg + + else: + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), ) + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + job_config=job_config, + location=self.LOCATION, + ) - err_msg = str(exc_info.value) - assert "struct" in err_msg - assert "not support" in err_msg + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=mock.ANY, + job_id_prefix=None, + 
location=self.LOCATION, + project=None, + job_config=mock.ANY, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert sent_config.schema == schema @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @@ -7470,17 +7720,24 @@ def test_load_table_from_dataframe_wo_pyarrow_custom_compression(self): ) with load_patch, get_table_patch, pyarrow_patch, to_parquet_patch as to_parquet_spy: - client.load_table_from_dataframe( - dataframe, - self.TABLE_REF, - location=self.LOCATION, - parquet_compression="gzip", - ) + with warnings.catch_warnings(record=True) as warned: + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + location=self.LOCATION, + parquet_compression="gzip", + ) call_args = to_parquet_spy.call_args assert call_args is not None assert call_args.kwargs.get("compression") == "gzip" + assert len(warned) == 2 + warning = warned[0] + assert "Loading dataframe data without pyarrow" in str(warning) + warning = warned[1] + assert "Please install the pyarrow package" in str(warning) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py index 733445337..9cd3631e1 100644 --- a/tests/unit/test_job.py +++ b/tests/unit/test_job.py @@ -17,6 +17,7 @@ import json import textwrap import unittest +import warnings import freezegun import mock @@ -1834,26 +1835,34 @@ def test_time_partitioning_hit(self): "expirationMs": str(year_ms), "requirePartitionFilter": False, } - expected = TimePartitioning( - type_=TimePartitioningType.DAY, - field=field, - expiration_ms=year_ms, - require_partition_filter=False, - ) + with warnings.catch_warnings(record=True) as warned: + expected = TimePartitioning( + type_=TimePartitioningType.DAY, + field=field, + expiration_ms=year_ms, + require_partition_filter=False, + ) self.assertEqual(config.time_partitioning, expected) + assert len(warned) == 1 + warning = warned[0] + assert "TimePartitioning.require_partition_filter" in str(warning) + def test_time_partitioning_setter(self): from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.table import TimePartitioningType field = "creation_date" year_ms = 86400 * 1000 * 365 - time_partitioning = TimePartitioning( - type_=TimePartitioningType.DAY, - field=field, - expiration_ms=year_ms, - require_partition_filter=False, - ) + + with warnings.catch_warnings(record=True) as warned: + time_partitioning = TimePartitioning( + type_=TimePartitioningType.DAY, + field=field, + expiration_ms=year_ms, + require_partition_filter=False, + ) + config = self._get_target_class()() config.time_partitioning = time_partitioning expected = { @@ -1864,6 +1873,10 @@ def test_time_partitioning_setter(self): } self.assertEqual(config._properties["load"]["timePartitioning"], expected) + assert len(warned) == 1 + warning = warned[0] + assert "TimePartitioning.require_partition_filter" in str(warning) + def test_time_partitioning_setter_w_none(self): from google.cloud.bigquery.table import TimePartitioningType @@ -5595,7 +5608,10 @@ def test_to_dataframe_column_date_dtypes_wo_pyarrow(self): job = self._make_one(self.JOB_ID, self.QUERY, client) with mock.patch("google.cloud.bigquery.table.pyarrow", None): - df = job.to_dataframe(date_as_object=False, create_bqstorage_client=False) + with 
warnings.catch_warnings(record=True) as warned: + df = job.to_dataframe( + date_as_object=False, create_bqstorage_client=False + ) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 1) # verify the number of rows @@ -5604,6 +5620,10 @@ def test_to_dataframe_column_date_dtypes_wo_pyarrow(self): self.assertEqual(df.date.dtype.name, "object") + assert len(warned) == 1 + warning = warned[0] + assert "without pyarrow" in str(warning) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm") diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index a42592e3c..7b07626ad 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -399,7 +399,7 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): # Set up the context with monkeypatch so that it's reset for subsequent # tests. - monkeypatch.setattr(magics.context, "credentials", mock_credentials) + monkeypatch.setattr(magics.context, "_credentials", mock_credentials) # Mock out the BigQuery Storage API. bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) @@ -560,7 +560,7 @@ def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): # Set up the context with monkeypatch so that it's reset for subsequent # tests. - monkeypatch.setattr(magics.context, "credentials", mock_credentials) + monkeypatch.setattr(magics.context, "_credentials", mock_credentials) # Mock out the BigQuery Storage API. bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) @@ -624,7 +624,7 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): # Set up the context with monkeypatch so that it's reset for subsequent # tests. - monkeypatch.setattr(magics.context, "credentials", mock_credentials) + monkeypatch.setattr(magics.context, "_credentials", mock_credentials) # Mock out the BigQuery Storage API. bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) @@ -772,9 +772,16 @@ def test_bigquery_magic_w_missing_query(): ip.extension_manager.load_extension("google.cloud.bigquery") magics.context._project = None + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + cell_body = " \n \n \t\t \n " - with io.capture_output() as captured_io: + with io.capture_output() as captured_io, default_patch: ip.run_cell_magic("bigquery", "df", cell_body) output = captured_io.stderr diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 2c9d0f64e..80223e8e1 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import datetime as dt import itertools import logging import time @@ -1778,7 +1779,8 @@ def test_to_arrow_w_unknown_type(self): api_request = mock.Mock(return_value={"rows": rows}) row_iterator = self._make_one(_mock_client(), api_request, path, schema) - tbl = row_iterator.to_arrow(create_bqstorage_client=False) + with warnings.catch_warnings(record=True) as warned: + tbl = row_iterator.to_arrow(create_bqstorage_client=False) self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) @@ -1799,6 +1801,10 @@ def test_to_arrow_w_unknown_type(self): self.assertEqual(ages, [33, 29]) self.assertEqual(sports, ["volleyball", "basketball"]) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("sport" in str(warning)) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_empty_table(self): from google.cloud.bigquery.schema import SchemaField @@ -2266,6 +2272,68 @@ def test_to_dataframe(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.age.dtype.name, "int64") + @pytest.mark.xfail( + six.PY2, + reason=( + "Requires pyarrow>=1.0 to work, but the latter is not compatible " + "with Python 2 anymore." + ), + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [SchemaField("some_timestamp", "TIMESTAMP")] + rows = [ + {"f": [{"v": "81953424000.0"}]}, # 4567-01-01 00:00:00 UTC + {"f": [{"v": "253402214400.0"}]}, # 9999-12-31 00:00:00 UTC + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + df = row_iterator.to_dataframe(create_bqstorage_client=False) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 2) # verify the number of rows + self.assertEqual(list(df.columns), ["some_timestamp"]) + self.assertEqual( + list(df["some_timestamp"]), + [dt.datetime(4567, 1, 1), dt.datetime(9999, 12, 31)], + ) + + @pytest.mark.xfail( + six.PY2, + reason=( + "Requires pyarrow>=1.0 to work, but the latter is not compatible " + "with Python 2 anymore."
+ ), + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [SchemaField("some_datetime", "DATETIME")] + rows = [ + {"f": [{"v": "4567-01-01T00:00:00"}]}, + {"f": [{"v": "9999-12-31T00:00:00"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + df = row_iterator.to_dataframe(create_bqstorage_client=False) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 2) # verify the number of rows + self.assertEqual(list(df.columns), ["some_datetime"]) + self.assertEqual( + list(df["some_datetime"]), + [dt.datetime(4567, 1, 1), dt.datetime(9999, 12, 31)], + ) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_warning_wo_pyarrow(self): from google.cloud.bigquery.client import PyarrowMissingWarning @@ -2370,13 +2438,18 @@ def test_to_dataframe_progress_bar_wo_pyarrow( for progress_bar_type, progress_bar_mock in progress_bars: row_iterator = self._make_one(_mock_client(), api_request, path, schema) with mock.patch("google.cloud.bigquery.table.pyarrow", None): - df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) + with warnings.catch_warnings(record=True) as warned: + df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) progress_bar_mock.assert_called() progress_bar_mock().update.assert_called() progress_bar_mock().close.assert_called_once() self.assertEqual(len(df), 4) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("without pyarrow" in str(warning)) + @unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery.table.tqdm", new=None) def test_to_dataframe_no_tqdm_no_progress_bar(self): @@ -2499,12 +2572,17 @@ def test_to_dataframe_w_empty_results_wo_pyarrow(self): api_request = mock.Mock(return_value={"rows": []}) row_iterator = self._make_one(_mock_client(), api_request, schema=schema) - df = row_iterator.to_dataframe() + with warnings.catch_warnings(record=True) as warned: + df = row_iterator.to_dataframe() self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 0) # verify the number of rows self.assertEqual(list(df), ["name", "age"]) # verify the column names + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("without pyarrow" in str(warning)) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_no_results_wo_pyarrow(self): from google.cloud.bigquery.schema import SchemaField @@ -2522,12 +2600,17 @@ def empty_iterable(dtypes=None): row_iterator.to_dataframe_iterable = empty_iterable - df = row_iterator.to_dataframe() + with warnings.catch_warnings(record=True) as warned: + df = row_iterator.to_dataframe() self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 0) # verify the number of rows self.assertEqual(list(df), ["name", "age"]) # verify the column names + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("without pyarrow" in str(warning)) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_various_types_nullable(self): import datetime @@ -2787,11 +2870,19 @@ def test_to_dataframe_w_bqstorage_v1beta1_no_streams(self): table=mut.TableReference.from_string("proj.dset.tbl"), ) - got = row_iterator.to_dataframe(bqstorage_client) + with 
warnings.catch_warnings(record=True) as warned: + got = row_iterator.to_dataframe(bqstorage_client) + column_names = ["colA", "colC", "colB"] self.assertEqual(list(got), column_names) self.assertTrue(got.empty) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue( + "Support for BigQuery Storage v1beta1 clients is deprecated" in str(warning) + ) + @unittest.skipIf( bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" ) @@ -3493,7 +3584,10 @@ def test_to_dataframe_concat_categorical_dtype_wo_pyarrow(self): row_iterator = self._make_one(_mock_client(), api_request, path, schema) - with mock.patch("google.cloud.bigquery.table.pyarrow", None): + mock_pyarrow = mock.patch("google.cloud.bigquery.table.pyarrow", None) + catch_warnings = warnings.catch_warnings(record=True) + + with mock_pyarrow, catch_warnings as warned: got = row_iterator.to_dataframe( dtypes={ "col_category": pandas.core.dtypes.dtypes.CategoricalDtype( @@ -3522,6 +3616,10 @@ def test_to_dataframe_concat_categorical_dtype_wo_pyarrow(self): ["low", "medium", "low", "medium", "high", "low"], ) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("without pyarrow" in str(warning)) + class TestPartitionRange(unittest.TestCase): def _get_target_class(self):
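For context on the table access-control surface that the new IAM tests above exercise, here is a minimal usage sketch. It is not part of the patch; it assumes Client.get_iam_policy, Client.set_iam_policy, and Client.test_iam_permissions behave as the unit and system tests exercise them, and the project, dataset, and table names are placeholders.

    from google.cloud import bigquery
    from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE

    client = bigquery.Client()
    # Placeholder table; any existing table reference works the same way.
    table_ref = bigquery.TableReference.from_string("my-project.my_dataset.my_table")

    # Read the current policy (the tests request policy version 1, the default).
    policy = client.get_iam_policy(table_ref)

    # Add a viewer binding and write the policy back.
    policy[BIGQUERY_DATA_VIEWER_ROLE] = ["user:phred@example.com"]
    updated_policy = client.set_iam_policy(table_ref, policy)

    # Check which of the requested permissions the caller actually holds;
    # the response is a dict with a "permissions" key, as asserted in the tests.
    response = client.test_iam_permissions(
        table_ref, ["bigquery.tables.get", "bigquery.tables.update"]
    )
    print(response["permissions"])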