# Copyright 2018 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import uuid

# [START bqml_ncaa_tutorial_import_and_client]
from google.cloud import bigquery

# [END bqml_ncaa_tutorial_import_and_client]
import pytest

# [START bqml_ncaa_tutorial_import_and_client]
client = bigquery.Client()
# We use a unique dataset ID for this example to avoid collisions with
# other invocations of this tutorial.  In practice, you could leverage
# a persistent dataset and not create/destroy it with each invocation.
dataset_id = f"bqml_tutorial_{str(uuid.uuid4().hex)}"
full_dataset_id = f"{client.project}.{dataset_id}"
# [END bqml_ncaa_tutorial_import_and_client]


@pytest.fixture
def delete_dataset():
    yield
    client.delete_dataset(full_dataset_id, delete_contents=True)


def test_ncaa_tutorial(delete_dataset):
    # [START bqml_ncaa_tutorial_create_dataset]
    dataset = bigquery.Dataset(full_dataset_id)
    dataset.location = "US"
    client.create_dataset(dataset)
    # [END bqml_ncaa_tutorial_create_dataset]

    # Create the tables used by the tutorial
    # Note: the queries are saved to a file. This should be updated to use the
    # saved queries once the library supports running saved queries.
    query_files = ["feature_input_query.sql", "training_data_query.sql"]
    resources_directory = os.path.join(os.path.dirname(__file__), "resources")
    for fname in query_files:
        query_filepath = os.path.join(resources_directory, fname)
        sql = open(query_filepath, encoding="utf-8").read().format(dataset_id)
        client.query(sql).result()

    # [START bqml_ncaa_tutorial_create_model]
    sql = """
        CREATE OR REPLACE MODEL `{0}.ncaa_model`
        OPTIONS (
            model_type='linear_reg',
            max_iteration=50 ) AS
        SELECT
            * EXCEPT (
                game_id, season, scheduled_date,
                total_three_points_made,
                total_three_points_att),
            total_three_points_att as label
        FROM
            `{0}.wide_games`
        WHERE
            # remove the game to predict
            game_id != 'f1063e80-23c7-486b-9a5e-faa52beb2d83'
    """.format(
        dataset_id
    )
    df = client.query(sql).to_dataframe()
    print(df)
    # [END bqml_ncaa_tutorial_create_model]

    # [START bqml_ncaa_tutorial_get_training_statistics]
    sql = """
        SELECT
            *
        FROM
            ML.TRAINING_INFO(MODEL `{}.ncaa_model`)
    """.format(
        dataset_id
    )
    df = client.query(sql).to_dataframe()
    print(df)
    # [END bqml_ncaa_tutorial_get_training_statistics]

    # [START bqml_ncaa_tutorial_evaluate_model]
    sql = """
        WITH eval_table AS (
            SELECT
                *,
                total_three_points_att AS label
            FROM
                `{0}.wide_games` )
        SELECT
            *
        FROM
            ML.EVALUATE(MODEL `{0}.ncaa_model`,
                TABLE eval_table)
    """.format(
        dataset_id
    )
    df = client.query(sql).to_dataframe()
    print(df)
    # [END bqml_ncaa_tutorial_evaluate_model]

    # [START bqml_ncaa_tutorial_predict_outcomes]
    sql = """
        WITH game_to_predict AS (
            SELECT
                *
            FROM
                `{0}.wide_games`
            WHERE
                game_id='f1063e80-23c7-486b-9a5e-faa52beb2d83' )
        SELECT
            truth.game_id AS game_id,
            total_three_points_att,
            predicted_total_three_points_att
        FROM (
            SELECT
                game_id,
                predicted_label AS predicted_total_three_points_att
            FROM
                ML.PREDICT(MODEL `{0}.ncaa_model`,
                table game_to_predict) ) AS predict
        JOIN (
            SELECT
                game_id,
                total_three_points_att AS total_three_points_att
            FROM
                game_to_predict) AS truth
        ON
            predict.game_id = truth.game_id
    """.format(
        dataset_id
    )
    df = client.query(sql).to_dataframe()
    print(df)
    # [END bqml_ncaa_tutorial_predict_outcomes]