# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script runs after ``nox -s docfx`` has run. docfx is the API doc format used by
Google Cloud. It is described here: https://github.com/googleapis/docuploader?tab=readme-ov-file#requirements-for-docfx-yaml-tarballs.

One of the files used by docfx is toc.yml, which is used to generate the table of contents sidebar.
This script patches that file to create subfolders for each of the clients.
"""


import glob
import yaml
import os
import shutil

# run from the parent of this script's directory (/docs), so the relative
# paths used throughout this file resolve against it
_script_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(f"{_script_dir}/{os.pardir}")


def add_sections(toc_file_path, section_list, output_file_path=None):
    """
    Add new sections to the autogenerated docfx table of contents file

    Takes in a list of TocSection objects, which should point to a directory of rst files
    within the main /docs directory, which represents a self-contained section of content

    Any existing top-level entry whose name matches the title of one of the new
    sections is removed first (this rewrites toc_file_path in place), so a section
    is never listed twice. Each section's ``copy_markdown`` is called as it is added.

    :param toc_file_path: path to the autogenerated toc file
    :param section_list: list of TocSection objects to add
    :param output_file_path: path to save the updated toc file. If None, save to the input file
    """
    # remove any sections that are already in the toc
    remove_sections(toc_file_path, [section.title for section in section_list])
    # load the toc; the context manager closes the handle instead of leaking it
    with open(toc_file_path, "r") as toc_file:
        current_toc = yaml.safe_load(toc_file)
    # add new sections
    top_level_items = current_toc[0]["items"]
    for section in section_list:
        print(f"Adding section {section.title}...")
        # insert(-1, ...) places the section just before the current last entry,
        # so the last top-level entry of the toc stays last
        top_level_items.insert(-1, section.to_dict())
        section.copy_markdown()
    # save file
    if output_file_path is None:
        output_file_path = toc_file_path
    with open(output_file_path, "w") as f:
        yaml.dump(current_toc, f)


def remove_sections(toc_file_path, section_list, output_file_path=None):
    """
    Remove sections from the autogenerated docfx table of contents file

    Takes in a list of string section names to remove from the toc file.
    Only the top-level entries of the toc are compared against the names;
    names that are not present are ignored.

    :param toc_file_path: path to the autogenerated toc file
    :param section_list: list of section names to remove
    :param output_file_path: path to save the updated toc file. If None, save to the input file
    """
    # load the toc; the context manager closes the handle instead of leaking it
    with open(toc_file_path, "r") as toc_file:
        current_toc = yaml.safe_load(toc_file)
    print(f"Removing sections {section_list}...")
    # keep every top-level entry whose name was not asked to be removed
    current_toc[0]["items"] = [
        d for d in current_toc[0]["items"] if d["name"] not in section_list
    ]
    # save file
    if output_file_path is None:
        output_file_path = toc_file_path
    with open(output_file_path, "w") as f:
        yaml.dump(current_toc, f)


class TocSection:
    """
    A table of contents section built from a directory of rst files.

    The section title and the ordered list of entries are parsed from an index
    rst file inside the directory when the object is constructed.
    """

    def __init__(self, dir_name, index_file_name):
        """
        :param dir_name: name of the directory containing the rst files
        :param index_file_name: name of an index file within dir_name. This file
            will not be included in the table of contents, but provides an ordered
            list of the other files which should be included
        """
        self.dir_name = dir_name
        self.index_file_name = index_file_name
        index_file_path = os.path.join(dir_name, index_file_name)
        # find set of files referenced by the index file
        with open(index_file_path, "r") as f:
            self.title = None
            in_toc = False
            self.items = []
            for line in f:
                # ignore empty lines
                if not line.strip():
                    continue
                # add files explicitly included in the toc; their entry name is
                # the capitalized file name rather than the file's first line
                if line.startswith(".. include::"):
                    file_base = os.path.splitext(line.split("::")[1].strip())[0]
                    self.items.append(
                        self.extract_toc_entry(
                            file_base, file_title=file_base.capitalize()
                        )
                    )
                    continue
                if line.startswith(".. toctree::"):
                    in_toc = True
                    continue
                # ignore directives (any remaining line containing a colon,
                # which also covers toctree options such as ":maxdepth:")
                if ":" in line:
                    continue
                # set title as first line with no directive
                if self.title is None:
                    self.title = line.strip()
                if not in_toc:
                    continue
                # bail when toc indented block is done
                if not line.startswith((" ", "\t")):
                    in_toc = False
                    continue
                # extract entries
                self.items.append(self.extract_toc_entry(line.strip()))

    def extract_toc_entry(self, file_name, file_title=None):
        """
        Given the name of a file, extract the title and href for the toc entry,
        and return as a dictionary

        :param file_name: name of an rst file within dir_name, without the extension
        :param file_title: name to use for the entry. If None, the stripped first
            line of the rst file is used
        :return: dictionary with "name" (the title) and "href" (file_name with a
            .md extension)
        """
        # load the file to get the title; the file is opened even when a title
        # is supplied, so a missing rst file always fails here
        with open(f"{self.dir_name}/{file_name}.rst", "r") as f2:
            if file_title is None:
                # use first line as title if not provided
                file_title = f2.readline().strip()
            return {"name": file_title, "href": f"{file_name}.md"}

    def to_dict(self):
        """
        Convert the TocSection object to a dictionary that can be written to a yaml file
        """
        return {"name": self.title, "items": self.items}

    def copy_markdown(self):
        """
        Copy markdown files from _build/markdown/dir_name to _build/html/docfx_yaml

        This is necessary because the markdown files in sub-directories
        are not copied over by the docfx build by default
        """
        for file in os.listdir("_build/markdown/" + self.dir_name):
            shutil.copy(
                f"_build/markdown/{self.dir_name}/{file}",
                "_build/html/docfx_yaml",
            )

    def validate_section(self, toc):
        """
        Check that this section in the toc matches the files on disk

        :param toc: parsed content of the toc file, in which this section is
            looked up by its title among the top-level entries
        :raises AssertionError: if the number of toc entries differs from the
            number of non-index rst files in dir_name, if an rst file has no
            matching entry, or if an entry's markdown file is missing from
            _build/html/docfx_yaml
        :raises IndexError: if no top-level toc entry is named after this section
        """
        # Make sure each rst file is listed in the toc.
        items_in_toc = [
            d["items"] for d in toc[0]["items"] if d["name"] == self.title
        ][0]
        items_in_dir = [f for f in os.listdir(self.dir_name) if f.endswith(".rst")]
        # subtract 1 for index
        assert len(items_in_toc) == len(items_in_dir) - 1, (
            f"Section {self.title!r} lists {len(items_in_toc)} entries, but "
            f"{self.dir_name} contains {len(items_in_dir) - 1} rst files "
            f"besides the index"
        )
        for file in items_in_dir:
            if file != self.index_file_name:
                base_name, _ = os.path.splitext(file)
                assert any(
                    d["href"] == f"{base_name}.md" for d in items_in_toc
                ), f"{file} is not listed in toc section {self.title!r}"
        # make sure the markdown files are present in the docfx_yaml directory
        md_files = [d["href"] for d in items_in_toc]
        for file in md_files:
            assert os.path.exists(
                f"_build/html/docfx_yaml/{file}"
            ), f"_build/html/docfx_yaml/{file} does not exist"


class UIDFilteredTocSection(TocSection):
    """
    A table of contents section built by filtering an existing toc section by UID prefix.
    """

    def __init__(self, toc_file_path, section_name, title, uid_prefix):
        """Creates a filtered section denoted by section_name in the toc_file_path to items with the given UID prefix.

        The section is then renamed to the title.

        :param toc_file_path: path to the toc file containing the section to filter
        :param section_name: name of the existing top-level section to take items from
        :param title: title of the new, filtered section
        :param uid_prefix: only direct items of the section whose "uid" starts with
            this prefix are kept
        """
        # NOTE: the parent __init__ is deliberately not called; it parses an rst
        # index file, which this kind of section does not have.
        # Load the toc; the context manager closes the handle instead of leaking it.
        with open(toc_file_path, "r") as toc_file:
            current_toc = yaml.safe_load(toc_file)
        self.uid_prefix = uid_prefix

        # Since we are looking for a specific section_name there should only
        # be one match.
        section_items = [
            d for d in current_toc[0]["items"] if d["name"] == section_name
        ][0]["items"]
        filtered_items = [d for d in section_items if d["uid"].startswith(uid_prefix)]
        self.items = filtered_items
        self.title = title

    def copy_markdown(self):
        """
        No-op because we are filtering on UIDs, not markdown files.
        """
        pass

    def validate_section(self, toc):
        """
        Check that the UIDs of this section in the toc match the yml files on disk

        :param toc: parsed content of the toc file, in which this section is
            looked up by its title among the top-level entries
        :raises AssertionError: if a UID in the section has no yml file in
            _build/html/docfx_yaml, or if a yml file there starting with
            uid_prefix is not referenced by the section
        :raises IndexError: if no top-level toc entry is named after this section
        """
        uids_in_toc = set()

        # A UID-filtered TOC tree looks like the following:
        # - items:
        #   <optional> items: <more stuff>
        #   name: <name>
        #   uid: <fully qualified path to a class>
        #
        # Walk through the TOC tree to find all UIDs recursively.
        def find_uids_in_items(item):
            uids_in_toc.add(item["uid"])
            for subitem in item.get("items", []):
                find_uids_in_items(subitem)

        items_in_toc = [d["items"] for d in toc[0]["items"] if d["name"] == self.title][
            0
        ]
        for item in items_in_toc:
            find_uids_in_items(item)

        # Now that we have all the UIDs, first match all of them
        # with corresponding .yml files.
        for uid in uids_in_toc:
            assert os.path.exists(
                f"_build/html/docfx_yaml/{uid}.yml"
            ), f"_build/html/docfx_yaml/{uid}.yml does not exist"

        # Also validate that every uid yml file that starts with the uid_prefix
        # exists in the section.
        for filename in glob.glob(
            f"{self.uid_prefix}*.yml", root_dir="_build/html/docfx_yaml"
        ):
            # drop the 4-character ".yml" extension to recover the uid
            assert (
                filename[:-4] in uids_in_toc
            ), f"{filename} is not referenced in toc section {self.title!r}"


def validate_toc(toc_file_path, expected_section_list, added_sections):
    """
    Validate the structure of the toc file after it has been patched

    :param toc_file_path: path to the toc file to validate
    :param expected_section_list: list of the names of the top-level sections,
        in the exact order they are expected to appear in the toc
    :param added_sections: list of section objects that were added to the toc.
        Each one must appear in the toc and pass its own ``validate_section``
    :raises AssertionError: if the toc does not match what is expected
    """
    # load the toc; the context manager closes the handle instead of leaking it
    with open(toc_file_path, "r") as toc_file:
        current_toc = yaml.safe_load(toc_file)
    # make sure the set of sections matches what we expect
    found_sections = [d["name"] for d in current_toc[0]["items"]]
    assert (
        found_sections == expected_section_list
    ), f"Expected {expected_section_list}, found {found_sections}"
    # make sure each custom section is in the toc
    for section in added_sections:
        assert (
            section.title in found_sections
        ), f"Section {section.title!r} not found in {found_sections}"
        section.validate_section(current_toc)
    print("Toc validation passed")


if __name__ == "__main__":
    toc_path = "_build/html/docfx_yaml/toc.yml"

    # Top-level section names the finished toc must contain, in this order
    expected_sections = [
        "Overview",
        "bigtable APIs",
        "Changelog",
        "Multiprocessing",
        "Data Client",
        "Admin Client",
        "Classic Client",
    ]

    # Build the custom sections: one for each of the data_client and
    # classic_client directories, plus an "Admin Client" section filtered out
    # of the autogenerated "Bigtable Admin V2" section
    data_client_section = TocSection(
        dir_name="data_client", index_file_name="data_client_usage.rst"
    )
    admin_client_section = UIDFilteredTocSection(
        toc_file_path=toc_path,
        section_name="Bigtable Admin V2",
        title="Admin Client",
        uid_prefix="google.cloud.bigtable_admin_v2",
    )
    classic_client_section = TocSection(
        dir_name="classic_client", index_file_name="usage.rst"
    )
    custom_sections = [
        data_client_section,
        admin_client_section,
        classic_client_section,
    ]

    add_sections(toc_path, custom_sections)
    # Drop the autogenerated Bigtable sections, since they hold duplicated data
    autogenerated_sections = ["Bigtable", "Bigtable Admin V2"]
    remove_sections(toc_path, autogenerated_sections)
    # Finally, check that the resulting yaml is structured as we expect
    validate_toc(
        toc_file_path=toc_path,
        expected_section_list=expected_sections,
        added_sections=custom_sections,
    )