diff --git a/CHANGELOG.md b/CHANGELOG.md index d84d7ad8..e2e115eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,31 @@ # Changelog +# 2.1.0 + * Add the following tables: + * release_assets + * branches + * commit_files + * commit_parents + * commit_pull_request + * commit_users_emails + * deployments + * deployment_statuses + * issue_assignees + * issue_labels + * repository_teams + * repository_topics + * repositories + * workflows + * workflow_runs + * workflow_pull_requests + * collaborator_details + * Add ability to create a child table with no endpoint to call, for normalizing data from a parent stream that has a column of `array` type. + * Add ability to inherit fields from parent streams (both normal and array-like columns). + * Add an option to set a custom column name when the inherited array is not an array of objects, but an array of strings. + * Add ability to look through a path on the response for the array of values. + * Add custom filter param for endpoints which require a different filter than `since`. 
+ + # 2.0.0 * Schema updates [#170](https://github.com/singer-io/tap-github/pull/170) [#169](https://github.com/singer-io/tap-github/pull/169) * Update data types of fields in `events` and `issue_events` stream diff --git a/setup.py b/setup.py index b6c06fef..c55e6029 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup, find_packages setup(name='tap-github', - version='2.0.0', + version='2.1.0', description='Singer.io tap for extracting data from the GitHub API', author='Stitch', url='http://singer.io', diff --git a/tap_github/client.py b/tap_github/client.py index 9913a8c2..6e70b1dc 100644 --- a/tap_github/client.py +++ b/tap_github/client.py @@ -45,6 +45,9 @@ class MovedPermanentlyError(GithubException): class ConflictError(GithubException): pass +class DisabledResourceError(GithubException): + pass + class RateLimitExceeded(GithubException): pass @@ -81,6 +84,10 @@ class TooManyRequests(GithubException): "raise_exception": ConflictError, "message": "The request could not be completed due to a conflict with the current state of the server." }, + 410: { + "raise_exception": DisabledResourceError, + "message": "The request resource is disabled for the repository." + }, 422: { "raise_exception": UnprocessableError, "message": "The request was not able to process right now." @@ -105,7 +112,7 @@ def raise_for_error(resp, source, stream, client, should_skip_404): except JSONDecodeError: response_json = {} - if error_code == 404 and should_skip_404: + if (error_code == 404 or error_code == 410) and should_skip_404: # Add not accessible stream into list. 
client.not_accessible_repos.add(stream) details = ERROR_CODE_EXCEPTION_MAPPING.get(error_code).get("message") @@ -196,10 +203,11 @@ def authed_get(self, source, url, headers={}, stream="", should_skip_404 = True) self.session.headers.update(headers) resp = self.session.request(method='get', url=url, timeout=self.get_request_timeout()) if resp.status_code != 200: + LOGGER.info(f'Found a non 200 response: {url}, {resp.status_code}') raise_for_error(resp, source, stream, self, should_skip_404) timer.tags[metrics.Tag.http_status_code] = resp.status_code rate_throttling(resp, self.max_sleep_seconds) - if resp.status_code == 404: + if resp.status_code == 404 or resp.status_code == 410: # Return an empty response body since we're not raising a NotFoundException resp._content = b'{}' # pylint: disable=protected-access return resp @@ -215,16 +223,17 @@ def authed_get_all_pages(self, source, url, headers={}, stream="", should_skip_4 # Fetch the next page if next found in the response. if 'next' in r.links: url = r.links['next']['url'] + LOGGER.info(f'Found a next link: {url}') else: # Break the loop if all pages are fetched. break - def verify_repo_access(self, url_for_repo, repo): + def verify_repo_access(self, url_for_repo, repo, should_skip_404 = True): """ Call rest API to verify that the user has sufficient permissions to access this repository. 
""" try: - self.authed_get("verifying repository access", url_for_repo) + self.authed_get("verifying repository access", url_for_repo, should_skip_404 = should_skip_404) except NotFoundException: # Throwing user-friendly error message as it checks token access message = "HTTP-error-code: 404, Error: Please check the repository name \'{}\' or you do not have sufficient permissions to access this repository.".format(repo) @@ -242,7 +251,7 @@ def verify_access_for_repo(self): LOGGER.info("Verifying access of repository: %s", repo) # Verifying for Repo access - self.verify_repo_access(url_for_repo, repo) + self.verify_repo_access(url_for_repo, repo, False) def extract_orgs_from_config(self): """ diff --git a/tap_github/schemas/branches.json b/tap_github/schemas/branches.json new file mode 100644 index 00000000..ebac9b9d --- /dev/null +++ b/tap_github/schemas/branches.json @@ -0,0 +1,391 @@ +{ + "type": ["null", "object"], + "properties": { + "_sdc_repository": { + "type": ["string"] + }, + "name": { + "type": ["null", "string"] + }, + "commit_sha": { + "type": ["null", "string"] + }, + "commit": { + "type": ["null", "object"], + "properties": { + "sha": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"], + "format": "uri" + } + } + }, + "protected": { + "type": ["null", "boolean"] + }, + "protection": { + "type": ["null", "object"], + "properties": { + "url": { + "type": ["null", "string"] + }, + "enabled": { + "type": ["null", "boolean"] + }, + "required_status_checks": { + "type": ["null", "object"], + "properties": { + "url": { + "type": ["null", "string"] + }, + "enforcement_level": { + "type": ["null", "string"] + }, + "contexts": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "checks": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "context": { + "type": ["null", "string"] + }, + "app_id": { + "type": ["null", "integer"] + } + } + } + }, + "contexts_url": 
{ + "type": ["null", "string"] + }, + "strict": { + "type": ["null", "boolean"] + } + } + }, + "enforce_admins": { + "type": ["null", "object"], + "properties": { + "url": { + "type": ["null", "string"], + "format": "uri" + }, + "enabled": { + "type": ["null", "boolean"] + } + } + }, + "required_pull_request_reviews": { + "type": ["null", "object"], + "properties": { + "url": { + "type": ["null", "string"], + "format": "uri" + }, + "dismissal_restrictions": { + "type": ["null", "object"], + "properties": { + "users": { + "type": ["null", "array"], + "items": { + "$ref": "shared/user.json#/" + } + }, + "teams": { + "type": ["null", "array"], + "items": { + "$ref": "shared/teams.json#/" + } + }, + "apps": { + "type": ["null", "array"], + "items": { + "$ref": "shared/app.json#/" + } + }, + "users_url": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "teams_url": { + "type": ["null", "string"] + } + } + }, + "bypass_pull_request_allowances": { + "type": ["null", "object"], + "properties": { + "users": { + "type": ["null", "array"], + "items": { + "$ref": "shared/user.json#/" + } + }, + "teams": { + "type": ["null", "array"], + "items": { + "$ref": "shared/teams.json#/" + } + }, + "apps": { + "type": ["null", "array"], + "items": { + "id": { + "type": ["null", "number"] + }, + "slug": { + "type": ["null", "string"] + }, + "node_id": { + "type": ["null", "string"] + }, + "owner": { + "$ref": "shared/user.json#/" + }, + "name": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "external_url": { + "type": ["null", "string"], + "format": "uri" + }, + "html_url": { + "type": ["null", "string"], + "format": "uri" + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "permissions": { + "$ref": "shared/pull_permissions.json#/" + }, + "events": { + "type": ["null", "array"], + "items": { + "type": 
["null", "string"] + } + }, + "installations_count": { + "type": ["null", "integer"] + }, + "client_id": { + "type": ["null", "string"] + }, + "client_secret": { + "type": ["null", "string"] + }, + "webhook_secret": { + "type": ["null", "string"] + }, + "pem": { + "type": ["null", "string"] + } + } + } + } + }, + "dismiss_stale_reviews": { + "type": ["null", "boolean"] + }, + "require_code_owner_reviews": { + "type": ["null", "boolean"] + }, + "required_approving_review_count": { + "type": ["null", "integer"] + }, + "require_last_push_approval": { + "type": ["null", "boolean"] + } + } + }, + "restrictions": { + "type": ["null", "object"], + "properties": { + "users": { + "type": ["null", "array"], + "items": { + "$ref": "shared/user.json#/" + } + }, + "teams": { + "type": ["null", "array"], + "items": { + "$ref": "shared/teams.json#/" + } + }, + "apps": { + "type": ["null", "array"], + "items": { + "id": { + "type": ["null", "number"] + }, + "slug": { + "type": ["null", "string"] + }, + "node_id": { + "type": ["null", "string"] + }, + "owner": { + "$ref": "shared/user.json#/" + }, + "name": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "external_url": { + "type": ["null", "string"], + "format": "uri" + }, + "html_url": { + "type": ["null", "string"], + "format": "uri" + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "permissions": { + "$ref": "shared/pull_permissions.json#/" + }, + "events": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "installations_count": { + "type": ["null", "integer"] + }, + "client_id": { + "type": ["null", "string"] + }, + "client_secret": { + "type": ["null", "string"] + }, + "webhook_secret": { + "type": ["null", "string"] + }, + "pem": { + "type": ["null", "string"] + } + } + }, + "users_url": { + "type": ["null", "string"] + }, + "url": { + 
"type": ["null", "string"] + }, + "teams_url": { + "type": ["null", "string"] + }, + "apps_url": { + "type": ["null", "string"] + } + } + }, + "required_linear_history": { + "type": ["null", "object"], + "properties": { + "enabled": { + "type": ["null", "boolean"] + } + } + }, + "allow_force_pushes": { + "type": ["null", "object"], + "properties": { + "enabled": { + "type": ["null", "boolean"] + } + } + }, + "allow_deletions": { + "type": ["null", "object"], + "properties": { + "enabled": { + "type": ["null", "boolean"] + } + } + }, + "block_creations": { + "type": ["null", "object"], + "properties": { + "enabled": { + "type": ["null", "boolean"] + } + } + }, + "required_conversation_resolution": { + "type": ["null", "object"], + "properties": { + "enabled": { + "type": ["null", "boolean"] + } + } + }, + "name": { + "type": ["null", "string"] + }, + "protection_url": { + "type": ["null", "string"] + }, + "required_signatures": { + "type": ["null", "object"], + "properties": { + "url": { + "type": ["null", "string"], + "format": "uri" + }, + "enabled": { + "type": ["null", "boolean"] + } + } + }, + "lock_branch": { + "type": ["null", "object"], + "properties": { + "enabled": { + "type": ["null", "boolean"] + } + } + }, + "allow_fork_syncing": { + "type": ["null", "object"], + "properties": { + "enabled": { + "type": ["null", "boolean"] + } + } + } + } + }, + "protection_url": { + "type": ["null", "string"] + } + } + } \ No newline at end of file diff --git a/tap_github/schemas/collaborator_details.json b/tap_github/schemas/collaborator_details.json new file mode 100644 index 00000000..c500845d --- /dev/null +++ b/tap_github/schemas/collaborator_details.json @@ -0,0 +1,104 @@ +{ + "type": ["null", "object"], + "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "login": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "bio": { + "type": ["null", "string"] + }, + "blog": { + 
"type": ["null", "string"] + }, + "company": { + "type": ["null", "string"] + }, + "followers": { + "type": ["null", "integer"] + }, + "following": { + "type": ["null", "integer"] + }, + "hireable": { + "type": ["null", "boolean"] + }, + "location": { + "type": ["null", "string"] + }, + "twitter_username": { + "type": ["null", "string"] + }, + "public_gists": { + "type": ["null", "integer"] + }, + "public_repos": { + "type": ["null", "integer"] + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + "subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "boolean"] + } + } + } + \ No newline at end of file diff --git a/tap_github/schemas/collaborators.json b/tap_github/schemas/collaborators.json index 9f71ac07..a997a5fb 100644 --- a/tap_github/schemas/collaborators.json +++ b/tap_github/schemas/collaborators.json @@ -67,9 +67,6 @@ }, "role_name": { "type": ["null", "string"] - }, - "_sdc_repository": { - "type": ["string"] } } } diff --git a/tap_github/schemas/commit_files.json b/tap_github/schemas/commit_files.json new file mode 100644 index 00000000..86efcbaf --- /dev/null +++ 
b/tap_github/schemas/commit_files.json @@ -0,0 +1,44 @@ +{ + "type": ["null", "object"], + "properties": { + "_sdc_repository": { + "type": ["string"] + }, + "commit_sha": { + "type": ["null", "string"] + }, + "sha": { + "type": ["null", "string"] + }, + "filename": { + "type": ["null", "string"] + }, + "additions": { + "type": ["null", "number"] + }, + "deletions": { + "type": ["null", "number"] + }, + "changes": { + "type": ["null", "number"] + }, + "status": { + "type": ["null", "string"] + }, + "raw_url": { + "type": ["null", "string"] + }, + "blob_url": { + "type": ["null", "string"] + }, + "contents_url": { + "type": ["null", "string"] + }, + "patch": { + "type": ["null", "string"] + }, + "previous_filename": { + "type": ["null", "string"] + } + } +} \ No newline at end of file diff --git a/tap_github/schemas/commit_parents.json b/tap_github/schemas/commit_parents.json new file mode 100644 index 00000000..42744838 --- /dev/null +++ b/tap_github/schemas/commit_parents.json @@ -0,0 +1,22 @@ +{ + "type": ["null", "object"], + "properties": { + "_sdc_repository": { + "type": ["string"] + }, + "children_sha": { + "type": ["null", "string"] + }, + "sha": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"], + "format": "uri" + }, + "html_url": { + "type": ["null", "string"], + "format": "uri" + } + } +} \ No newline at end of file diff --git a/tap_github/schemas/commit_pull_request.json b/tap_github/schemas/commit_pull_request.json new file mode 100644 index 00000000..9d9797df --- /dev/null +++ b/tap_github/schemas/commit_pull_request.json @@ -0,0 +1,15 @@ +{ + "type": ["null", "object"], + "properties": { + "_sdc_repository": { + "type": ["string"] + }, + "commit_sha": { + "type": ["null", "string"] + }, + "pull_request_id": { + "type": ["null", "integer"] + } + } +} + \ No newline at end of file diff --git a/tap_github/schemas/commit_users_emails.json b/tap_github/schemas/commit_users_emails.json new file mode 100644 index 
00000000..0cc70e8a --- /dev/null +++ b/tap_github/schemas/commit_users_emails.json @@ -0,0 +1,18 @@ +{ + "type": ["null", "object"], + "properties": { + "email": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "name": { + "type": ["null", "string"] + }, + "username": { + "type": ["null", "string"] + } + } + } + \ No newline at end of file diff --git a/tap_github/schemas/commits.json b/tap_github/schemas/commits.json index cf873448..29de80bc 100644 --- a/tap_github/schemas/commits.json +++ b/tap_github/schemas/commits.json @@ -7,15 +7,6 @@ "node_id": { "type": ["null", "string"] }, - "pr_id": { - "type": ["null", "string"] - }, - "pr_number": { - "type": ["null", "integer"] - }, - "id": { - "type": ["null", "string"] - }, "updated_at": { "type": ["null", "string"], "format": "date-time" @@ -26,55 +17,8 @@ "url": { "type": ["null", "string"] }, - "parents": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "additionalProperties": false, - "properties": { - "sha": { - "type": ["null", "string"] - }, - "url": { - "type": ["null", "string"] - }, - "html_url": { - "type": ["null", "string"] - } - } - } - }, - "files": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "properties": { - "filename": { - "type": ["null", "string"] - }, - "additions": { - "type": ["null", "number"] - }, - "deletions": { - "type": ["null", "number"] - }, - "changes": { - "type": ["null", "number"] - }, - "status": { - "type": ["null", "string"] - }, - "raw_url": { - "type": ["null", "string"] - }, - "blob_url": { - "type": ["null", "string"] - }, - "patch": { - "type": ["null", "string"] - } - } - } + "message": { + "type": ["null", "string"] }, "html_url": { "type": ["null", "string"] @@ -253,6 +197,18 @@ } } }, + "author_email": { + "type": ["null", "string"] + }, + "author_name": { + "type": ["null", "string"] + }, + "committer_email": { + "type": ["null", "string"] + }, + "committer_name": { + "type": 
["null", "string"] + }, "committer": { "$ref": "shared/user.json#/" }, diff --git a/tap_github/schemas/deployment_statuses.json b/tap_github/schemas/deployment_statuses.json new file mode 100644 index 00000000..17da4d78 --- /dev/null +++ b/tap_github/schemas/deployment_statuses.json @@ -0,0 +1,59 @@ +{ + "type": ["null", "object"], + "properties": { + "_sdc_repository": { + "type": ["string"] + }, + "url": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "state": { + "type": ["null", "string"] + }, + "creator_id": { + "type": ["null", "integer"] + }, + "creator": { + "$ref": "shared/user.json#/" + }, + "description": { + "type": ["null", "string"] + }, + "environment": { + "type": ["null", "string"] + }, + "target_url": { + "type": ["null", "string"] + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "deployment_url": { + "type": ["null", "string"] + }, + "repository_url": { + "type": ["null", "string"] + }, + "environment_url": { + "type": ["null", "string"] + }, + "log_url": { + "type": ["null", "string"] + }, + "performed_via_github_app": { + "$ref": "shared/app.json#/" + } + } + } + \ No newline at end of file diff --git a/tap_github/schemas/deployments.json b/tap_github/schemas/deployments.json new file mode 100644 index 00000000..76d00c67 --- /dev/null +++ b/tap_github/schemas/deployments.json @@ -0,0 +1,68 @@ +{ + "type": ["null", "object"], + "properties": { + "_sdc_repository": { + "type": ["string"] + }, + "url": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "sha": { + "type": ["null", "string"] + }, + "ref": { + "type": ["null", "string"] + }, + "task": { + "type": ["null", "string"] + }, + "payload": { + "type": ["null", "object", "string"] + }, + "original_environment": { + 
"type": ["null", "string"] + }, + "environment": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "creator_id": { + "type": ["null", "integer"] + }, + "creator": { + "$ref": "shared/user.json#/" + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "statuses_url": { + "type": ["null", "string"] + }, + "repository_url": { + "type": ["null", "string"] + }, + "transient_environment": { + "type": ["null", "boolean"] + }, + "production_environment": { + "type": ["null", "boolean"] + }, + "performed_via_github_app": { + "$ref": "shared/app.json#/" + } + } + } + \ No newline at end of file diff --git a/tap_github/schemas/issue_assignees.json b/tap_github/schemas/issue_assignees.json new file mode 100644 index 00000000..2542d54f --- /dev/null +++ b/tap_github/schemas/issue_assignees.json @@ -0,0 +1,75 @@ +{ + "type": ["null", "object"], + "properties": { + "_sdc_repository": { + "type": ["string"] + }, + "issue_id": { + "type": ["null", "integer"] + }, + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "login": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + "subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + 
"received_events_url": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "boolean"] + }, + "starred_at": { + "type": ["null", "string"] + } + } + } + \ No newline at end of file diff --git a/tap_github/schemas/issue_labels.json b/tap_github/schemas/issue_labels.json index 32a097df..d97ee204 100644 --- a/tap_github/schemas/issue_labels.json +++ b/tap_github/schemas/issue_labels.json @@ -4,6 +4,9 @@ "_sdc_repository": { "type": ["null", "string"] }, + "issue_id": { + "type": ["null", "integer"] + }, "id": { "type": ["null", "number"] }, diff --git a/tap_github/schemas/issues.json b/tap_github/schemas/issues.json index 93365708..8667a62d 100644 --- a/tap_github/schemas/issues.json +++ b/tap_github/schemas/issues.json @@ -9,35 +9,6 @@ "url": { "type": ["null", "string"] }, - "labels": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "integer"] - }, - "node_id": { - "type": ["null", "string"] - }, - "url": { - "type": ["null", "string"] - }, - "name": { - "type": ["null", "string"] - }, - "description": { - "type": ["null", "string"] - }, - "color": { - "type": ["null", "string"] - }, - "default": { - "type": ["null", "boolean"] - } - } - } - }, "repository_url": { "type": ["null", "string"] }, @@ -57,12 +28,6 @@ "assignee": { "$ref": "shared/user.json#/" }, - "assignees": { - "type": ["null", "array"], - "items": { - "$ref": "shared/user.json#/" - } - }, "milestone": { "type": ["null", "object"], "properties": { diff --git a/tap_github/schemas/labels.json b/tap_github/schemas/labels.json new file mode 100644 index 00000000..32a097df --- /dev/null +++ b/tap_github/schemas/labels.json @@ -0,0 +1,29 @@ +{ + "type": ["null", "object"], + "properties": { + "_sdc_repository": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "number"] + }, + "node_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", 
"string"] + }, + "name": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "color": { + "type": ["null", "string"] + }, + "default": { + "type": ["null", "boolean"] + } + } +} diff --git a/tap_github/schemas/release_assets.json b/tap_github/schemas/release_assets.json new file mode 100644 index 00000000..8f9f74b9 --- /dev/null +++ b/tap_github/schemas/release_assets.json @@ -0,0 +1,121 @@ +{ + "type": ["null", "object"], + "properties": { + "_sdc_repository": { + "type": ["string"] + }, + "release_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "browser_download_url": { + "type": ["null", "string"], + "format": "uri" + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "label": { + "type": ["null", "string"] + }, + "state": { + "type": ["null", "string"] + }, + "content_type": { + "type": ["null", "string"] + }, + "size": { + "type": ["null", "integer"] + }, + "download_count": { + "type": ["null", "integer"] + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "uploader_id": { + "type": ["null", "integer"] + }, + "uploader": { + "type": ["null", "object"], + "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "login": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": 
["null", "string"] + }, + "subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "boolean"] + }, + "starred_at": { + "type": ["null", "string"] + } + } + } + } + } \ No newline at end of file diff --git a/tap_github/schemas/releases.json b/tap_github/schemas/releases.json index b903a026..e4647e76 100644 --- a/tap_github/schemas/releases.json +++ b/tap_github/schemas/releases.json @@ -35,121 +35,6 @@ "reactions": { "$ref": "shared/reactions.json#/" }, - "assets": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "properties": { - "url": { - "type": ["null", "string"] - }, - "browser_download_url": { - "type": ["null", "string"], - "format": "uri" - }, - "id": { - "type": ["null", "integer"] - }, - "node_id": { - "type": ["null", "string"] - }, - "name": { - "type": ["null", "string"] - }, - "label": { - "type": ["null", "string"] - }, - "state": { - "type": ["null", "string"] - }, - "content_type": { - "type": ["null", "string"] - }, - "size": { - "type": ["null", "integer"] - }, - "download_count": { - "type": ["null", "integer"] - }, - "created_at": { - "type": ["null", "string"], - "format": "date-time" - }, - "updated_at": { - "type": ["null", "string"], - "format": "date-time" - }, - "uploader": { - "type": ["null", "object"], - "properties": { - "name": { - "type": ["null", "string"] - }, - "email": { - "type": ["null", "string"] - }, - "login": { - "type": ["null", "string"] - }, - "id": { - "type": ["null", "integer"] - }, - "node_id": { - "type": ["null", "string"] - }, - "avatar_url": { - "type": ["null", "string"] - }, - "gravatar_id": { - "type": ["null", "string"] - }, - "url": { - "type": ["null", "string"] - }, - 
"html_url": { - "type": ["null", "string"] - }, - "followers_url": { - "type": ["null", "string"] - }, - "following_url": { - "type": ["null", "string"] - }, - "gists_url": { - "type": ["null", "string"] - }, - "starred_url": { - "type": ["null", "string"] - }, - "subscriptions_url": { - "type": ["null", "string"] - }, - "organizations_url": { - "type": ["null", "string"] - }, - "repos_url": { - "type": ["null", "string"] - }, - "events_url": { - "type": ["null", "string"] - }, - "received_events_url": { - "type": ["null", "string"] - }, - "type": { - "type": ["null", "string"] - }, - "site_admin": { - "type": ["null", "boolean"] - }, - "starred_at": { - "type": ["null", "string"] - } - } - } - } - } - }, "mentions_count": { "type": ["null", "integer"] }, diff --git a/tap_github/schemas/repositories.json b/tap_github/schemas/repositories.json new file mode 100644 index 00000000..d202520e --- /dev/null +++ b/tap_github/schemas/repositories.json @@ -0,0 +1,333 @@ +{ + "type": ["null", "object"], + "properties": { + "_sdc_repository": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "full_name": { + "type": ["null", "string"] + }, + "owner_id": { + "type": ["null", "integer"] + }, + "owner": { + "$ref": "shared/user.json#/" + }, + "private": { + "type": ["null", "boolean"] + }, + "html_url": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "fork": { + "type": ["null", "boolean"] + }, + "url": { + "type": ["null", "string"] + }, + "homepage": { + "type": ["null", "string"] + }, + "language": { + "type": ["null", "string"] + }, + "forks_count": { + "type": ["null", "integer"] + }, + "stargazers_count": { + "type": ["null", "integer"] + }, + "watchers_count": { + "type": ["null", "integer"] + }, + "size": { + "type": ["null", "integer"] + }, + "default_branch": { + "type": ["null", "string"] + }, + 
"open_issues_count": { + "type": ["null", "integer"] + }, + "is_template": { + "type": ["null", "boolean"] + }, + "has_issues": { + "type": ["null", "boolean"] + }, + "has_projects": { + "type": ["null", "boolean"] + }, + "has_wiki": { + "type": ["null", "boolean"] + }, + "has_pages": { + "type": ["null", "boolean"] + }, + "has_downloads": { + "type": ["null", "boolean"] + }, + "has_discussions": { + "type": ["null", "boolean"] + }, + "archived": { + "type": ["null", "boolean"] + }, + "disabled": { + "type": ["null", "boolean"] + }, + "visibility": { + "type": ["null", "string"] + }, + "pushed_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "permissions": { + "$ref": "shared/pull_permissions.json#/" + }, + "role_name": { + "type": ["null", "string"] + }, + "temp_clone_token": { + "type": ["null", "string"] + }, + "delete_branch_on_merge": { + "type": ["null", "boolean"] + }, + "subscribers_count": { + "type": ["null", "integer"] + }, + "network_count": { + "type": ["null", "integer"] + }, + "forks": { + "type": ["null", "integer"] + }, + "open_issues": { + "type": ["null", "integer"] + }, + "watchers": { + "type": ["null", "integer"] + }, + "allow_forking": { + "type": ["null", "boolean"] + }, + "web_commit_signoff_required": { + "type": ["null", "boolean"] + }, + "code_of_conduct": { + "type": ["null", "object"], + "properties": { + "key": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "body": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + }, + "license": { + "type": ["null", "object"], + "properties": { + "key": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "spdx_id": { + "type": ["null", "string"] + }, + "url": { + "type": 
["null", "string"] + }, + "node_id": { + "type": ["null", "string"] + } + } + }, + "security_and_analysis": { + "type": ["null", "object"], + "properties": { + "advanced_security": { + "type": ["null", "object"], + "properties": { + "status": { + "type": ["null", "string"] + } + } + }, + "secret_scanning": { + "type": ["null", "object"], + "properties": { + "status": { + "type": ["null", "string"] + } + } + }, + "secret_scanning_push_protection": { + "type": ["null", "object"], + "properties": { + "status": { + "type": ["null", "string"] + } + } + } + } + }, + "archive_url": { + "type": ["null", "string"] + }, + "assignees_url": { + "type": ["null", "string"] + }, + "blobs_url": { + "type": ["null", "string"] + }, + "branches_url": { + "type": ["null", "string"] + }, + "collaborators_url": { + "type": ["null", "string"] + }, + "comments_url": { + "type": ["null", "string"] + }, + "commits_url": { + "type": ["null", "string"] + }, + "compare_url": { + "type": ["null", "string"] + }, + "contents_url": { + "type": ["null", "string"] + }, + "contributors_url": { + "type": ["null", "string"] + }, + "deployments_url": { + "type": ["null", "string"] + }, + "downloads_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "forks_url": { + "type": ["null", "string"] + }, + "git_commits_url": { + "type": ["null", "string"] + }, + "git_refs_url": { + "type": ["null", "string"] + }, + "git_tags_url": { + "type": ["null", "string"] + }, + "git_url": { + "type": ["null", "string"] + }, + "issue_comment_url": { + "type": ["null", "string"] + }, + "issue_events_url": { + "type": ["null", "string"] + }, + "issues_url": { + "type": ["null", "string"] + }, + "keys_url": { + "type": ["null", "string"] + }, + "labels_url": { + "type": ["null", "string"] + }, + "languages_url": { + "type": ["null", "string"] + }, + "merges_url": { + "type": ["null", "string"] + }, + "milestones_url": { + "type": ["null", "string"] + }, + "notifications_url": { + 
"type": ["null", "string"] + }, + "pulls_url": { + "type": ["null", "string"] + }, + "releases_url": { + "type": ["null", "string"] + }, + "ssh_url": { + "type": ["null", "string"] + }, + "stargazers_url": { + "type": ["null", "string"] + }, + "statuses_url": { + "type": ["null", "string"] + }, + "subscribers_url": { + "type": ["null", "string"] + }, + "subscription_url": { + "type": ["null", "string"] + }, + "tags_url": { + "type": ["null", "string"] + }, + "teams_url": { + "type": ["null", "string"] + }, + "trees_url": { + "type": ["null", "string"] + }, + "clone_url": { + "type": ["null", "string"] + }, + "mirror_url": { + "type": ["null", "string"] + }, + "hooks_url": { + "type": ["null", "string"] + }, + "svn_url": { + "type": ["null", "string"] + } + } + } + \ No newline at end of file diff --git a/tap_github/schemas/repository_teams.json b/tap_github/schemas/repository_teams.json new file mode 100644 index 00000000..88be83e4 --- /dev/null +++ b/tap_github/schemas/repository_teams.json @@ -0,0 +1,48 @@ +{ + "type": ["null", "object"], + "properties": { + "_sdc_repository": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "number"] + }, + "node_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "permissions": { + "$ref": "shared/pull_permissions.json#/" + }, + "name": { + "type": ["null", "string"] + }, + "slug": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "privacy": { + "type": ["null", "string"] + }, + "permission": { + "type": ["null", "string"] + }, + "members_url": { + "type": ["null", "string"] + }, + "repositories_url": { + "type": ["null", "string"] + }, + "parent": { + "type": ["null", "object", "string"] + } + } + } + \ No newline at end of file diff --git a/tap_github/schemas/repository_topics.json b/tap_github/schemas/repository_topics.json new file mode 100644 index 00000000..16107085 --- /dev/null 
+++ b/tap_github/schemas/repository_topics.json @@ -0,0 +1,12 @@ +{ + "type": ["null", "object"], + "properties": { + "repository": { + "type": ["null", "string"] + }, + "topic": { + "type": ["null", "string"] + } + } + } + \ No newline at end of file diff --git a/tap_github/schemas/shared/app.json b/tap_github/schemas/shared/app.json new file mode 100644 index 00000000..7045891c --- /dev/null +++ b/tap_github/schemas/shared/app.json @@ -0,0 +1,63 @@ +{ + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "number"] + }, + "slug": { + "type": ["null", "string"] + }, + "node_id": { + "type": ["null", "string"] + }, + "owner": { + "$ref": "shared/user.json#/" + }, + "name": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "external_url": { + "type": ["null", "string"], + "format": "uri" + }, + "html_url": { + "type": ["null", "string"], + "format": "uri" + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "permissions": { + "$ref": "shared/pull_permissions.json#/" + }, + "events": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "installations_count": { + "type": ["null", "integer"] + }, + "client_id": { + "type": ["null", "string"] + }, + "client_secret": { + "type": ["null", "string"] + }, + "webhook_secret": { + "type": ["null", "string"] + }, + "pem": { + "type": ["null", "string"] + } + } +} \ No newline at end of file diff --git a/tap_github/schemas/shared/teams.json b/tap_github/schemas/shared/teams.json new file mode 100644 index 00000000..805e1a5c --- /dev/null +++ b/tap_github/schemas/shared/teams.json @@ -0,0 +1,61 @@ +{ + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "number"] + }, + "node_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + 
}, + "permissions": { + "type": ["null", "object"], + "properties": { + "pull": { + "type": ["null", "boolean"] + }, + "triage": { + "type": ["null", "boolean"] + }, + "push": { + "type": ["null", "boolean"] + }, + "maintain": { + "type": ["null", "boolean"] + }, + "admin": { + "type": ["null", "boolean"] + } + } + }, + "name": { + "type": ["null", "string"] + }, + "slug": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "privacy": { + "type": ["null", "string"] + }, + "permission": { + "type": ["null", "string"] + }, + "members_url": { + "type": ["null", "string"] + }, + "repositories_url": { + "type": ["null", "string"] + }, + "parent": { + "type": ["null", "object", "string"] + } + } +} \ No newline at end of file diff --git a/tap_github/schemas/workflow_run_pull_requests.json b/tap_github/schemas/workflow_run_pull_requests.json new file mode 100644 index 00000000..99ec125b --- /dev/null +++ b/tap_github/schemas/workflow_run_pull_requests.json @@ -0,0 +1,77 @@ +{ + "type": ["null", "object"], + "properties": { + "_sdc_repository": { + "type": ["null", "string"] + }, + "workflow_run_id": { + "type": ["null", "integer"] + }, + "id": { + "type": ["null", "integer"] + }, + "number": { + "type": ["null", "integer"] + }, + "url": { + "type": ["null", "string"] + }, + "head_sha": { + "type": ["null", "string"] + }, + "head": { + "type": ["null", "object"], + "properties": { + "ref": { + "type": ["null", "string"] + }, + "sha": { + "type": ["null", "string"] + }, + "repo": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "url": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + } + } + } + } + }, + "base_sha": { + "type": ["null", "string"] + }, + "base": { + "type": ["null", "object"], + "properties": { + "ref": { + "type": ["null", "string"] + }, + "sha": { + "type": ["null", "string"] + }, + "repo": { + "type": ["null", "object"], + "properties": { + 
"id": { + "type": ["null", "integer"] + }, + "url": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + } + } + } + } + } +} + } + \ No newline at end of file diff --git a/tap_github/schemas/workflow_runs.json b/tap_github/schemas/workflow_runs.json new file mode 100644 index 00000000..05f4c38e --- /dev/null +++ b/tap_github/schemas/workflow_runs.json @@ -0,0 +1,251 @@ +{ + "type": ["null", "object"], + "properties": { + "_sdc_repository": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "check_suite_id": { + "type": ["null", "integer"] + }, + "check_suite_node_id": { + "type": ["null", "string"] + }, + "head_branch": { + "type": ["null", "string"] + }, + "head_sha": { + "type": ["null", "string"] + }, + "path": { + "type": ["null", "string"] + }, + "run_number": { + "type": ["null", "integer"] + }, + "run_attempt": { + "type": ["null", "integer"] + }, + "referenced_workflows": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "path": { + "type": ["null", "string"] + }, + "sha": { + "type": ["null", "string"] + }, + "ref": { + "type": ["null", "string"] + } + } + } + }, + "event": { + "type": ["null", "string"] + }, + "status": { + "type": ["null", "string"] + }, + "conclusion": { + "type": ["null", "string"] + }, + "workflow_id": { + "type": ["null", "integer"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "pull_requests": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "number": { + "type": ["null", "integer"] + }, + "url": { + "type": ["null", "string"] + }, + "head": { + "type": ["null", "object"], + "properties": { + "ref": { + "type": ["null", "string"] + }, + "sha": { + "type": ["null", "string"] + }, + "repo": { + 
"type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "url": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + } + } + } + } + }, + "base": { + "type": ["null", "object"], + "properties": { + "ref": { + "type": ["null", "string"] + }, + "sha": { + "type": ["null", "string"] + }, + "repo": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "url": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + } + } + } + } + } + } + } + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "actor_id": { + "type": ["null", "integer"] + }, + "actor": { + "$ref": "shared/user.json#/" + }, + "triggering_actor_id": { + "type": ["null", "integer"] + }, + "triggering_actor": { + "$ref": "shared/user.json#/" + }, + "run_started_at": { + "type": ["null", "string"] + }, + "jobs_url": { + "type": ["null", "string"] + }, + "logs_url": { + "type": ["null", "string"] + }, + "check_suite_url": { + "type": ["null", "string"] + }, + "artifacts_url": { + "type": ["null", "string"] + }, + "cancel_url": { + "type": ["null", "string"] + }, + "rerun_url": { + "type": ["null", "string"] + }, + "previous_attempt_url": { + "type": ["null", "string"] + }, + "workflow_url": { + "type": ["null", "string"] + }, + "head_commit": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "tree_id": { + "type": ["null", "string"] + }, + "message": { + "type": ["null", "string"] + }, + "timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "author": { + "type": ["null", "object"], + "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + } + } + }, + "committer": { + "type": ["null", "object"], + "properties": { + "name": { + "type": ["null", "string"] + }, + 
"email": { + "type": ["null", "string"] + } + } + }, + "repo": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "url": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + } + } + } + } + }, + "repository_id": { + "type": ["null", "integer"] + }, + "head_repository_id": { + "type": ["null", "integer"] + }, + "display_title": { + "type": ["null", "string"] + } + } + } + \ No newline at end of file diff --git a/tap_github/schemas/workflows.json b/tap_github/schemas/workflows.json new file mode 100644 index 00000000..ad0deb48 --- /dev/null +++ b/tap_github/schemas/workflows.json @@ -0,0 +1,45 @@ +{ + "type": ["null", "object"], + "properties": { + "_sdc_repository": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "number"] + }, + "node_id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "path": { + "type": ["null", "string"] + }, + "state": { + "type": ["null", "string"] + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "badge_url": { + "type": ["null", "string"] + }, + "deleted_at": { + "type": ["null", "string"], + "format": "date-time" + } + } + } + \ No newline at end of file diff --git a/tap_github/streams.py b/tap_github/streams.py index 278dd05a..0d1a46d9 100644 --- a/tap_github/streams.py +++ b/tap_github/streams.py @@ -1,20 +1,34 @@ -from datetime import datetime +from datetime import datetime, timedelta import singer from singer import (metrics, bookmarks, metadata) LOGGER = singer.get_logger() DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ' +PER_PAGE_NUMBER = 100 +DATE_RANGE_WINDOW = 7 -def get_bookmark(state, repo, stream_name, bookmark_key, start_date): +def get_bookmark(state, repo, stream_name, bookmark_key, start_date, is_incremental = 
True): """ Return bookmark value if available in the state otherwise return start date """ - repo_stream_dict = bookmarks.get_bookmark(state, repo, stream_name) - if repo_stream_dict: - return repo_stream_dict.get(bookmark_key) - + if is_incremental: + repo_stream_dict = bookmarks.get_bookmark(state, repo, stream_name) + if repo_stream_dict: + return repo_stream_dict.get(bookmark_key) return start_date +def get_date_ranges(start_date, end_date, date_range_window=DATE_RANGE_WINDOW): + """ + Return a list of date ranges to be used for the API calls. + """ + start_date = datetime.strptime(start_date, DATE_FORMAT) + end_date = datetime.strptime(end_date, DATE_FORMAT) + while start_date < end_date: + temp_end_date=start_date + timedelta(days=date_range_window) + date_ranges=(start_date.strftime(DATE_FORMAT),temp_end_date.strftime(DATE_FORMAT)) + start_date = temp_end_date + yield date_ranges + def get_schema(catalog, stream_id): """ Return catalog of the specified stream. @@ -27,7 +41,9 @@ def get_child_full_url(domain, child_object, repo_path, parent_id, grand_parent_ Build the child stream's URL based on the parent and the grandparent's ids. """ - if child_object.use_repository: + if child_object.no_path: + return + elif child_object.use_repository: # The `use_repository` represents that the url contains /repos and the repository name. child_full_url = '{}/repos/{}/{}'.format( domain, @@ -60,7 +76,9 @@ class Stream: replication_keys = None key_properties = [] path = None - filter_param = False + since_filter_param = "" + since_filter_param_custom = "" + additional_filters = "" id_keys = [] use_organization = False children = [] @@ -68,14 +86,24 @@ class Stream: use_repository = False headers = {'Accept': '*/*'} parent = None + inherit_parent_fields = [] + inherit_array_parent_fields = "" + custom_column_name = "" + no_path = False + result_path = "" def build_url(self, base_url, repo_path, bookmark): """ Build the full url with parameters and attributes. 
""" - if self.filter_param: + if self.since_filter_param: # Add the since parameter for incremental streams - query_string = '?since={}'.format(bookmark) + query_string = '?since={}{}'.format(bookmark,self.since_filter_param) + elif self.since_filter_param_custom: + # Add additional custom filter for incremental streams + query_string = f'?{self.since_filter_param_custom}'.format(**bookmark) + elif self.additional_filters: + query_string = f'?{self.additional_filters}' else: query_string = '' @@ -144,57 +172,94 @@ def get_child_records(self, """ child_object = STREAMS[child_stream]() - child_bookmark_value = get_bookmark(state, repo_path, child_object.tap_stream_id, "since", start_date) + is_stream_incremental = child_object.replication_method == "INCREMENTAL" and child_object.replication_keys + child_bookmark_value = get_bookmark(state, repo_path, child_object.tap_stream_id, "since", start_date, is_stream_incremental) if not parent_id: parent_id = grand_parent_id child_full_url = get_child_full_url(client.base_url, child_object, repo_path, parent_id, grand_parent_id) stream_catalog = get_schema(catalog, child_object.tap_stream_id) - with metrics.record_counter(child_object.tap_stream_id) as counter: - for response in client.authed_get_all_pages( - child_object.tap_stream_id, - child_full_url, - stream = child_object.tap_stream_id - ): - records = response.json() - extraction_time = singer.utils.now() + if child_full_url is not None: + for response in client.authed_get_all_pages( + child_object.tap_stream_id, + child_full_url, + stream = child_object.tap_stream_id + ): + records = response.json() + if child_object.result_path: records = records.get(child_object.result_path,[]) + extraction_time = singer.utils.now() + + if isinstance(records, list): + # Loop through all the records of response + for record in records: + record['_sdc_repository'] = repo_path + for column, field in child_object.inherit_parent_fields: + record[column] = parent_record.get(field) + 
child_object.add_fields_at_1st_level(record = record, parent_record = parent_record) - if isinstance(records, list): - # Loop through all the records of response - for record in records: - record['_sdc_repository'] = repo_path - child_object.add_fields_at_1st_level(record = record, parent_record = parent_record) + with singer.Transformer() as transformer: - with singer.Transformer() as transformer: + rec = transformer.transform(record, stream_catalog['schema'], metadata=metadata.to_map(stream_catalog['metadata'])) - rec = transformer.transform(record, stream_catalog['schema'], metadata=metadata.to_map(stream_catalog['metadata'])) + if child_object.tap_stream_id in selected_stream_ids and record.get(child_object.replication_keys, start_date) >= child_bookmark_value: + singer.write_record(child_object.tap_stream_id, rec, time_extracted=extraction_time) + counter.increment() - if child_object.tap_stream_id in selected_stream_ids and record.get(child_object.replication_keys, start_date) >= child_bookmark_value: - singer.write_record(child_object.tap_stream_id, rec, time_extracted=extraction_time) - counter.increment() + # Loop thru each child and nested child in the parent and fetch all the child records. + for nested_child in child_object.children: + if nested_child in stream_to_sync: + # Collect id of child record to pass in the API of its sub-child. + child_id = tuple(record.get(key) for key in STREAMS[nested_child]().id_keys) + # Here, grand_parent_id is the id of 1st level parent(main parent) which is required to + # pass in the API of the current child's sub-child. + child_object.get_child_records(client, catalog, nested_child, child_id, repo_path, state, start_date, bookmark_dttm, stream_to_sync, selected_stream_ids, grand_parent_id, record) - # Loop thru each child and nested child in the parent and fetch all the child records. 
- for nested_child in child_object.children: - if nested_child in stream_to_sync: - # Collect id of child record to pass in the API of its sub-child. - child_id = tuple(record.get(key) for key in STREAMS[nested_child]().id_keys) - # Here, grand_parent_id is the id of 1st level parent(main parent) which is required to - # pass in the API of the current child's sub-child. - child_object.get_child_records(client, catalog, nested_child, child_id, repo_path, state, start_date, bookmark_dttm, stream_to_sync, selected_stream_ids, grand_parent_id, record) + else: + # Write JSON response directly if it is a single record only. + records['_sdc_repository'] = repo_path + for column, field in child_object.inherit_parent_fields: + records[column] = parent_record.get(field) + child_object.add_fields_at_1st_level(record = records, parent_record = parent_record) - else: - # Write JSON response directly if it is a single record only. - records['_sdc_repository'] = repo_path - child_object.add_fields_at_1st_level(record = records, parent_record = parent_record) + with singer.Transformer() as transformer: + + rec = transformer.transform(records, stream_catalog['schema'], metadata=metadata.to_map(stream_catalog['metadata'])) + if child_object.tap_stream_id in selected_stream_ids and records.get(child_object.replication_keys, start_date) >= child_bookmark_value : + singer.write_record(child_object.tap_stream_id, rec, time_extracted=extraction_time) + elif child_object.no_path: + records = [] + extraction_time = singer.utils.now() + if child_object.inherit_array_parent_fields: + for record in parent_record.get(child_object.inherit_array_parent_fields,[]): + if col_name := child_object.custom_column_name: + records.append({col_name: record}) + else: + records.append(record) + else: records.append({}) + for record in records: + for column, field in child_object.inherit_parent_fields: + record[column] = parent_record.get(field) + child_object.add_fields_at_1st_level(record = record, 
parent_record = parent_record) with singer.Transformer() as transformer: - rec = transformer.transform(records, stream_catalog['schema'], metadata=metadata.to_map(stream_catalog['metadata'])) - if child_object.tap_stream_id in selected_stream_ids and records.get(child_object.replication_keys, start_date) >= child_bookmark_value : + rec = transformer.transform(record, stream_catalog['schema'], metadata=metadata.to_map(stream_catalog['metadata'])) + if child_object.tap_stream_id in selected_stream_ids: singer.write_record(child_object.tap_stream_id, rec, time_extracted=extraction_time) + counter.increment() + + # Loop thru each child and nested child in the parent and fetch all the child records. + for nested_child in child_object.children: + if nested_child in stream_to_sync: + # Collect id of child record to pass in the API of its sub-child. + child_id = tuple(record.get(key) for key in STREAMS[nested_child]().id_keys) + if STREAMS[nested_child]().id_keys and not all(child_id): continue + # Here, grand_parent_id is the id of 1st level parent(main parent) which is required to + # pass in the API of the current child's sub-child. + child_object.get_child_records(client, catalog, nested_child, child_id, repo_path, state, start_date, bookmark_dttm, stream_to_sync, selected_stream_ids, grand_parent_id, record) # pylint: disable=unnecessary-pass def add_fields_at_1st_level(self, record, parent_record = None): @@ -202,6 +267,16 @@ def add_fields_at_1st_level(self, record, parent_record = None): Add fields in the record explicitly at the 1st level of JSON. 
""" pass + + def get_field(self,record, field_path): + """ + Get a field of a record from a field path + """ + response = record + for path in field_path: + response = response.get(path) + if not response: return + return response class FullTableStream(Stream): def sync_endpoint(self, @@ -211,7 +286,8 @@ def sync_endpoint(self, repo_path, start_date, selected_stream_ids, - stream_to_sync + stream_to_sync, + config, ): """ A common function sync full table streams. @@ -221,7 +297,6 @@ def sync_endpoint(self, full_url = self.build_url(client.base_url, repo_path, None) stream_catalog = get_schema(catalog, self.tap_stream_id) - with metrics.record_counter(self.tap_stream_id) as counter: for response in client.authed_get_all_pages( self.tap_stream_id, @@ -230,6 +305,7 @@ def sync_endpoint(self, stream = self.tap_stream_id ): records = response.json() + if self.result_path: records = records.get(self.result_path,[]) extraction_time = singer.utils.now() # Loop through all records for record in records: @@ -249,20 +325,21 @@ def sync_endpoint(self, if child in stream_to_sync: parent_id = tuple(record.get(key) for key in STREAMS[child]().id_keys) - - # Sync child stream, if it is selected or its nested child is selected. - self.get_child_records(client, - catalog, - child, - parent_id, - repo_path, - state, - start_date, - record.get(self.replication_keys), - stream_to_sync, - selected_stream_ids, - parent_record = record) - + if STREAMS[child]().id_keys and not all(parent_id): + pass + else: + # Sync child stream, if it is selected or its nested child is selected. 
+ self.get_child_records(client, + catalog, + child, + parent_id, + repo_path, + state, + start_date, + record.get(self.replication_keys), + stream_to_sync, + selected_stream_ids, + parent_record = record) return state class IncrementalStream(Stream): @@ -273,7 +350,8 @@ def sync_endpoint(self, repo_path, start_date, selected_stream_ids, - stream_to_sync + stream_to_sync, + config, ): """ @@ -301,10 +379,10 @@ def sync_endpoint(self, stream = self.tap_stream_id ): records = response.json() + if self.result_path: records = records.get(self.result_path,[]) extraction_time = singer.utils.now() # Loop through all records for record in records: - record['_sdc_repository'] = repo_path self.add_fields_at_1st_level(record = record, parent_record = None) @@ -318,7 +396,6 @@ def sync_endpoint(self, # Keep only records whose bookmark is after the last_datetime if bookmark_dttm >= min_bookmark_value: - if self.tap_stream_id in selected_stream_ids and bookmark_dttm >= parent_bookmark_value: rec = transformer.transform(record, stream_catalog['schema'], metadata=metadata.to_map(stream_catalog['metadata'])) @@ -329,28 +406,121 @@ def sync_endpoint(self, if child in stream_to_sync: parent_id = tuple(record.get(key) for key in STREAMS[child]().id_keys) - - # Sync child stream, if it is selected or its nested child is selected. - self.get_child_records(client, - catalog, - child, - parent_id, - repo_path, - state, - start_date, - record.get(self.replication_keys), - stream_to_sync, - selected_stream_ids, - parent_record = record) + if STREAMS[child]().id_keys and not all(parent_id): + pass + else: + # Sync child stream, if it is selected or its nested child is selected. 
+ self.get_child_records(client, + catalog, + child, + parent_id, + repo_path, + state, + start_date, + record.get(self.replication_keys), + stream_to_sync, + selected_stream_ids, + parent_record = record) else: LOGGER.warning("Skipping this record for %s stream with %s = %s as it is missing replication key %s.", self.tap_stream_id, self.key_properties, record[self.key_properties], self.replication_keys) - # Write bookmark for incremental stream. self.write_bookmarks(self.tap_stream_id, selected_stream_ids, max_bookmark_value, repo_path, state) return state + +class IncrementalDateStream(Stream): + def sync_endpoint(self, + client, + state, + catalog, + repo_path, + start_date, + selected_stream_ids, + stream_to_sync, + config, + ): + + """ + A common function sync incremental streams. Sync an incremental stream for which records are not + in descending order. For, incremental streams iterate all records, write only newly updated records and + write the latest bookmark value. + """ + + parent_bookmark_value = get_bookmark(state, repo_path, self.tap_stream_id, "since", start_date) + current_time = datetime.today().strftime(DATE_FORMAT) + min_bookmark_value = self.get_min_bookmark(self.tap_stream_id, selected_stream_ids, current_time, repo_path, start_date, state) + + max_bookmark_value = min_bookmark_value + LOGGER.info(f'Starting stream with bookmark {min_bookmark_value} and current time {current_time}') + for start_date, end_date in get_date_ranges(min_bookmark_value, current_time, config.get('date_range_window', DATE_RANGE_WINDOW)): + # build full url + full_url = self.build_url(client.base_url, repo_path, {'from': start_date, 'until': end_date}) + + stream_catalog = get_schema(catalog, self.tap_stream_id) + + with metrics.record_counter(self.tap_stream_id) as counter: + for response in client.authed_get_all_pages( + self.tap_stream_id, + full_url, + self.headers, + stream = self.tap_stream_id + ): + records = response.json() + if self.result_path: records = 
records.get(self.result_path,[]) + extraction_time = singer.utils.now() + # Loop through all records + for record in records: + record['_sdc_repository'] = repo_path + self.add_fields_at_1st_level(record = record, parent_record = None) + + with singer.Transformer() as transformer: + if record.get(self.replication_keys): + if record[self.replication_keys] >= max_bookmark_value: + # Update max_bookmark_value + max_bookmark_value = record[self.replication_keys] + + bookmark_dttm = record[self.replication_keys] + + # Keep only records whose bookmark is after the last_datetime + if bookmark_dttm >= min_bookmark_value: + if self.tap_stream_id in selected_stream_ids and bookmark_dttm >= parent_bookmark_value: + rec = transformer.transform(record, stream_catalog['schema'], metadata=metadata.to_map(stream_catalog['metadata'])) + + singer.write_record(self.tap_stream_id, rec, time_extracted=extraction_time) + counter.increment() + + for child in self.children: + if child in stream_to_sync: + + parent_id = tuple(record.get(key) for key in STREAMS[child]().id_keys) + if STREAMS[child]().id_keys and not all(parent_id): + pass + else: + # Sync child stream, if it is selected or its nested child is selected. + self.get_child_records(client, + catalog, + child, + parent_id, + repo_path, + state, + start_date, + record.get(self.replication_keys), + stream_to_sync, + selected_stream_ids, + parent_record = record) + # Write bookmark for incremental stream. + self.write_bookmarks(self.tap_stream_id, selected_stream_ids, max_bookmark_value, repo_path, state) + else: + LOGGER.warning("Skipping this record for %s stream with %s = %s as it is missing replication key %s.", + self.tap_stream_id, self.key_properties, [record.get(key) for key in self.key_properties], self.replication_keys) + if max_bookmark_value < start_date: max_bookmark_value = start_date + # Write bookmark for incremental stream.
+ self.write_bookmarks(self.tap_stream_id, selected_stream_ids, max_bookmark_value, repo_path, state) + singer.write_state(state) + + return state class IncrementalOrderedStream(Stream): @@ -361,7 +531,8 @@ def sync_endpoint(self, repo_path, start_date, selected_stream_ids, - stream_to_sync + stream_to_sync, + config, ): """ A sync function for streams that have records in the descending order of replication key value. For such streams, @@ -387,6 +558,7 @@ def sync_endpoint(self, stream = self.tap_stream_id ): records = response.json() + if self.result_path: records = records.get(self.result_path,[]) extraction_time = singer.utils.now() for record in records: record['_sdc_repository'] = repo_path @@ -419,6 +591,7 @@ def sync_endpoint(self, for child in self.children: if child in stream_to_sync: parent_id = tuple(record.get(key) for key in STREAMS[child]().id_keys) + LOGGER.info(f"Syncing child {child}") # Sync child stream, if it is selected or its nested child is selected. self.get_child_records(client, @@ -432,6 +605,8 @@ def sync_endpoint(self, stream_to_sync, selected_stream_ids, parent_record = record) + # Write bookmark for incremental stream. 
+ self.write_bookmarks(self.tap_stream_id, selected_stream_ids, bookmark_value, repo_path, state) else: LOGGER.warning("Skipping this record for %s stream with %s = %s as it is missing replication key %s.", self.tap_stream_id, self.key_properties, record[self.key_properties], self.replication_keys) @@ -452,6 +627,7 @@ class Reviews(IncrementalStream): replication_method = "INCREMENTAL" replication_keys = "submitted_at" key_properties = ["id"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" path = "pulls/{}/reviews" use_repository = True id_keys = ['number'] @@ -465,13 +641,14 @@ def add_fields_at_1st_level(self, record, parent_record = None): class ReviewComments(IncrementalOrderedStream): ''' - https://docs.github.com/en/rest/pulls/comments#get-a-review-comment-for-a-pull-request + https://docs.github.com/en/rest/pulls/comments#list-review-comments-on-a-pull-request ''' tap_stream_id = "review_comments" replication_method = "INCREMENTAL" replication_keys = "updated_at" key_properties = ["id"] - path = "pulls/{}/comments?sort=updated_at&direction=desc" + additional_filters = f"sort=updated_at&direction=asc&per_page={PER_PAGE_NUMBER}" + path = "pulls/{}/comments" use_repository = True id_keys = ['number'] parent = 'pull_requests' @@ -490,6 +667,7 @@ class PRCommits(IncrementalStream): replication_method = "INCREMENTAL" replication_keys = "updated_at" key_properties = ["id"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" path = "pulls/{}/commits" use_repository = True id_keys = ['number'] @@ -513,8 +691,10 @@ class PullRequests(IncrementalOrderedStream): replication_method = "INCREMENTAL" replication_keys = "updated_at" key_properties = ["id"] - path = "pulls?state=all&sort=updated&direction=desc" + additional_filters = f"state=all&sort=updated&direction=asc&per_page={PER_PAGE_NUMBER}" + path = "pulls" children = ['reviews', 'review_comments', 'pr_commits'] + has_children = True pk_child_fields = ["number"] class ProjectCards(IncrementalStream): @@ -525,6
+705,7 @@ class ProjectCards(IncrementalStream): replication_method = "INCREMENTAL" replication_keys = "updated_at" key_properties = ["id"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" path = "projects/columns/{}/cards" tap_stream_id = "project_cards" parent = 'project_columns' @@ -538,6 +719,7 @@ class ProjectColumns(IncrementalStream): replication_method = "INCREMENTAL" replication_keys = "updated_at" key_properties = ["id"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" path = "projects/{}/columns" children = ["project_cards"] parent = "projects" @@ -552,9 +734,11 @@ class Projects(IncrementalStream): replication_method = "INCREMENTAL" replication_keys = "updated_at" key_properties = ["id"] - path = "projects?state=all" + additional_filters = f"state=all&per_page={PER_PAGE_NUMBER}" + path = "projects" tap_stream_id = "projects" children = ["project_columns"] + has_children = True child_objects = [ProjectColumns()] class TeamMemberships(FullTableStream): @@ -582,6 +766,7 @@ class TeamMembers(FullTableStream): tap_stream_id = "team_members" replication_method = "FULL_TABLE" key_properties = ["team_slug", "id"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" path = "orgs/{}/teams/{}/members" use_organization = True id_keys = ['slug'] @@ -604,12 +789,14 @@ class Teams(FullTableStream): tap_stream_id = "teams" replication_method = "FULL_TABLE" key_properties = ["id"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" path = "orgs/{}/teams" use_organization = True - children= ["team_members"] + children = ["team_members"] + has_children = True pk_child_fields = ['slug'] -class Commits(IncrementalStream): +class Commits(IncrementalDateStream): ''' https://docs.github.com/en/rest/commits/commits#list-commits-on-a-repository ''' @@ -618,13 +805,84 @@ class Commits(IncrementalStream): replication_keys = "updated_at" key_properties = ["sha"] path = "commits" - filter_param = True + children= ["commit_users_emails", "commit_files", "commit_parents",
"commit_pull_request"] + has_children = True + since_filter_param_custom = "since={from}&until={until}&per_page=30" def add_fields_at_1st_level(self, record, parent_record = None): """ Add fields in the record explicitly at the 1st level of JSON. """ - record['updated_at'] = record['commit']['committer']['date'] + if not record: return + record['updated_at'] = self.get_field(record,['commit','committer','date']) + record['message'] = self.get_field(record,['commit','message']) + record['comit_name'] = self.get_field(record,['commit','committer','name']) + record['author_email'] = self.get_field(record,['commit','author','email']) + record['author_id'] = self.get_field(record,['author','id']) + record['author_name'] = self.get_field(record,['commit','author','name']) + record['author_login'] = self.get_field(record,['author','login']) + record['committer_email'] = self.get_field(record,['commit','committer','email']) + record['committer_name'] = self.get_field(record,['commit','committer','name']) + +class CommitFiles(IncrementalStream): + ''' + Child of "commits" - https://docs.github.com/en/rest/commits/commits#get-a-commit + ''' + tap_stream_id = "commit_files" + replication_method = "INCREMENTAL" + key_properties = ["commit_sha", "filename"] + id_keys = ["sha"] + use_repository = True + path = "commits/{}" + inherit_parent_fields = [("commit_sha","sha"), ("_sdc_repository","_sdc_repository")] + parent = 'commits' + result_path = "files" + +class CommitParents(FullTableStream): + ''' + Child of "commits" - https://docs.github.com/en/rest/commits/commits#list-commits-on-a-repository + ''' + tap_stream_id = "commit_parents" + replication_method = "INCREMENTAL" + key_properties = ["children_sha","sha"] + no_path = True + inherit_parent_fields = [("children_sha","sha"), ("_sdc_repository","_sdc_repository")] + inherit_array_parent_fields = "parents" + parent = 'commits' + +class CommitPullRequest(IncrementalStream): + ''' + 
https://docs.github.com/en/rest/commits/commits#list-pull-requests-associated-with-a-commit + ''' + tap_stream_id = "commit_pull_request" + replication_method = "INCREMENTAL" + key_properties = ["commit_sha","pull_request_id"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" + path = "commits/{}/pulls" + use_repository = True + id_keys = ["sha"] + inherit_parent_fields = [("commit_sha","sha"), ("_sdc_repository","_sdc_repository")] + parent = 'commits' + + def add_fields_at_1st_level(self, record, parent_record = None): + """ + Add fields in the record explicitly at the 1st level of JSON. + """ + if not record: return + record['pull_request_id'] = self.get_field(record,['id']) + + +class UserEmail(IncrementalStream): + ''' + Created from fields of Commits table + ''' + tap_stream_id = "commit_users_emails" + replication_method = "INCREMENTAL" + key_properties = ["email"] + id_keys = ["author_email"] + no_path = True + inherit_parent_fields = [("email","author_email"),("id","author_id"),("name","author_name"),("username","author_login")] + parent = 'commits' class Comments(IncrementalOrderedStream): ''' @@ -634,8 +892,8 @@ class Comments(IncrementalOrderedStream): replication_method = "INCREMENTAL" replication_keys = "updated_at" key_properties = ["id"] - filter_param = True - path = "issues/comments?sort=updated&direction=desc" + since_filter_param = f"&sort=updated&direction=asc&per_page={PER_PAGE_NUMBER}" + path = "issues/comments" class Issues(IncrementalOrderedStream): ''' @@ -645,8 +903,34 @@ class Issues(IncrementalOrderedStream): replication_method = "INCREMENTAL" replication_keys = "updated_at" key_properties = ["id"] - filter_param = True - path = "issues?state=all&sort=updated&direction=desc" + since_filter_param = f"&state=all&sort=updated&direction=asc&per_page={PER_PAGE_NUMBER}" + path = "issues" + children = ["issue_assignees","issue_labels"] + has_children = True + +class IssueAssignees(IncrementalOrderedStream): + ''' + Child of "issues" -
https://docs.github.com/en/rest/issues/issues#list-repository-issues + ''' + tap_stream_id = "issue_assignees" + replication_method = "INCREMENTAL" + key_properties = ["issue_id","id"] + no_path = True + inherit_parent_fields = [("issue_id","id"), ("_sdc_repository","_sdc_repository")] + inherit_array_parent_fields = "assignees" + parent = 'issues' + +class IssueLabels(IncrementalOrderedStream): + ''' + Child of "issues" - https://docs.github.com/en/rest/issues/issues#list-repository-issues + ''' + tap_stream_id = "issue_labels" + replication_method = "INCREMENTAL" + key_properties = ["issue_id","id"] + no_path = True + inherit_parent_fields = [("issue_id","id"), ("_sdc_repository","_sdc_repository")] + inherit_array_parent_fields = "labels" + parent = 'issues' class Assignees(FullTableStream): ''' @@ -655,6 +939,7 @@ class Assignees(FullTableStream): tap_stream_id = "assignees" replication_method = "FULL_TABLE" key_properties = ["id"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" path = "assignees" class Releases(FullTableStream): @@ -664,15 +949,57 @@ class Releases(FullTableStream): tap_stream_id = "releases" replication_method = "FULL_TABLE" key_properties = ["id"] - path = "releases?sort=created_at&direction=desc" + additional_filters = f"sort=created_at&direction=desc&per_page={PER_PAGE_NUMBER}" + path = "releases" + children = ["release_assets"] + has_children = True + +class ReleaseAssets(FullTableStream): + ''' + Child of "releases" - https://docs.github.com/en/rest/releases/releases#list-releases + ''' + tap_stream_id = "release_assets" + replication_method = "FULL_TABLE" + key_properties = ["id"] + use_repository = True + id_keys = ["id"] + no_path = True + inherit_parent_fields = [("release_id","id"), ("_sdc_repository","_sdc_repository")] + additional_filters = f"per_page={PER_PAGE_NUMBER}" + inherit_array_parent_fields = "assets" + parent = 'releases' -class IssueLabels(FullTableStream): + def add_fields_at_1st_level(self, record, parent_record =
None): + """ + Add fields in the record explicitly at the 1st level of JSON. + """ + if not record: return + record['uploader_id'] = self.get_field(record,['uploader','id']) + +class Branches(FullTableStream): + ''' + https://docs.github.com/en/rest/branches/branches#list-branches + ''' + tap_stream_id = "branches" + replication_method = "FULL_TABLE" + key_properties = ["name"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" + path = "branches" + + def add_fields_at_1st_level(self, record, parent_record = None): + """ + Add fields in the record explicitly at the 1st level of JSON. + """ + if not record: return + record['commit_sha'] = self.get_field(record,['commit','sha']) +class Labels(FullTableStream): ''' https://docs.github.com/en/rest/issues/labels#list-labels-for-a-repository ''' - tap_stream_id = "issue_labels" + tap_stream_id = "labels" replication_method = "FULL_TABLE" key_properties = ["id"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" path = "labels" class IssueEvents(IncrementalOrderedStream): @@ -683,7 +1010,8 @@ class IssueEvents(IncrementalOrderedStream): replication_method = "INCREMENTAL" replication_keys = "created_at" key_properties = ["id"] - path = "issues/events?sort=created_at&direction=desc" + additional_filters = f"sort=created_at&direction=desc&per_page={PER_PAGE_NUMBER}" + path = "issues/events" class Events(IncrementalStream): ''' @@ -693,6 +1021,7 @@ class Events(IncrementalStream): replication_method = "INCREMENTAL" replication_keys = "created_at" key_properties = ["id"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" path = "events" class CommitComments(IncrementalStream): @@ -703,6 +1032,7 @@ class CommitComments(IncrementalStream): replication_method = "INCREMENTAL" replication_keys = "updated_at" key_properties = ["id"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" path = "comments" class IssueMilestones(IncrementalOrderedStream): @@ -713,7 +1043,8 @@ class IssueMilestones(IncrementalOrderedStream):
replication_method = "INCREMENTAL" replication_keys = "updated_at" key_properties = ["id"] - path = "milestones?direction=desc&sort=updated_at" + additional_filters = f"direction=desc&sort=updated_at&per_page={PER_PAGE_NUMBER}" + path = "milestones" class Collaborators(FullTableStream): ''' @@ -722,7 +1053,23 @@ class Collaborators(FullTableStream): tap_stream_id = "collaborators" replication_method = "FULL_TABLE" key_properties = ["id"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" path = "collaborators" + children = ["collaborator_details"] + has_children = True + +class CollaboratorDetails(FullTableStream): + ''' + https://docs.github.com/en/rest/users/users#get-a-user + ''' + tap_stream_id = "collaborator_details" + replication_method = "FULL_TABLE" + key_properties = ["id"] + id_keys = ["login"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" + path = "users/{}" + parent = 'collaborators' + class StarGazers(FullTableStream): ''' @@ -731,6 +1078,7 @@ class StarGazers(FullTableStream): tap_stream_id = "stargazers" replication_method = "FULL_TABLE" key_properties = ["user_id"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" path = "stargazers" headers = {'Accept': 'application/vnd.github.v3.star+json'} @@ -740,15 +1088,163 @@ def add_fields_at_1st_level(self, record, parent_record = None): """ record['user_id'] = record['user']['id'] +class Repositories(FullTableStream): + ''' + https://docs.github.com/en/rest/repos/repos#list-organization-repositories + ''' + tap_stream_id = "repositories" + replication_method = "FULL_TABLE" + key_properties = ["id"] + use_organization = True + additional_filters = f"per_page={PER_PAGE_NUMBER}" + path = "orgs/{}/repos" + children = ["repository_topics"] + has_children = True + + def add_fields_at_1st_level(self, record, parent_record = None): + """ + Add fields in the record explicitly at the 1st level of JSON.
+ """ + if not record: return + record['owner_id'] = self.get_field(record,['owner','id']) + +class RepositoryTeams(FullTableStream): + ''' + https://docs.github.com/en/rest/repos/repos#list-repository-teams + ''' + tap_stream_id = "repository_teams" + replication_method = "FULL_TABLE" + key_properties = ["_sdc_repository","id"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" + path = "teams" + +class RepositoryTopics(FullTableStream): + ''' + Child of "repositories" - https://docs.github.com/en/rest/repos/repos#list-organization-repositories + ''' + tap_stream_id = "repository_topics" + replication_method = "FULL_TABLE" + key_properties = ["repository","topic"] + no_path = True + id_keys = ["full_name"] + inherit_parent_fields = [("repository","full_name")] + inherit_array_parent_fields = "topics" + custom_column_name = "topic" + parent = 'repositories' + +class Deployments(FullTableStream): + ''' + https://docs.github.com/en/rest/deployments/deployments#list-deployments + ''' + tap_stream_id = "deployments" + replication_method = "FULL_TABLE" + key_properties = ["id"] + additional_filters = f"sort=created_at&direction=desc&per_page={PER_PAGE_NUMBER}" + path = "deployments" + children = ["deployment_statuses"] + has_children = True + + def add_fields_at_1st_level(self, record, parent_record = None): + """ + Add fields in the record explicitly at the 1st level of JSON.
+ """ + if not record: return + record['creator_id'] = self.get_field(record,['creator','id']) + +class DeploymentStatuses(FullTableStream): + ''' + https://docs.github.com/en/rest/deployments/statuses#list-deployment-statuses + ''' + tap_stream_id = "deployment_statuses" + replication_method = "FULL_TABLE" + use_repository = True + key_properties = ["deployment_id","id"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" + path = "deployments/{}/statuses" + id_keys = ["id"] + inherit_parent_fields = [("deployment_id","id"),("_sdc_repository","_sdc_repository")] + parent = 'deployments' + + def add_fields_at_1st_level(self, record, parent_record = None): + """ + Add fields in the record explicitly at the 1st level of JSON. + """ + if not record: return + record['creator_id'] = self.get_field(record,['creator','id']) + +class Workflows(FullTableStream): + ''' + https://docs.github.com/en/rest/actions/workflows#list-repository-workflows + ''' + tap_stream_id = "workflows" + replication_method = "FULL_TABLE" + key_properties = ["id"] + additional_filters = f"per_page={PER_PAGE_NUMBER}" + path = "actions/workflows" + result_path = "workflows" + +class WorkflowRuns(IncrementalDateStream): + ''' + https://docs.github.com/en/rest/actions/workflow-runs#list-workflow-runs-for-a-repository + ''' + tap_stream_id = "workflow_runs" + replication_method = "INCREMENTAL" + replication_keys = "created_at" + use_repository = True + key_properties = ["id"] + path = "actions/runs" + result_path = "workflow_runs" + since_filter_param_custom = "per_page=100&created={from}..{until}" + children = ["workflow_run_pull_requests"] + has_children = True + + def add_fields_at_1st_level(self, record, parent_record = None): + """ + Add fields in the record explicitly at the 1st level of JSON.
+ """ + if not record: return + record['actor_id'] = self.get_field(record,['actor','id']) + record['triggering_actor_id'] = self.get_field(record,['triggering_actor','id']) + record['repository_id'] = self.get_field(record,['repository','id']) + +class WorkflowPullRequests(IncrementalStream): + ''' + Child of "workflow_runs" - https://docs.github.com/en/rest/actions/workflow-runs#list-workflow-runs-for-a-repository + ''' + tap_stream_id = "workflow_run_pull_requests" + replication_method = "INCREMENTAL" + key_properties = ["workflow_run_id","id"] + no_path = True + inherit_parent_fields = [("workflow_run_id","id"), ("_sdc_repository","_sdc_repository")] + inherit_array_parent_fields = "pull_requests" + parent = 'workflow_runs' + + def add_fields_at_1st_level(self, record, parent_record = None): + """ + Add fields in the record explicitly at the 1st level of JSON. + """ + if not record: return + record['head_sha'] = self.get_field(record,['head','sha']) + record['base_sha'] = self.get_field(record,['base','sha']) # Dictionary of the stream classes STREAMS = { + "repositories": Repositories, + "repository_teams": RepositoryTeams, + "repository_topics": RepositoryTopics, "commits": Commits, + "commit_files": CommitFiles, + "commit_parents": CommitParents, + "commit_pull_request": CommitPullRequest, "comments": Comments, "issues": Issues, + "issue_assignees": IssueAssignees, + "issue_labels": IssueLabels, "assignees": Assignees, "releases": Releases, - "issue_labels": IssueLabels, + "release_assets": ReleaseAssets, + "branches": Branches, + "labels": Labels, "issue_events": IssueEvents, "events": Events, "commit_comments": CommitComments, @@ -764,5 +1260,12 @@ def add_fields_at_1st_level(self, record, parent_record = None): "team_members": TeamMembers, "team_memberships": TeamMemberships, "collaborators": Collaborators, - "stargazers": StarGazers + "collaborator_details": CollaboratorDetails, + "stargazers": StarGazers, + "commit_users_emails": UserEmail, +
"deployments": Deployments, + "deployment_statuses": DeploymentStatuses, + "workflows": Workflows, + "workflow_runs": WorkflowRuns, + "workflow_run_pull_requests": WorkflowPullRequests } diff --git a/tap_github/sync.py b/tap_github/sync.py index a83610ad..e9dde313 100644 --- a/tap_github/sync.py +++ b/tap_github/sync.py @@ -4,7 +4,8 @@ from tap_github.streams import STREAMS LOGGER = singer.get_logger() -STREAM_TO_SYNC_FOR_ORGS = ['teams', 'team_members', 'team_memberships'] +STREAM_TO_SYNC_FOR_ORGS = ['teams', 'team_members', 'team_memberships', 'repositories', 'repository_topics'] +schemas_sent = [] def get_selected_streams(catalog): ''' @@ -47,6 +48,7 @@ def get_ordered_stream_list(currently_syncing, streams_to_sync): """ Get an ordered list of remaining streams to sync other streams followed by synced streams. """ + LOGGER.info('Currently syncing stream: %s', currently_syncing) stream_list = list(sorted(streams_to_sync)) if currently_syncing in stream_list: index = stream_list.index(currently_syncing) @@ -58,6 +60,7 @@ def get_ordered_repos(state, repositories): Get an ordered list of remaining repos to sync followed by synced repos. """ syncing_repo = state.get("currently_syncing_repo") + LOGGER.info('Currently syncing repo from state: %s', syncing_repo) if syncing_repo in repositories: index = repositories.index(syncing_repo) repositories = repositories[index:] + repositories[:index] @@ -163,7 +166,9 @@ def write_schemas(stream_id, catalog, selected_streams): if stream_id in selected_streams: # Get catalog object for particular stream.
stream = [cat for cat in catalog['streams'] if cat['tap_stream_id'] == stream_id ][0] - singer.write_schema(stream_id, stream['schema'], stream['key_properties']) + if stream_id not in schemas_sent: + singer.write_schema(stream_id, stream['schema'], stream['key_properties']) + schemas_sent.append(stream_id) for child in stream_obj.children: write_schemas(child, catalog, selected_streams) @@ -201,7 +206,7 @@ def sync(client, config, state, catalog): for repo in get_ordered_repos(state, repositories): update_currently_syncing_repo(state, repo) LOGGER.info("Starting sync of repository: %s", repo) - do_sync(catalog, streams_to_sync_for_repos, selected_stream_ids, client, start_date, state, repo) + do_sync(catalog, streams_to_sync_for_repos, selected_stream_ids, client, start_date, state, repo, config) if client.not_accessible_repos: # Give warning messages for a repo that is not accessible by a stream or is invalid. @@ -210,14 +215,14 @@ def sync(client, config, state, catalog): client.not_accessible_repos = set() update_currently_syncing_repo(state, None) -def do_sync(catalog, streams_to_sync, selected_stream_ids, client, start_date, state, repo): +def do_sync(catalog, streams_to_sync, selected_stream_ids, client, start_date, state, repo, config=None): """ Sync all other streams except teams, team_members and team_memberships for each repo.
""" currently_syncing = singer.get_currently_syncing(state) for stream_id in get_ordered_stream_list(currently_syncing, streams_to_sync): stream_obj = STREAMS[stream_id]() - + LOGGER.info('Starting stream %s for %s.', stream_id, repo) # If it is a "sub_stream", it will be synced as part of the parent stream if stream_id in streams_to_sync and not stream_obj.parent: write_schemas(stream_id, catalog, selected_stream_ids) @@ -229,7 +234,8 @@ def do_sync(catalog, streams_to_sync, selected_stream_ids, client, start_date, s repo_path = repo, start_date = start_date, selected_stream_ids = selected_stream_ids, - stream_to_sync = streams_to_sync + stream_to_sync = streams_to_sync, + config = {} if config is None else config, ) singer.write_state(state)