Skip to content
This repository was archived by the owner on Mar 23, 2026. It is now read-only.

Commit 7c0f66a

Browse files
authored
upload raw test data for parity analytics to tinybird (CircleCI Job) (#7324)
1 parent 745e233 commit 7c0f66a

5 files changed

Lines changed: 583 additions & 16 deletions

File tree

.circleci/config.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,15 @@ jobs:
351351
command: |
352352
source .venv/bin/activate
353353
python -m scripts.metric_aggregator . amd64
354+
- run:
355+
name: Upload test metrics and implemented coverage data to tinybird
356+
command: |
357+
source .venv/bin/activate
358+
METRIC_REPORT_FILE=$(find parity_metrics -type f -iname "metric-report-raw-data-all-*.csv")
359+
METRIC_REPORT_PATH=$METRIC_REPORT_FILE \
360+
COMMUNITY_IMPL_COV_PATH=scripts/implementation_coverage_full.csv \
361+
PRO_IMPL_COV_PATH=scripts/pro/implementation_coverage_full.csv \
362+
python -m scripts.tinybird.upload_raw_test_metrics_and_coverage
354363
- store_artifacts:
355364
path: parity_metrics/
356365
- store_artifacts:

localstack/aws/handlers/metric_handler.py

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,30 @@ def __iter__(self):
109109
]
110110
)
111111

112+
def __eq__(self, other):
113+
# ignore header in comparison, because timestamp will be different
114+
if self.service != other.service:
115+
return False
116+
if self.operation != other.operation:
117+
return False
118+
if self.parameters != other.parameters:
119+
return False
120+
if self.response_code != other.response_code:
121+
return False
122+
if self.response_data != other.response_data:
123+
return False
124+
if self.exception != other.exception:
125+
return False
126+
if self.origin != other.origin:
127+
return False
128+
if self.xfail != other.xfail:
129+
return False
130+
if self.aws_validated != other.aws_validated:
131+
return False
132+
if self.node_id != other.node_id:
133+
return False
134+
return True
135+
112136

113137
class MetricHandler:
114138
metric_data: List[Metric] = []
@@ -159,21 +183,21 @@ def update_metric_collection(
159183
parameters = ",".join(item.parameters_after_parse or [])
160184

161185
response_data = response.data.decode("utf-8") if response.status_code >= 300 else ""
162-
163-
MetricHandler.metric_data.append(
164-
Metric(
165-
service=context.service_operation.service,
166-
operation=context.service_operation.operation,
167-
headers=context.request.headers,
168-
parameters=parameters,
169-
response_code=response.status_code,
170-
response_data=response_data,
171-
exception=context.service_exception.__class__.__name__
172-
if context.service_exception
173-
else "",
174-
origin="internal" if is_internal else "external",
175-
)
186+
metric = Metric(
187+
service=context.service_operation.service,
188+
operation=context.service_operation.operation,
189+
headers=context.request.headers,
190+
parameters=parameters,
191+
response_code=response.status_code,
192+
response_data=response_data,
193+
exception=context.service_exception.__class__.__name__
194+
if context.service_exception
195+
else "",
196+
origin="internal" if is_internal else "external",
176197
)
198+
# refrain from adding duplicates
199+
if metric not in MetricHandler.metric_data:
200+
MetricHandler.metric_data.append(metric)
177201

178202
# cleanup
179203
del self.metrics_handler_items[context]

scripts/capture_notimplemented_responses.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ def run_script(services: list[str], path: None):
191191
)
192192
aggregated_w = csv.DictWriter(
193193
aggregatefile,
194-
fieldnames=["service", "operation", "implemented_count", "full_count", "percentage"],
194+
fieldnames=["service", "implemented_count", "full_count", "percentage"],
195195
)
196196

197197
full_w.writeheader()
@@ -232,7 +232,6 @@ def run_script(services: list[str], path: None):
232232
aggregated_w.writerow(
233233
{
234234
"service": response["service"],
235-
"operation": response["operation"],
236235
"implemented_count": implemented_count,
237236
"full_count": all_count,
238237
"percentage": f"{implemented_percentage * 100:.1f}",
Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
"""Helper script to retrieve historical data and load into tinybird parity dashboard
2+
3+
The script is intended to be run locally. It was executed once, to retrieve the data from the past successful master builds
4+
in order to get more data into the parity dashboard for a hackathon project.
5+
6+
"""
7+
8+
import datetime
9+
import http.client
10+
import json
11+
import os
12+
import urllib
13+
14+
from scripts.tinybird.upload_raw_test_metrics_and_coverage import (
15+
send_implemented_coverage,
16+
send_metric_report,
17+
)
18+
19+
# CircleCI project slug of the repository whose workflow history is backfilled.
PROJECT_SLUG = "github/localstack/localstack"
# only workflows that ran on this branch are considered
MASTER_BRANCH = "master"
22+
23+
def send_request_to_connection(conn, url):
    """Issue a GET request on *conn* and return the raw response body.

    Parameters:
        conn: an open ``http.client.HTTPSConnection`` (or compatible object
            exposing ``request``/``getresponse``).
        url: request path (including query string) to fetch.

    Returns:
        The response body as ``bytes`` on HTTP 200, otherwise ``None``.
    """
    print(f"sending request to url: {url}")
    headers = {"accept": "application/json"}  # , "Circle-Token": api_token}
    conn.request(
        "GET",
        url=url,
        headers=headers,
    )

    res = conn.getresponse()
    if res.getcode() == 200:
        return res.read()
    # bug fix: getcode is a method — the original `res.getcode` printed the
    # bound-method repr instead of the numeric status code
    print(f"connection failed: {res.getcode()}")
    return None
40+
41+
def extract_artifacts_url_for_path(artifacts, path):
    """Return the URL of the single artifact whose path starts with *path*.

    Logs and returns ``None`` when zero or more than one artifact matches,
    since each expected artifact must be unambiguous.
    """
    matching_urls = []
    for artifact in artifacts["items"]:
        if artifact["path"].startswith(path):
            matching_urls.append(artifact["url"])
    if len(matching_urls) == 1:
        return matching_urls[0]
    print(f"unexpected artifacts count for {path}, unexpected content: {matching_urls}")
    return None
48+
49+
def collect_workflows_past_30_days():
    """
    Retrieves the workflow runs from the past 30 days from CircleCI on the 'master' branch,
    and retrieves the artifacts for each successful workflow run, that are collected in the 'report' job.
    The artifacts for coverage implementation, and raw-data collection are downloaded, and then processed and sent to
    tinybird backend.
    """
    # Local import fix: the module-level `import urllib` does NOT load the
    # `urllib.request` submodule, so `urllib.request.urlretrieve` below would
    # raise AttributeError without this explicit import.
    import urllib.request

    # Initialize everything the `finally` block references BEFORE the `try`:
    # in the original code an early exception — or an early `return` before
    # these names were assigned — turned cleanup into a NameError.
    conn = None
    timestamp = None
    count = 0
    # this is just for tracking the current status - we already uploaded data for all of these workflow-ids:
    already_sent = [
        "0b4e29e5-b6c2-42b6-8f2d-9bbd3d3bc8aa",
        "3780cc96-10a0-4c41-9b5a-98d16b83dd94",
        "7ec971e9-4ee2-4269-857e-f3641961ecde",
        "3e02b8c5-6c9b-40d0-84df-c4e2d0a7797d",
        "015202d7-5071-4773-b223-854ccffe969f",
        "c8dd0d5d-b00c-4507-9129-669c3cc9f55a",
        "a87bf4f8-3adb-4d0a-b11c-32c0a3318ee9",
        "0b1a2ddb-ed17-426c-ba0c-23c4771ecb22",
        "97d01dac-15a1-4791-8e90-ce1fed09538d",
        "83fb8b2f-dab2-465f-be52-83342820f448",
        "2ae81ec5-2d18-48bf-b4ad-6bed8309f281",
        "63aa8ee8-4242-43fa-8408-4720c8fdd04b",
        "32c09e00-0733-443e-9b3a-9ca7e2ae32eb",
        "e244742d-c90b-4301-9d0f-1c6a06e3eec9",
        "0821f4ca-640d-4cce-9af8-a593f261aa75",
        "b181f475-192c-49c5-9f80-f33201a2d11b",
        "90b57b93-4a01-4612-bd92-fe9c4566da64",
        "dd8e4e20-2f85-41d3-b664-39304feec01b",
        "6122ea91-f0e4-4ea4-aca6-b67feec9d81b",
        "c035931f-90b0-4c48-a82c-0b7e343ebf49",
        "d8b03fae-b7e2-4871-a480-84edd531bfb9",
        "f499c3c1-ac46-403a-8a73-2daaebcf063d",
        "a310a406-b37a-4556-89e3-a6475bbb114f",
        "bab3f52c-0ed2-4390-b4b4-d34b5cb6e1ad",
        "c2245fe6-258f-4248-a296-224fe3f213d1",
        "67e8e834-3ab6-497e-b2d3-1e6df4575380",
        "3b367c58-f208-4e98-aa92-816cd649094b",
        "cc63b1b1-61ff-44f9-b3bf-cc24e23cf54b",
        "4eff4f42-770e-414a-ad5d-dde8e49b244f",
        "8092d5a8-c9a8-4812-ac22-d620a5e04003",
        "d682debe-17d7-4e31-9df1-e2f70758302f",
        "b8a3e0ea-25ca-47df-afec-48ac3a0de811",
        "450f335f-cd9c-45f3-a69f-1db5f9f16082",
        "4467264f-8a57-4a05-ad0d-8d224221ec69",
        "9e91a4d6-147b-4a64-bcb6-2d311164c3d8",
        "4a0c989a-31e7-4d9d-afdc-dc31c697fd11",
        "5b1a604c-12a9-4b9c-ba1e-abd8be05e135",
        "a9291b6e-eefe-466f-8802-64083abbfb0f",
        "0210fe7b-55a9-4bb0-a496-fbbff2831dd5",
        "1d5056aa-4d8c-4435-8a90-b3b48c8849e6",
        "1b339b55-fd27-4527-aff3-4a31109297e4",
        "f9c79715-ff09-4a1a-acea-ac4acd0eedc4",
        "93cddbf6-b48d-4086-b089-869ff2b7af0f",
        "f96e2531-cde6-490f-be26-076b3b3deaa4",
        "2dec1ba3-c306-4868-95bf-668689c10f4f",
        "ce8bedd9-618c-4475-b76e-b429ac49f84b",
        "7f2ae078-41cd-4f64-88ec-ef0f45185020",
        "271ba76a-3c7d-4b6e-abbd-294050608ebf",
        "afa647e9-ad38-467f-9ebc-fa7283586c19",
        "2cef06d8-98dc-415e-a8af-758689711c68",
        "8c859042-b37a-4447-9d3e-07d1ae160765",
        "b5ba1234-1983-4805-a9be-c4ca9c52b799",
        "b6614e63-4538-4583-8f9d-0c220db602a8",
        "71453fae-a689-4e28-995f-bd6e2c7cadaf",
        "53e43bae-3c70-4df5-8490-fe9208fbd952",
        "d1776b0e-7ddc-42e0-bd2d-7561ae72ae8b",
        "ad88f81e-6526-44f4-9208-ea64efdbde87",
        "503226e6-6671-4248-9fba-7b31f4684c0c",
        "c8e688aa-b63d-4e11-a14e-4ea1a2ad5257",
        "48002330-8ecb-41c5-9acc-95ae260a7a15",
        "e5550424-bec4-48a1-9354-0ad1f14510c4",
        "304dc6fc-9807-46b6-9665-fe8d6cc2d9b7",
        "24fe00ef-6c48-4260-9bca-125e2b16e7b2",
        "12e6470d-f923-4358-9fbb-185ff981903c",
        "32b53e7f-f0d3-446b-9b56-9cb4cdd5134d",
        "fe786b67-dc09-41e0-aba5-33e7aa8dcdf7",
        "a7c06a4b-2954-4660-8072-3c10c7d2823b",
        "c1dedfce-2619-484b-8a10-bc9b2bda39ff",
        "618a7511-e82b-4e7f-9d4a-4b4a4247f6e0",
        "00bec0f4-7844-4ad9-8d01-e3833aae9697",
        "8cb2fb8f-b840-4f5b-b151-744fb425298c",
        "8c2a8d3d-f05a-4c27-9df6-bc7f4f6106b8",
        "9dfc79d6-952e-4ae4-9dd8-493ac9a30065",
        "edf9a307-0e80-4a80-97f4-f53c78910554",
        "3c9c12e5-0fe7-4b1a-b224-7570808f8e19",
    ]
    try:
        conn = http.client.HTTPSConnection("circleci.com")
        # api_token = os.getenv("API_TOKEN")

        # NOTE(review): utcnow() is deprecated in newer Python versions, but is
        # kept to preserve the exact naive isoformat (no "+00:00" suffix) used
        # in the query string below.
        end = datetime.datetime.utcnow()
        start = end - datetime.timedelta(days=30)

        get_workflows_request = f"/api/v2/insights/{PROJECT_SLUG}/workflows/main?&branch={MASTER_BRANCH}&start-date={start.isoformat()}&end-date={end.isoformat()}"

        data = send_request_to_connection(conn, get_workflows_request)

        if not data:
            print(f"could not resolve {get_workflows_request}")
            return

        # TODO check "next_page_token"
        # -> wasn't required for the initial run, as on master everything was on one page for the past 30 days
        workflows = json.loads(data.decode("utf-8"))
        # default to [] so a missing "items" key does not crash the loop
        for item in workflows.get("items", []):
            if item["status"] != "success":
                continue
            workflow_id = item["id"]
            if workflow_id in already_sent:
                continue
            print(f"checking workflow_id {workflow_id}")
            date_created_at = item["created_at"]
            converted_date = datetime.datetime.strptime(
                date_created_at, "%Y-%m-%dT%H:%M:%S.%fZ"
            )
            # create the same time format we use when uploading data in the CircleCI job
            timestamp = converted_date.strftime("%Y-%m-%d %H:%M:%S")

            # get the details for the job (we need the job_number of the report step)
            job_request = f"/api/v2/workflow/{workflow_id}/job"
            job_data = send_request_to_connection(conn, job_request)
            if not job_data:
                print("could not retrieve job_data")
                return
            jobs = json.loads(job_data.decode("utf-8"))
            # use `job`, not `item`, to avoid shadowing the outer loop variable
            report_job = [job for job in jobs["items"] if job["name"] == "report"]
            if len(report_job) != 1:
                print(f"report job should be exactly 1, unexpected content: {report_job}")
                return
            job_number = report_job[0]["job_number"]

            # request artifacts for the report job
            artifacts_request = (
                f"/api/v2/project/github/localstack/localstack/{job_number}/artifacts"
            )
            artifacts_data = send_request_to_connection(conn, artifacts_request)
            if not artifacts_data:
                print("could not retrieve artifacts data")
                return

            artifacts = json.loads(artifacts_data.decode("utf-8"))

            # extract the required urls for metric-data-raw, and coverage data for community/pro
            metric_data_url = extract_artifacts_url_for_path(
                artifacts=artifacts, path="parity_metrics/metric-report-raw-data-all"
            )
            community_cov_url = extract_artifacts_url_for_path(
                artifacts=artifacts, path="community/implementation_coverage_full.csv"
            )
            pro_cov_url = extract_artifacts_url_for_path(
                artifacts=artifacts, path="pro/implementation_coverage_full.csv"
            )

            if not metric_data_url or not community_cov_url or not pro_cov_url:
                # message typo fixed ("existing.." -> "exiting...")
                print("At least one artifact url could not be found. exiting...")
                return

            # download files locally
            metric_report_file_path = "./metric_report_raw.csv"
            print(f"trying to download {metric_data_url}")
            urllib.request.urlretrieve(metric_data_url, metric_report_file_path)

            community_coverage_file_path = "./community_coverage.csv"
            print(f"trying to download {community_cov_url}")
            urllib.request.urlretrieve(community_cov_url, community_coverage_file_path)

            pro_coverage_file_path = "./pro_coverage.csv"
            print(f"trying to download {pro_cov_url}")
            urllib.request.urlretrieve(pro_cov_url, pro_coverage_file_path)

            # update required ENVs with the data from the current workflow/job, so the
            # upload helpers report them as if they ran inside that CI build
            os.environ["CIRCLE_BRANCH"] = MASTER_BRANCH
            os.environ["CIRCLE_PULL_REQUESTS"] = ""
            os.environ["CIRCLE_BUILD_NUM"] = str(job_number)
            os.environ["CIRCLE_BUILD_URL"] = ""
            os.environ["CIRCLE_WORKFLOW_ID"] = str(workflow_id)

            # trigger the tinybird upload
            send_metric_report(metric_report_file_path, timestamp)
            send_implemented_coverage(
                community_coverage_file_path, timestamp=timestamp, type="community"
            )
            send_implemented_coverage(pro_coverage_file_path, timestamp=timestamp, type="pro")
            already_sent.append(workflow_id)
            count = count + 1

    finally:
        # progress report: `already_sent` now also contains the ids uploaded in this run
        print(already_sent)
        if timestamp:
            print(f"last timestamp: {timestamp}")
        if count:
            print(f"sent {count} workflow data to tinybird")
        if conn:
            conn.close()
244+
245+
def main():
    """Entry point: backfill tinybird with the past 30 days of master workflow data."""
    collect_workflows_past_30_days()


if __name__ == "__main__":
    main()

0 commit comments

Comments (0)