22import os
33import glob
44import ast
5- from datasets import load_dataset
5+ import subprocess
66import json
77import shutil
88import argparse
9+ import pypdf
10+ import tqdm
11+
12+ from datasets import load_dataset
913from transformers import AutoTokenizer
14+
1015from commit0 .harness .constants import SPLIT
1116from commit0 .harness .utils import clone_repo
1217from commit0 .cli import write_commit0_dot_file
13- import pypdf
1418
1519import logging
1620
@@ -162,129 +166,133 @@ def get_blank_repo_metrics(
162166 return blank_repo_metrics
163167
164168
165- def render_mds (subfolder = "docs" ):
166- all_submissions = {}
169+ # def render_leaderboard(split):
170+
171+ # for branch_name, branch_info in all_submissions.items():
172+ # if branch_info['split'] != split: continue
173+ # repos_resolved = 0
174+ # cum_passed = 0
175+ # total_duration = 0.0
176+ # for repo_name, repo_test_info in branch_info.items():
177+ # for testname, test_info in repo_test_info.items():
178+ # if "failed_to_run" not in test_info:
179+ # total_duration += test_info["duration"]
180+ # if ('failed' not in test_info['summary']) or (test_info['summary']['failed'] == 0):
181+ # repos_resolved += 1
182+ # # f"{test_info['summary']['collected']} ; duration: {test_info['duration']:.2f}s"
183+ # cum_passed += test_info["summary"]["passed"]
184+ # break # assume we ran all tests. will add functionality for checking diff tests later, as we need it.
185+ # analysis_link = f"[Analysis]({f'analysis_{branch_name}'})"
186+ # leaderboard += f"\n||[{branch_info['display_name']}]({branch_info['project_page']})|" \
187+ # f"{repos_resolved}|" \
188+ # f"{cum_passed}|" \
189+ # f"{total_duration:.2f}" \
190+ # f"{branch_info['submission_date']}|" \
191+ # f"{analysis_link}||"
192+ # return leaderboard
193+
167194
168- method_repo_pytests = {}
169- for branch_name in glob .glob (os .path .join (analysis_files_path , "*" )):
def render_mds(subfolder="docs"):
    """Render the leaderboard and per-submission analysis pages as Markdown.

    Reads every ``<analysis_files_path>/<branch>/<repo>.json`` metrics file and
    writes three kinds of pages into ``subfolder``:

    * ``analysis.md`` -- one leaderboard table per split ("lite" and "all");
    * ``analysis_<branch>.md`` -- one row per repository of a submission;
    * ``analysis_<branch>_<repo>.md`` -- pytest summary, failure output and
      patch diff for one repository of a submission.

    Each metrics file is expected to hold a ``"submission_info"`` entry (with
    ``split``, ``project_page``, ``display_name`` and ``submission_date``)
    next to one entry per pytest group.

    Args:
        subfolder: Directory the Markdown files are written to. It must
            already exist.
    """
    log = logging.getLogger(__name__)

    leaderboard = {
        "lite": "## Leaderboard (Lite)\n\n",
        "all": "## Leaderboard (All)\n\n",
    }
    for split_name in tqdm.tqdm(["lite", "all"]):
        total_num_repos = 0
        total_num_tests = 0
        for repo_name in SPLIT[split_name]:
            total_num_repos += 1
            all_tests = subprocess.run(
                ["commit0", "get-tests", repo_name],
                capture_output=True,
                text=True,
            )
            total_num_tests += len(all_tests.stdout.strip().splitlines())
        # The delimiter row must have exactly as many cells as the header row,
        # otherwise strict Markdown renderers do not recognise the table.
        leaderboard[split_name] += (
            f"\n| | Name | Repos Resolved /{total_num_repos} "
            f"| Net Pass Rate /{total_num_tests} "
            "| Test Duration (s) | Date | Analysis | |"
            "\n|--|------|-----------|------|----------|------|---|--|"
        )

    for branch_path in tqdm.tqdm(glob.glob(os.path.join(analysis_files_path, "*"))):
        branch_name = os.path.basename(branch_path)
        if branch_name in {"blank", "repos", "submission_repos"}:
            continue

        # Tracked per branch. Testing `"split" in locals()` cannot work here
        # because the header loop above has already bound a split variable.
        submission_info = None
        repo_rows = []
        repos_resolved = 0
        cum_passed = 0
        total_duration = 0.0

        for repo_file in glob.glob(
            os.path.join(analysis_files_path, branch_name, "*.json")
        ):
            # glob already returns the complete path to the metrics file.
            with open(repo_file) as rf:
                repo_metrics = json.load(rf)
            repo_name = os.path.basename(repo_file[: -len(".json")])
            if submission_info is None:
                submission_info = repo_metrics.get("submission_info")

            repo_page = f"# Submission Name: {branch_name}\n# Repository: {repo_name}"
            patch_diff = ""
            last_group_info = None
            for pytest_group, pytest_info in repo_metrics.items():
                if pytest_group == "submission_info":
                    continue  # metadata entry, not a pytest group
                last_group_info = pytest_info
                pytest_group = os.path.basename(pytest_group.strip("/"))
                patch_diff = (
                    f"\n\n### Patch diff\n```diff\n{pytest_info['patch_diff']}```"
                )
                repo_page += _render_pytest_group(pytest_group, pytest_info)
                if "failed_to_run" not in pytest_info:
                    total_duration += pytest_info["duration"]
                    # Like "failed", the "passed" count may be absent.
                    cum_passed += pytest_info["summary"].get("passed", 0)

            back_button = f"[back to {branch_name} summary](analysis_{branch_name})\n\n"
            repo_page_path = os.path.join(
                subfolder, f"analysis_{branch_name}_{repo_name}.md"
            )
            with open(repo_page_path, "w") as wf:
                wf.write(back_button + repo_page + patch_diff)

            # The table row is based on the last real pytest group only.
            # NOTE(review): assumes a single pytest group per repository, as
            # the earlier "assume we ran all tests" logic did -- confirm.
            resolved, row = _render_repo_row(branch_name, repo_name, last_group_info)
            if resolved:
                repos_resolved += 1
            repo_rows.append(row)

        if submission_info is None:
            log.warning(
                "No submission_info found for %s; skipping its pages.", branch_name
            )
            continue
        split = submission_info["split"]
        if split not in leaderboard:
            log.warning(
                "Unknown split %r for %s; skipping its pages.", split, branch_name
            )
            continue
        display_name = submission_info["display_name"]
        project_page_link = submission_info["project_page"]
        submission_date = submission_info["submission_date"]

        submission_page = (
            f"# Submission Name: {display_name} ({split})\n\n"
            "| | Repository | Resolved | Pass Rate | Test Duration (s) | Analysis | |\n"
            "|-|------------|---------|-----|-----|-----|-|"
        ) + "".join(repo_rows)

        leaderboard[split] += (
            f"\n||[{display_name}]({project_page_link})|"
            f"{repos_resolved}|"
            f"{cum_passed}|"
            f"{total_duration:.2f}|"
            f"{submission_date}|"
            f"[Analysis](analysis_{branch_name})||"
        )

        back_button = "[back to all submissions](analysis)\n\n"
        with open(os.path.join(subfolder, f"analysis_{branch_name}.md"), "w") as wf:
            wf.write(back_button + "\n" + submission_page)

    with open(os.path.join(subfolder, "analysis.md"), "w") as wf:
        # A blank line keeps the second heading from being glued onto the last
        # row of the first table.
        wf.write(leaderboard["lite"] + "\n\n" + leaderboard["all"])


def _render_pytest_group(pytest_group, pytest_info):
    """Return the Markdown section describing one pytest group of a repo."""
    if "failed_to_run" in pytest_info:
        return f"\n## Failed to run pytests\n```\n{pytest_info['failed_to_run']}\n```"

    parts = [
        f"\n## Pytest Summary: {pytest_group}\n"
        "| status | count |\n"
        "|:---------|:-----:|\n"
    ]
    for category, count in pytest_info["summary"].items():
        if category == "duration":
            parts.append(f"| {category} | {float(count):.2f}s |\n")
        else:
            parts.append(f"| {category} | {count} |\n")

    parts.append(f"\n## Failed pytest outputs: {pytest_group}\n\n")
    for testname, failure in pytest_info["failures"].items():
        shortened_testname = os.path.basename(testname)
        parts.append(
            f"### {shortened_testname}\n\n<details><summary> <pre>{shortened_testname}"
            f"</pre></summary><pre>\n{failure['failure_string']}\n</pre>\n</details>\n"
        )
    return "".join(parts)


def _render_repo_row(branch_name, repo_name, pytest_info):
    """Return ``(resolved, row)`` for one repository of a submission page.

    ``pytest_info`` is the repository's pytest-group entry, or ``None`` when
    the metrics file contained no pytest group at all.
    """
    info = pytest_info or {}
    summary = info.get("summary")
    # NOTE(review): "resolved" only means that no test is reported as failed;
    # it does not check that every collected test passed -- confirm intent.
    resolved = summary is not None and summary.get("failed", 0) == 0
    if "failed_to_run" in info or summary is None:
        pytest_details = "Pytest failed"
    else:
        pytest_details = f"{summary.get('passed', 0)} / {summary.get('collected', 0)}"
    duration = f"{info['duration']:.2f}" if "duration" in info else "Failed."
    row = (
        f"\n| | {repo_name} | {'Yes' if resolved else 'No'} | {pytest_details} "
        f"| {duration} | [Analysis](analysis_{branch_name}_{repo_name}) | |"
    )
    return resolved, row
289297
290298def get_args ():
@@ -378,6 +386,7 @@ def main(args):
378386
379387 path_to_logs = f"{ os .getcwd ()} /logs/pytest/{ repo_name } /{ branch_name } "
380388 pytest_results = get_pytest_info (path_to_logs , repo_name , branch_name )
389+ pytest_results ["submission_info" ] = example
381390 json .dump (pytest_results , open (repo_metrics_output_file , "w" ), indent = 4 )
382391
383392 if args .analyze_submissions :
@@ -394,7 +403,7 @@ def main(args):
394403 print (f"{ e } : when removing { subfolder } " )
395404
396405 for submission in submission_dataset :
397- branch_name = submission ["name " ]
406+ branch_name = submission ["branch " ]
398407 os .makedirs (os .path .join (analysis_files_path , branch_name ), exist_ok = True )
399408 if not args .keep_previous_eval :
400409 for repo_log_path in glob .glob (f"{ os .getcwd ()} /logs/pytest/*" ):
0 commit comments