-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathreporting.py
More file actions
948 lines (808 loc) · 39.5 KB
/
reporting.py
File metadata and controls
948 lines (808 loc) · 39.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
"""
License GPLv3 or higher.
(C) 2025 Created by Maikel Mardjan - https://nocomplexity.com/
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
Reporting functions for codeaudit
"""
import re
import os
from pathlib import Path
import sys
from importlib.metadata import version
import pandas as pd
import html
import datetime
from codeaudit.security_checks import perform_validations, ast_security_checks
from codeaudit.filehelpfunctions import (
get_filename_from_path,
collect_python_source_files,
read_in_source_file,
has_python_files,
is_ast_parsable,
)
from codeaudit.altairplots import multi_bar_chart
from codeaudit.totals import (
get_statistics,
overview_count,
overview_per_file,
total_modules,
)
from codeaudit.checkmodules import (
get_imported_modules,
check_module_vulnerability,
get_all_modules,
get_imported_modules_by_file,
)
from codeaudit.htmlhelpfunctions import json_to_html, dict_list_to_html_table
from codeaudit.pypi_package_scan import get_pypi_download_info, get_package_source
from codeaudit.privacy_lint import data_egress_scan, has_privacy_findings
from codeaudit.suppression import filter_sast_results
from codeaudit.api_interfaces import _collect_issue_lines
from importlib.resources import files
# Version of the installed "codeaudit" distribution; embedded in generated reports.
CA_VERSION = version("codeaudit")
# HTML link to the project repository, reused in the disclaimer and the report trailer.
PYTHON_CODE_AUDIT_TEXT = '<a href="https://github.com/nocomplexity/codeaudit" target="_blank"><b>Python Code Audit</b></a>'
# Disclaimer paragraph appended at the end of the scan reports.
DISCLAIMER_TEXT = (
    "<p><b>Disclaimer:</b> <i>This SAST tool "
    + PYTHON_CODE_AUDIT_TEXT
    + " provides a powerful, automatic security analysis for Python source code. However, it's not a substitute for human review in combination with business knowledge. Undetected vulnerabilities may still exist.</i></p>"
)
# Informational paragraph added to a report when the nosec option is enabled.
NOSEC_WARNING = "<p><b>INFO</b>: The --nosec flag is active. Security findings with in-line suppressions will be excluded from the report.</p>"
# Stylesheet shipped inside the codeaudit package; its content is inlined into the HTML output.
SIMPLE_CSS_FILE = files("codeaudit") / "simple.css"
# Report file name used when the caller does not supply one.
DEFAULT_OUTPUT_FILE = "codeaudit-report.html"
def overview_report(directory, filename=DEFAULT_OUTPUT_FILE):
    """Generates an overview report of code complexity and security indicators.

    This function analyzes a Python project to produce a high-level overview of
    complexity and security-related metrics. The input may be either:

    - A local directory containing Python source files
    - The name of a package hosted on PyPI.org

    So:
        codeaudit overview <package-name|directory> [reportname.html]

    For PyPI packages, the source distribution (sdist) is downloaded,
    extracted to a temporary directory, scanned, and removed after the data
    for the report has been collected (also when collecting fails).

    The report includes summary statistics, security risk indicators based on
    complexity and total lines of code, a list of discovered modules, per-file
    metrics, and a visual overview. Results are written to a static HTML file.

    Examples:
        Generate an overview report for a local project directory::

            codeaudit overview /projects/mycolleaguesproject

        Generate an overview report for a PyPI package::

            codeaudit overview linkaudit   #A nice project on PyPI.org
            codeaudit overview pydantic    #A complex project on PyPI.org from a security perspective?

    Args:
        directory (str): Path to a local directory containing Python source files
            or the name of a package available on PyPI.org.
        filename (str, optional): Name (and optional path) of the HTML file to
            write the overview report to. The filename should use the ``.html``
            extension. Defaults to ``DEFAULT_OUTPUT_FILE``.

    Returns:
        None. The function writes a static HTML overview report to disk.

    Raises:
        SystemExit: If the provided path is not a directory, contains no Python
            files, is neither a valid local directory nor a valid PyPI
            package name, or the PyPI package offers no source distribution.
    """
    package_name = None
    release = None
    tmp_handle = None  # set only when sources were downloaded from PyPI

    if os.path.exists(directory):
        # Check if the path is actually a directory
        if not os.path.isdir(directory):
            print(f"ERROR: '{directory}' is not a directory.")
            print(
                "This function only works for directories containing Python files (*.py)."
            )
            sys.exit(1)
        # Check if the directory contains any .py files
        if not has_python_files(directory):
            print(f"ERROR: Directory '{directory}' contains no Python files.")
            sys.exit(1)
    else:
        # The local path doesn't exist, so try to treat the input as a PyPI package.
        print(
            f"No local directory with name:{directory} found locally. Checking if package exist on PyPI..."
        )
        # Query PyPI once and reuse the answer instead of asking a second time.
        pypi_data = get_pypi_download_info(directory)
        if not pypi_data:
            # Neither a local directory nor a valid PyPI package
            print(
                f"ERROR: '{directory}' is not a local directory or a valid PyPI package."
            )
            sys.exit(1)
        package_name = directory
        print(f"Package: {package_name} exist on PyPI.org!")
        url = pypi_data["download_url"]
        release = pypi_data["release"]
        if url is None:
            # Without a download URL there is nothing to analyse; continuing
            # would treat the package name as a (non-existent) local directory.
            print(
                f"ERROR: No source distribution (sdist) found on PyPI.org for package: {package_name}."
            )
            sys.exit(1)
        print(f"Creating Python Code Audit overview for package:\n{url}")
        directory, tmp_handle = get_package_source(url)

    try:
        # Everything that needs the source tree happens inside this block.
        result = get_statistics(directory)
        modules = total_modules(directory)
        df = pd.DataFrame(result)
        df["Std-Modules"] = modules["Std-Modules"]
        df["External-Modules"] = modules["External-Modules"]
        overview_df = overview_count(df)
        modules_discovered = get_all_modules(directory)
    finally:
        if tmp_handle is not None:
            # Clean up tmp directory if overview is created directly from PyPI package
            tmp_handle.cleanup()

    parts = ["<h1>Python Code Audit overview report</h1><br>"]
    if package_name is not None:
        parts.append(
            f"<p>Codeaudit overview scan of package:<b> {html.escape(str(package_name))}</b></p>"
        )
        parts.append(f"<p>Version:<b>{html.escape(str(release))}</b></p>")
    else:
        parts.append(
            f"<p>Overview for the directory:<b> {html.escape(str(directory))}</b></p>"
        )
    parts.append("<h2>Summary</h2>")
    parts.append(overview_df.to_html(escape=True, index=False))
    parts.append("<br><br>")

    # A maximum complexity above 40 in any single file is rated as a high concern.
    if overview_df.loc[0, "Maximum_Complexity"] > 40:
        parts.append(
            "<p>Based on the maximum found complexity in a source file: Security concern rate is <b>❌ HIGH</b>.</p>"
        )
    else:
        parts.append(
            "<p>Based on the maximum found complexity in a source file: Security concern rate is <b>✅ LOW</b>.</p>"
        )
    # More than 2000 lines of code in total is rated as a high concern.
    if overview_df.loc[0, "Number_Of_Lines"] > 2000:
        parts.append(
            "<p>Based on the total Lines of Code (LoC) : Security concern rate is <b>❌ HIGH</b>.</p>"
        )
    else:
        parts.append(
            "<p>Based on the total Lines of Code (LoC) : Security concern rate is <b>✅ LOW</b>.</p>"
        )
    parts.append("<br>")

    # Module overview
    parts.append("<details>")
    parts.append("<summary>View all discovered modules.</summary>")
    parts.append(display_found_modules(modules_discovered))
    parts.append("</details>")

    parts.append("<h2>Detailed overview per source file</h2>")
    parts.append("<details>")
    parts.append("<summary>View the report details.</summary>")
    per_file_df = pd.DataFrame(result)
    parts.append(per_file_df.to_html(escape=True, index=False))
    parts.append("</details>")

    # The chart does not use the file path column, so it is dropped first.
    plot = multi_bar_chart(per_file_df.drop(columns=["FilePath"]))
    parts.append("<br><br>")
    parts.append("<h2>Visual Overview</h2>")
    parts.append(extract_altair_html(plot.to_html()))

    parts.append("<p><b>💬 Advice:</b></p>")
    # For a PyPI scan the advice names the package, not the removed temp directory.
    scan_target = package_name if package_name is not None else directory
    parts.append(
        '<p>👉 To perform a SAST scan on the source code, run:<pre><code class="language-python">'
        f"codeaudit filescan {html.escape(str(scan_target))}</code></pre></p>"
    )
    create_htmlfile("".join(parts), filename)
def display_found_modules(modules_discovered):
    """Render the discovered modules as two HTML bullet lists.

    Args:
        modules_discovered (dict): Mapping with the keys 'core_modules' and
            'imported_modules'; each value is an iterable of module names.

    Returns:
        str: HTML fragment with a heading and a ``<ul>`` list for the standard
        library modules, followed by the same for the imported packages.
    """

    def _as_bullet_list(names):
        # One <li> per module name, wrapped in a single <ul> element.
        items = [f" <li>{name}</li>" for name in names]
        return "<ul>\n" + "\n".join(items) + "\n</ul>"

    sections = (
        (
            "<p><b>Used Python Standard libraries:</b></p>",
            modules_discovered["core_modules"],
        ),
        (
            "<p><b>Imported libraries (packages):</b></p>",
            modules_discovered["imported_modules"],
        ),
    )
    return "".join(heading + _as_bullet_list(names) for heading, names in sections)
def scan_report(input_path, filename=DEFAULT_OUTPUT_FILE, nosec=False):
    """Scans Python source code or PyPI packages for security weaknesses.

    This function performs static application security testing (SAST) on a
    specified input. The input can be one of the following:

    * A local directory containing Python source code
    * A single local Python file
    * The name of a package hosted on PyPI

        codeaudit filescan <pythonfile|package-name|directory> [reportname.html] [--nosec]

    Based on the input type, the function analyzes the source code for potential
    security issues, generates an HTML report summarizing the findings, and
    writes the report to disk.

    If a PyPI package name is provided, the function downloads the source
    distribution (sdist), extracts it to a temporary directory, scans the
    extracted source code, and cleans up all temporary files after the scan
    completes (also when the scan fails).

    Examples:
        Scan a local directory and write the report to ``report.html``::

            codeaudit filescan /path/to/custompythonmodule report.html

        Scan a local directory::

            codeaudit filescan /path/to/project

        Scan a single Python file::

            codeaudit filescan myexample.py

        Scan a package hosted on PyPI::

            codeaudit filescan linkaudit
            codeaudit filescan requests

        Specify an output report file::

            codeaudit filescan /path/to/project report.html

        Enable filtering of issues marked with ``#nosec`` or another marker on
        potential code weaknesses that are mitigated or known::

            codeaudit filescan myexample.py --nosec

    Args:
        input_path (str): Path to a local Python file or directory, or the name
            of a package available on PyPI.
        filename (str, optional): Name (and optional path) of the HTML file to
            write the scan report to. The filename should use the ``.html``
            extension. Defaults to ``DEFAULT_OUTPUT_FILE``
            ('codeaudit-report.html'). CLI flag: ``-f, --filename=FILENAME``.
        nosec (bool, optional): Whether to filter out issues marked as reviewed
            or ignored in the source code. Defaults to ``False``, no filtering.
            CLI flag: ``-n, --nosec=NOSEC``.

    Returns:
        None: The function writes a static HTML security report to disk.

    Raises:
        None: Errors and invalid inputs are reported to stdout.
    """
    file_path = Path(input_path)

    # Case 1: a local directory -> package aka directory scan report.
    if file_path.is_dir():
        directory_scan_report(input_path, nosec_flag=nosec, filename=filename)
        return

    # Case 2: a single, parsable Python file -> SAST file check report.
    if (
        file_path.suffix == ".py"
        and file_path.is_file()
        and is_ast_parsable(input_path)
    ):
        scan_output = perform_validations(input_path)  # scans for weaknesses
        if nosec:
            # Drop findings that carry an in-line suppression marker.
            scan_output = filter_sast_results(scan_output)
        spy_output = data_egress_scan(input_path)  # scans for secrets in the file
        name_of_file = get_filename_from_path(input_path)

        parts = [
            "<h1>Python Code Audit Report</h1>",
            f"<h2>Security scan: {html.escape(str(name_of_file))}</h2>",
            f"<p>Location of the file: {html.escape(str(input_path))} </p>",
        ]
        if nosec:
            parts.append(NOSEC_WARNING)
        parts.append(single_file_report(input_path, scan_output))
        parts.append(secrets_report(spy_output))
        parts.append("<br>")
        parts.append(DISCLAIMER_TEXT)
        create_htmlfile("".join(parts), filename)
        return

    # Case 3: no usable local input -> check whether it is a PyPI package name.
    # The metadata is fetched once and reused.
    pypi_data = get_pypi_download_info(input_path)
    if not pypi_data:
        # File is NOT a valid Python file, can not be parsed or directory is invalid.
        print(
            f"Error: '{input_path}' isn't a valid Python file, directory path to a package or a package on PyPI.org."
        )
        return

    package_name = input_path
    print(f"Package: {package_name} exist on PyPI.org!")
    print(
        f"Now SAST scanning package from the remote location: https://pypi.org/pypi/{package_name}"
    )
    url = pypi_data["download_url"]
    release = pypi_data["release"]
    if url is None:
        print(
            f"Error:A source distribution (sdist in .tar.gz format) for package: {package_name} can not be found or does not exist on PyPi.org.\n"
        )
        print(
            f"Make a local git clone of the {package_name} using `git clone` and run `codeaudit filescan <directory-with-src-cloned-of-{package_name}>` to check for weaknesses."
        )
        return

    print(url)
    print(release)
    src_dir, tmp_handle = get_package_source(url)
    try:
        directory_scan_report(
            src_dir,
            nosec_flag=nosec,
            filename=filename,
            package_name=package_name,
            release=release,
        )
    finally:
        # Always remove the downloaded sources, even if the scan raised or exited.
        tmp_handle.cleanup()
def secrets_report(spy_output):
    """
    Build the HTML section about external egress risk for one scan result.

    When ``has_privacy_findings`` reports findings for ``spy_output``, the
    section contains a warning plus a collapsible table produced by
    ``pylint_reporting``. Otherwise a single paragraph stating that the risk
    of data exfiltration is low is returned.

    Args:
        spy_output (object): Result of the data egress scan; passed unchanged
            to ``has_privacy_findings`` and ``pylint_reporting``.

    Returns:
        str: HTML fragment describing the external egress risk.
    """
    if not has_privacy_findings(spy_output):
        return "<br><p>✅ No Logic for connecting to remote services found. Risk of data exfiltration to external systems is <b>low</b>.</p>"

    # The code cells already contain HTML markup, hence escape=False.
    findings_table = pylint_reporting(spy_output).to_html(escape=False, index=False)
    fragments = [
        "<br><p>⚠️ <b>External Egress Risk</b>: Detected outbound connection logic or API keys that may facilitate data egress.</p>",
        "<details>",
        "<summary>View detailed analysis of possible data egress logic or external service usage.</summary>",
        findings_table,
        "</details>",
        "<br>",
    ]
    return "".join(fragments)
def pylint_reporting(result):
    """
    Turn privacy findings into a pandas DataFrame with the columns
    'line', 'found' and 'code'.

    For every entry below ``result["file_privacy_check"]`` the code snippet is
    HTML-escaped, its newlines are replaced by ``<br>`` and the outcome is
    wrapped in a ``<pre><code>`` block.

    Args:
        result (dict): Scan result; a missing or empty 'file_privacy_check'
            key yields an empty DataFrame.

    Returns:
        pandas.DataFrame: One row per finding, columns 'line', 'found', 'code'.
    """
    code_block = '<pre><code class="language-python">{}</code></pre>'
    per_file = result.get("file_privacy_check") or {}

    records = []
    for file_result in per_file.values():
        for finding in file_result.get("privacy_check_result", []):
            # Escape first so the inserted <br> tags are not escaped themselves.
            safe_code = html.escape(finding.get("code", "")).replace("\n", "<br>")
            records.append(
                {
                    "line": finding.get("lineno"),
                    "found": finding.get("matched"),
                    "code": code_block.format(safe_code),
                }
            )

    return pd.DataFrame(records, columns=["line", "found", "code"])
def single_file_report(filename, scan_output):
    """Build the HTML report fragment for one scanned Python file.

    Shared by the single-file scan and the directory scan so the per-file
    layout lives in one place.

    Args:
        filename (str): Path of the scanned file; used for the file overview,
            the module listing and the modulescan hint.
        scan_output (dict): Scan result with the keys 'result' (mapping of
            validation name to line numbers) and 'file_location'.

    Returns:
        str: HTML fragment with the findings table (only when findings exist),
        the file overview table and the list of modules used by the file.
    """
    findings = scan_output["result"]

    # One row per (validation, line number) pair.
    flattened = []
    for check_name, line_numbers in findings.items():
        for line_number in line_numbers:
            flattened.append((check_name, line_number))
    df = pd.DataFrame(flattened, columns=["validation", "line"])
    number_of_issues = len(df)

    df["severity"] = None
    df["info"] = None
    for check_name in findings:
        severity, info_text = get_info_on_test(check_name)
        row_labels = df.index[df["validation"] == check_name]
        if len(row_labels) > 0:
            # Fill severity and info for every row of this validation.
            df.loc[row_labels, ["severity", "info"]] = [severity, info_text]

    df["code"] = None
    source_location = scan_output["file_location"]
    for row_label, line_number in zip(df.index, df["line"]):
        df.at[row_label, "code"] = _collect_issue_lines(source_location, line_number)
    # Newlines become <br> so multi-line snippets display inside a table cell.
    df["code"] = df["code"].str.replace(r"\n", "<br>", regex=True)
    # Shorten the validation name to keep the column narrow without pandas styling.
    df["validation"] = df["validation"].apply(replace_second_dot)

    column_order = ["line", "validation", "severity", "info", "code"]
    df = df[column_order].sort_values(by="line")

    fragments = []
    if number_of_issues > 0:
        plural = "s" if number_of_issues != 1 else ""
        fragments.append(
            f"<p>⚠️ <b>{number_of_issues}</b> potential <b>security issue{plural}</b> found!</p>"
        )
        fragments.append("<details>")
        fragments.append("<summary>View identified security weaknesses.</summary>")
        fragments.append(df.to_html(escape=False, index=False))
        fragments.append("</details>")
        fragments.append("<br>")
    # No message when nothing was found, since privacy breaches may be present.

    overview_table = pd.DataFrame([overview_per_file(filename)])
    fragments.append("<details>")
    fragments.append(
        "<summary>View detailed analysis of security relevant file details.</summary>"
    )
    fragments.append(overview_table.to_html(escape=True, index=False))
    fragments.append("</details>")
    fragments.append("<br>")

    fragments.append("<details>")
    fragments.append("<summary>View used modules in this file.</summary>")
    fragments.append(display_found_modules(get_imported_modules_by_file(filename)))
    fragments.append(
        f'<p>To check for <b>reported vulnerabilities</b> in external modules used by this file, use the command:<br><div class="code-box">codeaudit modulescan {filename}</div><br></p>'
    )
    fragments.append("</details>")
    return "".join(fragments)
def directory_scan_report(
    directory_to_scan,
    nosec_flag,
    filename=DEFAULT_OUTPUT_FILE,
    package_name=None,
    release=None,
):
    """Reports potential security issues for all Python files found in a directory.

    This function performs security validations on all files found in a specified directory.
    The result is written to a HTML report.
    You can specify the name and directory for the generated HTML report.

    Parameters:
        directory_to_scan (str): The full path to the Python source files to be scanned.
            Can be present in a temp directory.
        nosec_flag (bool): When true, findings are passed through
            ``filter_sast_results`` and the report mentions the active nosec option.
        filename (str, optional): The name of the HTML file to save the report to.
            Defaults to `DEFAULT_OUTPUT_FILE`.
        package_name (str, optional): Name of the scanned package. When given, the
            report header shows package name and release instead of the directory
            name, and file locations are omitted.
        release (str, optional): Release shown next to ``package_name``.

    Returns:
        None - A HTML report is written as output

    Raises:
        SystemExit: If ``directory_to_scan`` is not a directory.
    """
    # Check if the provided path is a valid directory
    if not os.path.isdir(directory_to_scan):
        print(f"Error: '{directory_to_scan}' is not a valid directory.")
        sys.exit(1)

    files_to_check = collect_python_source_files(directory_to_scan)
    number_of_files = len(files_to_check)
    collection_ok_files = []  # files for which nothing was reported

    parts = [
        "<h1>Python Code Audit Report</h1>",
        "<h2>Directory scan report</h2>",
    ]
    if package_name is not None:
        # Use real package name and retrieved release info
        parts.append(
            "<p>Below the result of the Codeaudit scan of (Package name - Release):</p>"
        )
        parts.append(
            f"<p><b> {html.escape(str(package_name))} - {html.escape(str(release))} </b></p>"
        )
    else:
        name_of_package = get_filename_from_path(directory_to_scan)
        parts.append(
            f"<p>Below the result of the Codeaudit scan of the directory:<b> {html.escape(str(name_of_package))}</b></p>"
        )
    parts.append(f"<p>Total Python files found: <b>{number_of_files}</b></p>")
    if nosec_flag:
        parts.append(NOSEC_WARNING)

    print(f"Number of files that are checked for security issues:{number_of_files}")
    printProgressBar(
        0, number_of_files, prefix="Progress:", suffix="Complete", length=50
    )
    for i, file_to_scan in enumerate(files_to_check):
        printProgressBar(
            i + 1, number_of_files, prefix="Progress:", suffix="Complete", length=50
        )
        scan_output = perform_validations(file_to_scan)  # scans for weaknesses
        if nosec_flag:
            # Drop findings that carry an in-line suppression marker.
            scan_output = filter_sast_results(scan_output)
        spy_output = data_egress_scan(file_to_scan)  # scans for secrets in the file

        name_of_file = get_filename_from_path(file_to_scan)
        if scan_output["result"] or has_privacy_findings(spy_output):
            parts.append(f"<h3>Security scan: {html.escape(str(name_of_file))}</h3>")
            if package_name is None:
                # For PyPI scans the location is a temp directory, so it is omitted.
                parts.append(
                    f"<p>Location of the file: {html.escape(str(file_to_scan))} </p>"
                )
            parts.append(single_file_report(file_to_scan, scan_output))
            parts.append(secrets_report(spy_output))
        else:
            collection_ok_files.append(
                {"filename": name_of_file, "directory": file_to_scan}
            )

    parts.append("<h2>Files in directory with no security issues</h2>")
    parts.append(
        f"<p>✅ Total Python files <b>without</b> detected security issues: {len(collection_ok_files)}</p>"
    )
    # The paragraph is now closed properly (the original ended with a second <p>).
    parts.append(
        "<p>The Python files with no security issues <b>detected</b> by codeaudit are:</p>"
    )
    parts.append(dict_list_to_html_table(collection_ok_files))
    parts.append("<br>")
    if package_name is not None:
        parts.append(
            "<p><b>Note:</b><i>Since this check is done on a package on PyPI.org, the temporary local directories are deleted. To examine the package in detail, you should download the sources locally and run the command:<code>codeaudit filescan</code> again.</i></p>"
        )
    parts.append(
        "<p><b>Disclaimer:</b><i>This scan only evaluates Python files. Please note that security vulnerabilities may also exist in other files associated with the Python module.</i></p>"
    )
    parts.append(DISCLAIMER_TEXT)
    create_htmlfile("".join(parts), filename)
def report_module_information(inputfile, reportname=DEFAULT_OUTPUT_FILE):
    """
    Generate a report on known vulnerabilities in Python modules and packages.

    This function analyzes a single Python file to identify imported
    external modules and runs a vulnerability check for each of them. The
    collected results are written to a static HTML report.

    If the input refers to a valid PyPI package name instead of a local Python
    file, the function generates a vulnerability report directly for that
    package.

    While processing modules, progress information is printed to standard
    output.

    Example:
        Generate a module vulnerability report for a Python file::

            codeaudit modulescan <pythonfile>|<package> [yourreportname.html]
            codeaudit modulescan mypythonfile.py

    Args:
        inputfile (str): Path to a Python source file (*.py) to analyze, or the
            name of a package available on PyPI.
        reportname (str, optional): Name (and optional path) of the HTML file to
            write the vulnerability report to. The filename should use the
            ``.html`` extension. Defaults to ``DEFAULT_OUTPUT_FILE``.

    Returns:
        None: The function writes a static HTML report to disk.

    Raises:
        SystemExit: If the input is a directory, or is neither a valid Python
            file nor a valid PyPI package. The reason is printed to standard
            output before exiting.
    """
    file_path = Path(inputfile)
    if file_path.is_dir():
        print(
            "codeaudit modulescan only works on single python files (*.py) or packages present on PyPI.org"
        )
        print(
            "See codeaudit modulescan -h or check the manual https://codeaudit.nocomplexity.com"
        )
        sys.exit(1)

    parts = ["<h1>Python Code Audit Report</h1>"]
    safe_input = html.escape(str(inputfile))

    if file_path.suffix == ".py" and file_path.is_file() and is_ast_parsable(inputfile):
        source = read_in_source_file(inputfile)
        external_modules = get_imported_modules(source)["imported_modules"]
        module_count = len(external_modules)
        # Initial call to print 0% progress
        printProgressBar(
            0, module_count, prefix="Progress:", suffix="Complete", length=50
        )
        parts.append("<h2>Module scan report</h2>")
        parts.append(f"<p>Security information for file: <b>{safe_input}</b></p>")
        parts.append(f"<p>Total Dependencies Scanned: {module_count} </p>")
        if external_modules:
            parts.append("<details>")
            parts.append(
                "<summary>View scanned module dependencies(imported packages).</summary>"
            )
            parts.append(
                "<ul>\n"
                + "\n".join(f" <li>{module}</li>" for module in external_modules)
                + "\n</ul>"
            )
            parts.append("</details>")
            # Vulnerability information per external module, in discovery order.
            parts.append("<h3>Vulnerability information for detected modules</h3>")
            for i, module in enumerate(external_modules):
                printProgressBar(
                    i + 1,
                    module_count,
                    prefix="Progress:",
                    suffix="Complete",
                    length=50,
                )
                parts.append(module_vulnerability_check(module) + "<br>")
        else:
            parts.append("<p>✅ No external modules found!</p>")
        parts.append(
            f'<br><p>💡 To check for <b>security weaknesses</b> in this file, use the command:<div class="code-box">codeaudit filescan {safe_input}</div><br></p>'
        )
    elif get_pypi_download_info(inputfile):
        # The input is treated as a package name from here on.
        parts.append("<h2>Package scan report for known vulnerabilities</h2>")
        parts.append(module_vulnerability_check(inputfile))
        parts.append(
            f'<br><p>💡 To check for <b>security weaknesses</b> in this package, use the command:<div class="code-box">codeaudit filescan {safe_input}</div><br></p>'
        )
    else:
        # File is NOT a valid Python file, or package does not exist on PyPI.
        print(
            f"Error: '{inputfile}' isn't a valid Python file(*.py), or a valid package on PyPI.org."
        )
        sys.exit(1)

    parts.append("<br>" + DISCLAIMER_TEXT)
    create_htmlfile("".join(parts), reportname)
def module_vulnerability_check(module):
    """
    Return the HTML fragment with the vulnerability result for one module.

    The module name is handed to ``check_module_vulnerability``:

    - A falsy result produces a single success paragraph.
    - Any other result is rendered with ``json_to_html`` inside a collapsible
      ``<details>`` element.

    Args:
        module (str): Name of the Python package/module to check.

    Returns:
        str: HTML string representing the vulnerability scan result for the module.
    """
    vuln_info = check_module_vulnerability(module)
    if not vuln_info:
        # No SAST scan of the package here; run a filescan on the package manually.
        return f"<p>✅ No known vulnerabilities found for package: <b>{module}</b>.</p>"

    return "".join(
        (
            "<details>",
            f"<summary>⚠️ View vulnerability information for package <b>{module}</b>.</summary>",
            json_to_html(vuln_info),
            "</details>",
        )
    )
def create_htmlfile(html_input, outputfile):
    """
    Wrap an HTML fragment in a complete report page and write it to disk.

    The page embeds the stylesheet read from ``SIMPLE_CSS_FILE``, loads the
    Vega / Vega-Lite / Vega-Embed scripts from jsdelivr, and appends a
    creation timestamp, the tool version and a footer with project links.

    Args:
        html_input (str): HTML fragment placed inside the page container.
        outputfile (str): Path of the report file to create; ``~`` is expanded
            and the path is resolved to an absolute one.

    Side effects:
        Writes the report as UTF-8, prints the resulting ``file://`` URI to
        stdout, and exits with status 1 (message on stderr) when the parent
        directory of the target does not exist.
    """
    target = Path(outputfile).expanduser().resolve()
    report_dir = target.parent

    # Validate output directory (CLI-friendly): fail early with a clear message.
    if not report_dir.is_dir():
        print(
            f"Error: output directory does not exist:\n {report_dir}",
            file=sys.stderr,
        )
        sys.exit(1)

    # The stylesheet is inlined into the report instead of being linked.
    stylesheet = Path(SIMPLE_CSS_FILE).read_text(encoding="utf-8")
    created_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")

    footer_links = (
        '<div class="footer-links">'
        'Check the <a href="https://nocomplexity.com/documents/codeaudit/intro.html" '
        'target="_blank">documentation</a> for help on found issues.<br>'
        'Codeaudit is made with <span class="heart">❤</span> by cyber security '
        'professionals who advocate for <a href="https://nocomplexity.com/simplify-security/" '
        'target="_blank">open simple security solutions</a>.<br>'
        '<a href="https://nocomplexity.com/documents/codeaudit/CONTRIBUTE.html" '
        'target="_blank">Join the community</a> and contribute to make this tool better!'
        "</div>"
    )

    # Page pieces in document order; joined once at the end.
    page_parts = [
        '<!DOCTYPE html><html lang="en-US"><head>',
        '<meta charset="UTF-8"/>',
        "<title>Python_Code_Audit_SecurityReport</title>",
        f"<style>\n{stylesheet}\n</style>",
        '<script src="https://cdn.jsdelivr.net/npm/vega@5"></script>',
        '<script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>',
        '<script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>',
        "</head><body>",
        '<div class="container">',
        html_input,
        f"<p>This Python security report was created on: <b>{created_at}</b> "
        f"with {PYTHON_CODE_AUDIT_TEXT} version <b>{CA_VERSION}</b></p>",
        "<hr>",
        "<footer>",
        footer_links,
        "</footer>",
        "</div>",
        "</body></html>",
    ]

    target.write_text("".join(page_parts), encoding="utf-8")

    # Tell the user where the report is, as a URI that can be pasted in a browser.
    print("\n=====================================================================")
    print(
        "Code Audit report file created!\n"
        "Paste the line below directly into your browser bar:\n"
        f"\t{target.as_uri()}\n"
    )
    print("=====================================================================\n")
def extract_altair_html(plot_html):
    """
    Return the content of the ``<body>`` element of an HTML document.

    The match is case-insensitive and spans newlines; only the first
    ``<body>...</body>`` pair is used.

    Args:
        plot_html (str): Complete HTML document (e.g. an exported plot).

    Returns:
        str: The stripped body content followed by a newline, or a visible
        placeholder paragraph when no ``<body>`` element is found.
    """
    body = re.search(r"<body[^>]*>(.*?)</body>", plot_html, re.DOTALL | re.IGNORECASE)
    if body is None:
        # Visible (non-empty) placeholder so a missing plot is noticed in the
        # report. The paragraph is closed so the fragment is well-formed HTML.
        return "<p>Altair plot was supposed to be here: But something went wrong! Fix needed.</p>"
    return f"{body.group(1).strip()}\n"
# Replace the second dot with <br>
def replace_second_dot(s):
    """
    Replace the second ``.`` in *s* with an HTML line break (``<br>``).

    Strings containing fewer than two dots are returned unchanged; any dots
    after the second one are kept as they are.
    """
    pieces = s.split(".", 2)
    if len(pieces) < 3:
        return s
    first, second, remainder = pieces
    return f"{first}.{second}<br>{remainder}"
def get_info_on_test(error):
    """
    Look up severity and help text for a detected construct.

    The checks table returned by ``ast_security_checks()`` is searched for a
    row whose ``construct`` column equals *error* exactly. When there is no
    exact match but *error* contains ``"extractall"``, the row for
    ``"tarfile.TarFile"`` is used instead.

    Args:
        error (str): Construct name to search for in the ``construct`` column.

    Returns:
        tuple: ``(severity, info_text)`` taken from the first matching row.

    Raises:
        SystemExit: With status 1, after printing an error message, when no
            matching (or fallback) row exists.
    """
    checks = ast_security_checks()

    # Preferred path: exact match on the construct name.
    exact_rows = checks[checks["construct"] == error]
    if not exact_rows.empty:
        row = exact_rows.iloc[0]  # first matching row wins
        return row["severity"], row["info"]

    if "extractall" in error:
        # Shortcut for extractall: when tarfile and zipfile are both used via
        # aliases, static AST resolution cannot always tell them apart (see
        # the open issues), so the tarfile.TarFile entry is reported.
        fallback_rows = checks[checks["construct"] == "tarfile.TarFile"]
        if not fallback_rows.empty:
            row = fallback_rows.iloc[0]
            return row["severity"], row["info"]
        print("\nERROR: No fallback row found for 'tarfile.extractall'")
        # sys.exit instead of the site-provided exit(), which is intended for
        # interactive use and is not guaranteed to exist in every runtime.
        sys.exit(1)

    print(f"\nERROR: No row found for '{error}'")
    print(f"No rows found exactly matching '{error}'.")
    sys.exit(1)
def report_implemented_tests(filename=DEFAULT_OUTPUT_FILE):
    """
    Creates an HTML report of all implemented security checks.

    This report provides a user-friendly overview of the static security
    checks currently supported by Python Code Audit, so the available
    validations can be reviewed without digging through the codebase.

    The generated HTML includes:
    - A table of all implemented checks, sorted by construct
    - The number of validations
    - The version of Python Code Audit (codeaudit) used
    - A note that issue reports should state the codeaudit version
    - The standard disclaimer text

    The report is written to *filename* via ``create_htmlfile``.

    Help continue developing Python Code Audit as free and open-source
    software; to contribute, check:
    https://github.com/nocomplexity/codeaudit#contributing

    Parameters:
        filename (str): The output HTML filename. Defaults to
            ``DEFAULT_OUTPUT_FILE``.
    """
    df_checks = ast_security_checks()

    # Break long construct names after the second dot to keep the column
    # narrow without resorting to pandas styling options.
    df_checks["construct"] = df_checks["construct"].apply(replace_second_dot)
    df_checks_sorted = df_checks.sort_values(by="construct")
    number_of_test = len(df_checks)

    # NOTE(review): an earlier comment said <h2> is used so the report can be
    # embedded in multi-report documents, but the heading emitted is <h1>.
    # The markup is left as-is to keep the report output stable.
    output = "<h1>Python Code Audit Implemented validations</h1>"
    # escape=False keeps the <br> inserted into the construct names as markup.
    output += df_checks_sorted.to_html(escape=False, index=False)
    output += "<br>"
    output += (
        f"<p>Number of implemented security validations:<b>{number_of_test}</b></p>"
    )
    # The closing </p> was missing here, leaving the paragraph unterminated.
    output += f"<p>Version of codeaudit: <b>{CA_VERSION}</b></p>"
    output += "<p>Because Python and cybersecurity are constantly changing, issue reports <b>SHOULD</b> specify the codeaudit version used.</p>"
    output += DISCLAIMER_TEXT
    create_htmlfile(output, filename)
def printProgressBar(
    iteration,
    total,
    prefix="",
    suffix="",
    decimals=1,
    length=100,
    fill="█",
    printEnd="\r",
):
    """
    Draw one frame of a terminal progress bar; call it from inside a loop.

    @params:
    iteration - Required : current iteration (Int)
    total - Required : total iterations (Int)
    prefix - Optional : prefix string (Str)
    suffix - Optional : suffix string (Str)
    decimals - Optional : positive number of decimals in percent complete (Int)
    length - Optional : character length of bar (Int)
    fill - Optional : bar fill character (Str)
    printEnd - Optional : end character, a carriage return by default (Str)

    A total of 0 is shown as "100" percent with an unfilled bar and never
    emits the final newline.
    """
    has_work = total != 0

    if has_work:
        percent = f"{100 * (iteration / float(total)):.{decimals}f}"
        filled = int(length * iteration // total)
        bar = fill * filled + "-" * (length - filled)
    else:
        # Nothing to iterate over: avoid dividing by zero.
        percent = "100"
        bar = "-" * length

    print(f"\r{prefix} |{bar}| {percent}% {suffix}", end=printEnd)

    # Move to a fresh line once the last iteration has been drawn.
    if has_work and iteration >= total:
        print()