"""
License GPLv3 or higher.
(C) 2025 Created by Maikel Mardjan - https://nocomplexity.com/
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program. If not, see
Disclaimer: This SAST tool " + PYTHON_CODE_AUDIT_TEXT + " provides a powerful, automatic security analysis for Python source code. However, it's not a substitute for human review in combination with business knowledge. Undetected vulnerabilities may still exist.
" ) NOSEC_WARNING = "INFO: The --nosec flag is active. Security findings with in-line suppressions will be excluded from the report.
" SIMPLE_CSS_FILE = files("codeaudit") / "simple.css" DEFAULT_OUTPUT_FILE = "codeaudit-report.html" def overview_report(directory, filename=DEFAULT_OUTPUT_FILE): """Generates an overview report of code complexity and security indicators. This function analyzes a Python project to produce a high-level overview of complexity and security-related metrics. The input may be either: - A local directory containing Python source files - The name of a package hosted on PyPI.org So: codeaudit overview👉 To perform a SAST scan on the source code, run:
codeaudit filescan {package_name}'
if url is not None:
print(f"Creating Python Code Audit overview for package:\n{url}")
src_dir, tmp_handle = get_package_source(url)
directory = src_dir
clean_up = True
else:
# Neither a local directory nor a valid PyPI package
print(f"ERROR: '{directory}' is not a local directory or a valid PyPI package.")
exit(1)
result = get_statistics(directory)
modules = total_modules(directory)
df = pd.DataFrame(result)
df["Std-Modules"] = modules["Std-Modules"]
df["External-Modules"] = modules["External-Modules"]
overview_df = overview_count(df)
output = "Codeaudit overview scan of package: {package_name}
" output += f"Version:{release}
" else: output += f"Overview for the directory: {directory}
" output += f"Based on the maximum found complexity in a source file: Security concern rate is ❌ HIGH." else: output += "
Based on the maximum found complexity in a source file: Security concern rate is ✅ LOW." security_based_on_loc = overview_df.loc[0, "Number_Of_Lines"] if security_based_on_loc > 2000: output += "
Based on the total Lines of Code (LoC) : Security concern rate is ❌ HIGH." else: output += "
Based on the total Lines of Code (LoC) : Security concern rate is ✅ LOW."
output += "
"
## Module overview
modules_discovered = get_all_modules(directory)
if clean_up:
tmp_handle.cleanup() # Clean up tmp directory if overview is created directly from PyPI package
output += "View all discovered modules.
"
output += display_found_modules(modules_discovered)
output += "
💬 Advice:
" if advice is not None and advice != "": output += advice else: output += f'👉 To perform a SAST scan on the source code, run:
codeaudit filescan {directory}'
create_htmlfile(output, filename)
def display_found_modules(modules_discovered):
"""Formats discovered Python modules into an HTML string.
Args:
modules_discovered (dict): Dictionary containing discovered modules with
keys 'core_modules' and 'imported_modules', each mapping to an
iterable of module names.
Returns:
str: HTML-formatted string listing standard library modules and
imported external packages.
"""
core_modules = modules_discovered["core_modules"]
external_modules = modules_discovered["imported_modules"]
output = "Used Python Standard libraries:
" output += ( "Imported libraries (packages):
" output += ( "" + f"Location of the file: {input_path}
" if nosec: html_output += NOSEC_WARNING html_output += file_report_html html_output += secrets_report_html html_output += "⚠️ External Egress Risk: Detected outbound connection logic or API keys that may facilitate data egress.
" output += "✅ No Logic for connecting to remote services found. Risk of data exfiltration to external systems is low.
" return output def pylint_reporting(result): """ Creates a pandas DataFrame of privacy findings with columns: 'line', 'found', and 'code'. - Escapes HTML for safe rendering - Converts newlines to block
- Optimized for performance (fewer lookups, reusable template)
"""
rows = []
append_row = rows.append # local reference (faster in loops)
# Predefine template (faster than rebuilding strings each loop)
template = '{}
'
# Safely get dict
file_checks = result.get("file_privacy_check") or {}
for item in file_checks.values():
entries = item.get("privacy_check_result", [])
for entry in entries:
code = entry.get("code", "")
lineno = entry.get("lineno")
matched = entry.get("matched")
# Escape HTML and replace newlines (done once per entry)
escaped_code = html.escape(code).replace("\n", "
")
# Format HTML block (faster than f-string in tight loops)
code_html = template.format(escaped_code)
append_row(
{
"line": lineno,
"found": matched,
"code": code_html,
}
)
return pd.DataFrame(rows, columns=["line", "found", "code"])
def single_file_report(filename, scan_output):
"""Build the report fragment for one scanned file.

Shared by the single-file scan and the directory scan so the per-file
rendering lives in one place.

Args:
filename: Path of the scanned file. It is passed to ``overview_per_file``
and ``get_imported_modules_by_file`` and echoed in the ``modulescan`` hint.
scan_output (dict): Scan result. ``scan_output["result"]`` maps a validation
name to an iterable of line numbers; ``scan_output["file_location"]`` is
handed to ``_collect_issue_lines``.

Returns:
str: The findings table (only when at least one finding exists), followed
by the per-file overview table and the modules used by the file.
"""
# NOTE(review): several literals below are empty or split across physical
# lines; the HTML markup they carried appears to have been lost - confirm
# against the upstream file before relying on this block.
data = scan_output["result"]
# One row per (validation, line number) pair.
df = pd.DataFrame(
[(key, lineno) for key, linenos in data.items() for lineno in linenos],
columns=["validation", "line"],
)
number_of_issues = len(df)
df["severity"] = None
df["info"] = None
# Look up severity and help text once per validation name and copy them
# onto every row reporting that validation.
for error_str in data:
severity, info_text = get_info_on_test(error_str)
matching_rows = df[df["validation"] == error_str]
if not matching_rows.empty:
# Update all matching rows
df.loc[matching_rows.index, ["severity", "info"]] = [severity, info_text]
df["code"] = None
filename_location = scan_output["file_location"]
# Attach the source text belonging to each reported line.
for idx, row in df.iterrows():
line_num = row["line"]
df.at[idx, "code"] = _collect_issue_lines(filename_location, line_num)
df["code"] = df["code"].str.replace(
r"\n", "
", regex=True
) # to convert \n to \\n for display
df["validation"] = df["validation"].apply(
replace_second_dot
) # Make the validation column smaller - this is the simplest way! without using styling options from Pandas!
df = df[
["line", "validation", "severity", "info", "code"]
] # reorder the columns before converting to html
df = df.sort_values(by="line") # sort by line number
if number_of_issues > 0:
# output = f'⚠️ {number_of_issues} potential security issues found!
'
output = f'⚠️ {number_of_issues} potential security issue{"s" if number_of_issues != 1 else ""} found!
'
output += ""
output += "View identified security weaknesses.
"
# escape=False: cell contents are emitted as raw markup, not HTML-escaped.
output += df.to_html(escape=False, index=False)
output += ""
output += "
"
else:
output = "" # No weaknesses found, no message, since privacy breaches may be present.
# The overview and module sections are appended whether or not findings exist.
file_overview = overview_per_file(filename)
df_overview = pd.DataFrame([file_overview])
output += ""
output += (
f"View detailed analysis of security relevant file details.
"
)
# escape=True: the overview values are HTML-escaped by pandas.
output += df_overview.to_html(escape=True, index=False)
output += ""
output += "
"
output += ""
output += "View used modules in this file.
"
modules_found = get_imported_modules_by_file(filename)
output += display_found_modules(modules_found)
output += f'To check for reported vulnerabilities in external modules used by this file, use the command:
codeaudit modulescan {filename}
'
output += ""
return output
def directory_scan_report(
directory_to_scan,
nosec_flag,
filename=DEFAULT_OUTPUT_FILE,
package_name=None,
release=None,
):
"""Reports potential security issues for all Python files found in a directory.
This function performs security validations on all files found in a specified directory.
The result is written to a HTML report.
You can specify the name and directory for the generated HTML report.
Parameters:
directory_to_scan (str) : The full path to the Python source files to be scanned. Can be present in temp directory.
nosec_flag: When truthy, ``NOSEC_WARNING`` is added to the report and every
scan result is passed through ``filter_sast_results`` before reporting.
filename (str, optional): The name of the HTML file to save the report to.
Defaults to `DEFAULT_OUTPUT_FILE`.
package_name (optional): When not None, the report header shows this name
together with ``release`` instead of the directory name, per-file
locations are omitted and a note about deleted temporary directories
is appended.
release (optional): Release string shown next to ``package_name``.
Returns:
None - A HTML report is written as output
Raises:
SystemExit: With status 1 when ``directory_to_scan`` is not a directory.
"""
# NOTE(review): many literals below are split across physical lines or
# reduced to empty strings; the HTML markup they carried appears to have
# been lost - confirm against the upstream file.
# Check if the provided path is a valid directory
if not os.path.isdir(directory_to_scan):
print(f"Error: '{directory_to_scan}' is not a valid directory.")
exit(1)
collection_ok_files = [] # create a collection of files with no issues found
output = "Python Code Audit Report
"
files_to_check = collect_python_source_files(directory_to_scan)
output += "Directory scan report
"
name_of_package = get_filename_from_path(directory_to_scan)
if package_name is not None:
# Use real package name and retrieved release info
output += f"Below the result of the Codeaudit scan of (Package name - Release):
"
output += f" {package_name} - {release}
"
else:
output += f"Below the result of the Codeaudit scan of the directory: {name_of_package}
"
output += f"Total Python files found: {len(files_to_check)}
"
if nosec_flag:
output += NOSEC_WARNING
number_of_files = len(files_to_check)
print(f"Number of files that are checked for security issues:{number_of_files}")
# Draw the empty bar once before the first file is processed.
printProgressBar(
0, number_of_files, prefix="Progress:", suffix="Complete", length=50
)
for i, file_to_scan in enumerate(files_to_check):
printProgressBar(
i + 1, number_of_files, prefix="Progress:", suffix="Complete", length=50
)
if not nosec_flag: # no filtering on reviewed items with markers in code
scan_output = perform_validations(
file_to_scan
) # scans for weaknesses in the file
else:
unfiltered_scan_output = perform_validations(
file_to_scan
) # scans for weaknesses in the file
scan_output = filter_sast_results(unfiltered_scan_output)
spy_output = data_egress_scan(file_to_scan) # scans for secrets in the file
data = scan_output["result"]
# A file gets its own report section when it has SAST findings or when
# has_privacy_findings() reports something for the egress scan.
if data or has_privacy_findings(spy_output):
file_report_html = single_file_report(file_to_scan, scan_output)
name_of_file = get_filename_from_path(file_to_scan)
output += f"Security scan: {name_of_file}
"
# The file location is only printed for local directory scans.
if package_name is None:
output += "" + f"Location of the file: {file_to_scan}
"
output += file_report_html
secrets_report_html = secrets_report(spy_output)
output += secrets_report_html
else:
# Files without findings are listed together at the end of the report.
file_name_with_no_issue = get_filename_from_path(file_to_scan)
collection_ok_files.append(
{"filename": file_name_with_no_issue, "directory": file_to_scan}
)
output += "Files in directory with no security issues
"
output += f"✅ Total Python files without detected security issues: {len(collection_ok_files)}
"
output += "The Python files with no security issues detected by codeaudit are:
"
output += dict_list_to_html_table(collection_ok_files)
output += "
"
if package_name is not None:
output += f"
Note:Since this check is done on a package on PyPI.org, the temporary local directories are deleted. To examine the package in detail, you should download the sources locally and run the command:codeaudit filescan again.
"
output += "Disclaimer:This scan only evaluates Python files. Please note that security vulnerabilities may also exist in other files associated with the Python module.
"
output += DISCLAIMER_TEXT
create_htmlfile(output, filename)
def report_module_information(inputfile, reportname=DEFAULT_OUTPUT_FILE):
"""
Generate a report on known vulnerabilities in Python modules and packages.
This function analyzes a single Python file to identify imported
external modules and checks those modules against the OSV vulnerability
database. The collected results are written to a static HTML report.
If the input refers to a valid PyPI package name instead of a local Python
file, the function generates a vulnerability report directly for that
package.
While processing modules, progress information is printed to standard
output.
Example:
Generate a module vulnerability report for a Python file::
codeaudit modulescan | [yourreportname.html]
codeaudit modulescan mypythonfile.py
Args:
inputfile (str): Path to a Python source file (*.py) to analyze, or the
name of a package available on PyPI.
reportname (str, optional): Name (and optional path) of the HTML file to
write the vulnerability report to. The filename should use the
``.html`` extension. Defaults to ``DEFAULT_OUTPUT_FILE``.
Returns:
None: The function writes a static HTML report to disk.
Raises:
SystemExit: If the input is not a valid Python file or a valid PyPI
package. File parsing and I/O errors are reported via standard
output before exiting.
"""
# NOTE(review): many literals below are split across physical lines or
# reduced to empty strings; the HTML markup they carried appears to have
# been lost - confirm against the upstream file.
html_output = "Python Code Audit Report
"
file_path = Path(inputfile)
# Three input kinds are distinguished in order: a directory (rejected), a
# parsable local *.py file, and anything get_pypi_download_info() accepts.
if file_path.is_dir():
print(
"codeaudit modulescan only works on single python files (*.py) or packages present on PyPI.org"
)
print(
"See codeaudit modulescan -h or check the manual https://codeaudit.nocomplexity.com"
)
exit(1)
elif (
file_path.suffix == ".py" and file_path.is_file() and is_ast_parsable(inputfile)
):
source = read_in_source_file(inputfile)
used_modules = get_imported_modules(source)
# Initial call to print 0% progress
external_modules = used_modules["imported_modules"]
l = len(external_modules)
printProgressBar(0, l, prefix="Progress:", suffix="Complete", length=50)
html_output += f"Module scan report
"
html_output += f"Security information for file: {inputfile}
"
html_output += f"Total Dependencies Scanned: {l}
"
if external_modules:
html_output += ""
html_output += "View scanned module dependencies(imported packages).
"
html_output += (
"\n"
+ "\n".join(f" - {module}
" for module in external_modules)
+ "\n
"
)
html_output += ""
else:
html_output += "✅ No external modules found!"
# Now vuln info per external module
if external_modules:
html_output += "
Vulnerability information for detected modules
"
# NOTE(review): no sorting happens in this block; the modules are visited in
# the order get_imported_modules() returned them - confirm that order is sorted.
for i, module in enumerate(external_modules): # sorted for nicer report
printProgressBar(i + 1, l, prefix="Progress:", suffix="Complete", length=50)
html_output += module_vulnerability_check(module) + "
"
html_output += f'
💡 To check for security weaknesses in this package, use the command:
codeaudit filescan {inputfile}
'
html_output += "
" + DISCLAIMER_TEXT
create_htmlfile(html_output, reportname)
elif get_pypi_download_info(inputfile):
package_name = inputfile # The input variable is now equal to the package name
html_output += f"Package scan report for known vulnerabilities
"
html_output += module_vulnerability_check(package_name)
html_output += f'
💡 To check for security weaknesses in this package, use the command:
codeaudit filescan {package_name}
'
html_output += "
" + DISCLAIMER_TEXT
create_htmlfile(html_output, reportname)
else:
# File is NOT a valid Python file, or package does not exist on PyPI.
print(
f"Error: '{inputfile}' isn't a valid Python file(*.py), or a valid package on PyPI.org."
)
exit(1)
def module_vulnerability_check(module):
"""
Build the HTML fragment for the module vulnerability section of a code audit
module scan report.
The function checks whether vulnerability information is available for the
given Python package/module and returns an HTML snippet accordingly:
- If no vulnerabilities are found, a success message is rendered.
- If vulnerabilities are found, a collapsible HTML section is
generated containing the formatted vulnerability data.
Args:
module (str): Name of the Python package/module to check.
Returns:
str: HTML string representing the vulnerability scan result for the module.
"""
# NOTE(review): the literals below are split across physical lines or reduced
# to empty strings; the markup of the collapsible section described in the
# docstring appears to have been lost - confirm against the upstream file.
output = ""
vuln_info = check_module_vulnerability(module)
# Any falsy result from check_module_vulnerability() is treated as "nothing known".
if not vuln_info:
# here SAST scan for package? - not needed (now)- do a filescan on Python package manually - dependency trees can be deep and for complex package are never Python only.
output += f"✅ No known vulnerabilities found for package: {module}.
"
else:
output += ""
output += f"⚠️ View vulnerability information for package {module}.
"
# The vulnerability data is rendered by json_to_html().
output += json_to_html(vuln_info)
output += ""
return output
def create_htmlfile(html_input, outputfile):
"""Write the report body to an HTML file and print where it was saved.

Args:
html_input (str): Report content placed between the page header and the
generated footer line.
outputfile: Target path; ``~`` is expanded and the path is resolved to an
absolute one before writing.

Returns:
None. The file is written as UTF-8 and its ``file://`` URI is printed.

Raises:
SystemExit: With status 1 (after a message on stderr) when the parent
directory of the resolved output path does not exist.
"""
output_path = Path(outputfile).expanduser().resolve()
# Validate output directory (CLI-friendly)
if not output_path.parent.is_dir():
print(
f"Error: output directory does not exist:\n {output_path.parent}",
file=sys.stderr,
)
sys.exit(1)
# Read CSS so it is included in the reporting HTML file
css_content = Path(SIMPLE_CSS_FILE).read_text(encoding="utf-8")
# NOTE(review): the literals below are empty or split across physical lines and
# css_content is never referenced in the visible text; the page skeleton and
# the inline style element appear to have been lost - confirm against the
# upstream file.
# Start building the HTML
output = ''
output += ''
output += "Python_Code_Audit_SecurityReport "
output += f""
output += ''
output += ''
output += ''
output += ""
output += ''
output += html_input
# Footer: local creation time (minute resolution) plus tool name and version.
now = datetime.datetime.now()
timestamp_str = now.strftime("%Y-%m-%d %H:%M")
output += (
f"This Python security report was created on: {timestamp_str} "
f"with {PYTHON_CODE_AUDIT_TEXT} version {CA_VERSION}
"
)
output += "
"
output += ""
output += ""
output += ""
# Write the HTML file
output_path.write_text(output, encoding="utf-8")
print("\n=====================================================================")
# as_uri() yields a file:// URL that can be pasted into a browser.
print(
"Code Audit report file created!\n"
"Paste the line below directly into your browser bar:\n"
f"\t{output_path.as_uri()}\n"
)
print("=====================================================================\n")
def extract_altair_html(plot_html):
    """Return the inner content of the ``<body>`` element of an HTML document.

    Used to reduce a complete HTML page (such as an exported Altair plot) to
    the part that can be embedded in a larger report.

    Args:
        plot_html (str): Text of a complete HTML document.

    Returns:
        str: The body content with surrounding whitespace stripped and a
        trailing newline added, or a fixed placeholder message when no
        ``<body>...</body>`` pair is present.
    """
    # Both the opening tag (attributes allowed) and the closing tag must be part
    # of the pattern; without them the lazy group always captures an empty string.
    match = re.search(
        r"<body[^>]*>(.*?)</body>", plot_html, re.DOTALL | re.IGNORECASE
    )
    if match is None:
        # Fallback if no <body> element is found.
        return "Altair plot was supposed to be here: But something went wrong! Fix needed."
    body_content = match.group(1).strip()
    return f"{body_content}\n"
# Replace the second dot with an HTML line break (<br>)
def replace_second_dot(s):
    """Break a dotted name after its second component for narrower table cells.

    The second ``.`` is replaced by ``<br>`` so that long construct names wrap
    when the value is rendered through ``DataFrame.to_html(escape=False)``.
    Names with fewer than two dots are returned unchanged.

    Args:
        s (str): Dotted name, e.g. ``"os.path.join"``.

    Returns:
        str: ``s`` with its second dot replaced by ``<br>``; later dots are kept.
    """
    # maxsplit=2 leaves everything after the second dot untouched in pieces[2].
    pieces = s.split(".", 2)
    if len(pieces) < 3:
        return s
    first, second, remainder = pieces
    return f"{first}.{second}<br>{remainder}"
def get_info_on_test(error):
    """
    Look up the severity and help text that belong to a detected construct.

    The checks table returned by ``ast_security_checks()`` is searched for a row
    whose ``construct`` column equals ``error``. When no exact row exists and
    ``error`` contains ``extractall``, the ``tarfile.TarFile`` row is used instead.

    Args:
        error (str): A string to search for in the ['construct'] column.

    Returns:
        tuple: (severity, info_text) taken from the first matching row.

    Raises:
        SystemExit: With status 1, after printing an error message, when neither
            an exact row nor the fallback row exists.
    """
    checks = ast_security_checks()

    rows = checks[checks["construct"] == error]
    if rows.empty and "extractall" in error:
        # Fallback if extractall is mentioned.
        # See also open issues: when both the tarfile and zipfile modules are used
        # with aliases, detection works, but static AST resolution is not 100%
        # possible. Human data flow analysis is needed since aliases can be used,
        # so a shortcut is taken here.
        rows = checks[checks["construct"] == "tarfile.TarFile"]
        if rows.empty:
            print("\nERROR: No fallback row found for 'tarfile.extractall'")
            # sys.exit instead of the site-injected exit(): the latter is meant
            # for the interactive shell and is absent when site is not loaded.
            sys.exit(1)
    elif rows.empty:
        print(f"\nERROR: No row found for '{error}'")
        print(f"No rows found exactly matching '{error}'.")
        sys.exit(1)

    first_row = rows.iloc[0]  # only the first matching row is used
    return first_row["severity"], first_row["info"]
def report_implemented_tests(filename=DEFAULT_OUTPUT_FILE):
"""
Creates an HTML report of all implemented security checks.
This report provides a user-friendly overview of the static security checks
currently supported by Python Code Audit. It is intended to make it easier to review
the available validations without digging through the codebase.
The generated HTML includes:
- A table of all implemented checks
- The number of validations
- The version of Python Code Audit (codeaudit) used
- A disclaimer about version-specific reporting
The report is saved to the specified filename and is formatted to be
embeddable in larger multi-report documents.
Help me continue developing Python Code Audit as free and open-source software.
Join the community to contribute to the most complete, local first , Python Security Static scanner.
Help!! Join the journey, check: https://github.com/nocomplexity/codeaudit#contributing
Parameters:
filename (str): The output HTML filename. Defaults to ``DEFAULT_OUTPUT_FILE``.
"""
# NOTE(review): several literals below are split across physical lines; the
# HTML markup they carried appears to have been lost - confirm against the
# upstream file.
df_checks = ast_security_checks()
# NOTE(review): this assigns into the frame returned by ast_security_checks();
# if that function hands out a shared object the change is visible to other
# callers - confirm it returns a fresh DataFrame.
df_checks["construct"] = df_checks["construct"].apply(
replace_second_dot
) # Make the validation column smaller - this is the simplest way! without using styling options from Pandas!
df_checks_sorted = df_checks.sort_values(by="construct")
output = "Python Code Audit Implemented validations
" # prepared to be embedded to display multiple reports, so used
number_of_test = len(df_checks)
# escape=False: cell contents are emitted as raw markup, not HTML-escaped.
output += df_checks_sorted.to_html(escape=False, index=False)
output += "
"
output += (
f"
Number of implemented security validations:{number_of_test}
"
)
output += f"Version of codeaudit: {CA_VERSION}"
output += "
Because Python and cybersecurity are constantly changing, issue reports SHOULD specify the codeaudit version used.
"
output += DISCLAIMER_TEXT
create_htmlfile(output, filename)
def printProgressBar(
    iteration,
    total,
    prefix="",
    suffix="",
    decimals=1,
    length=100,
    fill="█",
    printEnd="\r",
):
    """
    Call in a loop to create terminal progress bar

    The displayed progress is clamped to the range 0..total, so the bar is
    always exactly ``length`` characters wide and never reports more than 100%.

    @params:
    iteration - Required : current iteration (Int)
    total - Required : total iterations (Int)
    prefix - Optional : prefix string (Str)
    suffix - Optional : suffix string (Str)
    decimals - Optional : positive number of decimals in percent complete (Int)
    length - Optional : character length of bar (Int)
    fill - Optional : bar fill character (Str)
    printEnd - Optional : end character (e.g. "\\r", "\\r\\n") (Str)
    """
    if total == 0:
        # Nothing to process: report completion with an empty bar.
        percent = "100"
        bar = "-" * length
    else:
        # Clamp so an out-of-range iteration cannot overflow the bar.
        shown = min(max(iteration, 0), total)
        percent = f"{100 * (shown / float(total)):.{decimals}f}"
        filled_length = int(length * shown // total)
        bar = fill * filled_length + "-" * (length - filled_length)
    # Leading "\r" returns to the start of the line so the bar redraws in place.
    print(f"\r{prefix} |{bar}| {percent}% {suffix}", end=printEnd)
    if total != 0 and iteration >= total:
        print()  # New line on completion