"""String helpers: obfuscation, binary detection, hashing, and display formatting."""

import hashlib
import math
import random
import re
import string
from sys import getsizeof

from cycode.cli.consts import SCA_SHORTCUT_DEPENDENCY_PATHS
from cycode.cli.utils.binary_utils import is_binary_string

# Matches every character that gets masked: anything except '+', '-', and whitespace.
obfuscate_regex = re.compile(r'[^+\-\s]')


def obfuscate_text(text: str) -> str:
    """Mask the middle of ``text`` with ``*`` while keeping both ends readable.

    ``ceil(len(text) / 8)`` characters are left untouched at the start and at
    the end. In between, every character matched by ``obfuscate_regex`` is
    replaced with ``*``; ``+``, ``-`` and whitespace are kept as they are.

    The result always has the same length as ``text``.
    """
    match_len = len(text)
    revealed_len = math.ceil(match_len / 8)

    start_revealed_len = revealed_len
    # For a single-character input the suffix would otherwise start before the
    # prefix ends (1 - 1 == 0 < 1), and the character would be emitted twice.
    # Clamping keeps the prefix and suffix slices from overlapping.
    end_revealed_len = max(start_revealed_len, match_len - revealed_len)

    obfuscated = obfuscate_regex.sub('*', text)

    return f'{text[:start_revealed_len]}{obfuscated[start_revealed_len:end_revealed_len]}{text[end_revealed_len:]}'


def is_binary_content(content: str) -> bool:
    """Get the first 1024 chars and check if it's binary or not."""
    chunk = content[:1024]
    chunk_bytes = convert_string_to_bytes(chunk)
    return is_binary_string(chunk_bytes)


def get_content_size(content: str) -> int:
    """Return ``sys.getsizeof(content)``.

    This is the in-memory size of the ``str`` object in bytes (including
    interpreter overhead), not the length of its encoded form.
    """
    return getsizeof(content)


def convert_string_to_bytes(content: str) -> bytes:
    """Encode ``content`` as UTF-8 bytes."""
    return bytes(content, 'UTF-8')


def hash_string_to_sha256(content: str) -> str:
    """Return the hexadecimal SHA-256 digest of ``content`` (encoded with the default UTF-8)."""
    return hashlib.sha256(content.encode()).hexdigest()


def generate_random_string(string_len: int) -> str:
    """Return ``string_len`` random characters drawn from ASCII letters, digits and punctuation.

    Uses the ``random`` module, which is not a cryptographically secure source.
    """
    # letters, digits, and symbols
    characters = string.ascii_letters + string.digits + string.punctuation
    return ''.join(random.choice(characters) for _ in range(string_len))  # noqa: S311


def get_position_in_line(text: str, position: int) -> int:
    """Convert an absolute offset in ``text`` into a zero-based offset within its line.

    The line start is located by searching backwards from ``position`` for the
    nearest ``'\\n'``. When there is none, ``rfind`` returns -1 and the result
    equals ``position``.
    """
    return position - text.rfind('\n', 0, position) - 1


def shortcut_dependency_paths(dependency_paths_list: str) -> str:
    """Shorten long dependency chains in a comma-separated list of paths.

    Each comma-separated entry is a chain of names joined by ``' -> '``. Chains
    with more than ``SCA_SHORTCUT_DEPENDENCY_PATHS`` elements are collapsed to
    ``first -> ... -> last``; shorter chains are kept unchanged. Entries are
    returned one per line, with trailing whitespace and trailing commas removed
    from the overall result.
    """
    shortened_paths = []
    for raw_path in dependency_paths_list.split(','):
        dependency_path = raw_path.strip()
        dependencies = dependency_path.split(' -> ')

        if len(dependencies) <= SCA_SHORTCUT_DEPENDENCY_PATHS:
            shortened_paths.append(dependency_path)
        else:
            shortened_paths.append(f'{dependencies[0]} -> ... -> {dependencies[-1]}')

    return '\n'.join(shortened_paths).rstrip().rstrip(',')


def sanitize_text_for_encoding(text: str) -> str:
    """Sanitize text by replacing characters that cannot be encoded to UTF-8.

    This prevents encoding errors when Rich tries to display the content, especially on Windows.
    Surrogate characters (U+D800 to U+DFFF) cannot be encoded to UTF-8 and will cause errors;
    with ``errors='replace'`` each of them is substituted with ``?`` during encoding.
    """
    return text.encode('utf-8', errors='replace').decode('utf-8')