#!/usr/bin/env python # -*- coding: utf-8 -*- # @Function # Find duplicate class among java libs. # # @Usage # $ show-duplicate-java-classes # find jars from current dir # $ show-duplicate-java-classes path/to/lib_dir1 /path/to/lib_dir2 # $ show-duplicate-java-classes -c path/to/class_dir1 -c /path/to/class_dir2 # # @online-doc https://github.com/oldratlee/useful-scripts/blob/master/docs/java.md#-show-duplicate-java-classes # @author tg123 (farmer1992 at gmail dot com) # @author Jerry Lee (oldratlee at gmail dot com) __author__ = 'tg123' from glob import glob from os import walk from zipfile import ZipFile from os.path import relpath, isdir from optparse import OptionParser def list_jar_file_under_lib_dirs(libs): jar_files = set() for lib in libs: if isdir(lib): jar_files |= {f for f in glob(lib + '/*.jar')} else: jar_files.add(lib) return jar_files def list_class_under_jar_file(jar_file): return {f for f in ZipFile(jar_file).namelist() if f.lower().endswith('.class')} def list_class_under_class_dir(class_dir): return {relpath(dir_path + "/" + filename, class_dir) for dir_path, _, file_names in walk(class_dir) for filename in file_names if filename.lower().endswith('.class')} def expand_2_class_path(jar_files, class_dirs): java_class_2_class_paths = {} # list all classes in jar files for jar_file in jar_files: for class_file in list_class_under_jar_file(jar_file): java_class_2_class_paths.setdefault(class_file, set()).add(jar_file) # list all classes in class dir for class_dir in class_dirs: for class_file in list_class_under_class_dir(class_dir): java_class_2_class_paths.setdefault(class_file, set()).add(class_dir) return java_class_2_class_paths, jar_files | set(class_dirs) def find_duplicate_classes(java_class_2_class_paths): class_path_2_duplicate_classes = {} for java_class, class_paths in list(java_class_2_class_paths.items()): if len(class_paths) > 1: classes = class_path_2_duplicate_classes.setdefault(frozenset(class_paths), set()) classes.add(java_class) return class_path_2_duplicate_classes def print_class_paths(class_paths): print() print("=" * 80) print("class paths to find:") print("=" * 80) for idx, class_path in enumerate(class_paths): print("%-3d: %s" % (idx + 1, class_path)) if __name__ == '__main__': optionParser = OptionParser('usage: %prog ' '[-c class-dir1 [-c class-dir2] ...] ' '[lib-dir1|jar-file1 [lib-dir2|jar-file2] ...]') optionParser.add_option("-c", "--class-dir", dest="class_dirs", default=[], action="append", help="add class dir") options, libs = optionParser.parse_args() if not options.class_dirs and not libs: libs = ['.'] java_class_2_class_paths, class_paths = expand_2_class_path( list_jar_file_under_lib_dirs(libs), options.class_dirs) class_path_2_duplicate_classes = find_duplicate_classes(java_class_2_class_paths) if not class_path_2_duplicate_classes: print("COOL! No duplicate classes found!") print_class_paths(class_paths) exit() print("Found duplicate classes in below class path:") for idx, jars in enumerate(class_path_2_duplicate_classes): print("%-3d(%d@%d): %s" % (idx + 1, len(class_path_2_duplicate_classes[jars]), len(jars), " ".join(jars))) print() print("=" * 80) print("Duplicate classes detail info:") print("=" * 80) for idx, (jars, classes) in enumerate(class_path_2_duplicate_classes.items()): print("%-3d(%d@%d): %s" % (idx + 1, len(class_path_2_duplicate_classes[jars]), len(jars), " ".join(jars))) for i, c in enumerate(classes): print("\t%-3d %s" % (i + 1, c)) print_class_paths(class_paths) exit(1)