Python-World · AdityaJ7 · Sep 20, 2020 · Sep 20, 2020 · Sep 20, 2020
diff --git a/projects/Duplicate files remover/README.md b/projects/Duplicate files remover/README.md
@@ -0,0 +1,20 @@
+# Duplicate Files Remover
+This script removes duplicate files in the directory where the script runs.
+
+### Prerequisites
+* No external libraries are needed; the script uses only these standard-library modules:
+  * `os`
+  * `hashlib`
+
+### How to run the script
+Execute `python3 duplicatefileremover.py` 
+
+### Screenshot/GIF showing the sample use of the script
+<!--Remove the below lines and add yours -->
+![Screenshot of the Output](Screenshot.png)
+
+## Working
+The script first lists all the files in the directory. It then computes the MD5 hash of each file; when a file's hash matches that of a file seen earlier, the later file is deleted as a duplicate and the first one is kept.
+
+## Author Name
+Anandha Krishnan Aji
diff --git a/projects/Duplicate files remover/Screenshot.png b/projects/Duplicate files remover/Screenshot.png
diff --git a/projects/Duplicate files remover/duplicatefileremover.py b/projects/Duplicate files remover/duplicatefileremover.py
@@ -0,0 +1,40 @@
+import hashlib
+import os
+
+# Returns the hash string of the given file name
+
+
+def hashFile(filename):
+    """Return the hexadecimal MD5 digest of the file named *filename*.
+
+    The file is read in fixed-size chunks rather than all at once, so
+    hashing a large file does not require holding it entirely in memory.
+    """
+    chunk_size = 65536
+    digest = hashlib.md5()
+    with open(filename, 'rb') as stream:
+        # iter() with a sentinel keeps calling read() until it returns b''.
+        for chunk in iter(lambda: stream.read(chunk_size), b''):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+if __name__ == "__main__":
+    # Maps each content hash to the first file seen with that content;
+    # that first file is the copy that is kept.
+    hashMap = {}
+
+    # Names of the duplicate files that were removed, in deletion order.
+    deletedFiles = []
+
+    # Only regular files directly inside the current working directory are
+    # considered; sub-directories are not descended into.
+    filelist = [f for f in os.listdir() if os.path.isfile(f)]
+    for f in filelist:
+        key = hashFile(f)
+        if key in hashMap:
+            # Same digest as an earlier file: treat it as a duplicate.
+            deletedFiles.append(f)
+            os.remove(f)
+        else:
+            hashMap[key] = f
+
+    # List the removed files only when at least one was actually removed;
+    # an empty list means no duplicates were found.
+    if deletedFiles:
+        print('Deleted Files')
+        for i in deletedFiles:
+            print(i)
+    else:
+        print('No duplicate files found')