import os

import imagehash
from PIL import Image


def find_similar_images(base_dir, hash_size=8):
    """Hash every image in base_dir and return the hash table plus the files whose hash was already seen."""
    snapshots_files = sorted(os.listdir(base_dir))
    hash_dict = {}
    duplicates = []

    print('---' * 5, "Finding similar files", '---' * 5)
    for file in snapshots_files:
        # Compute a perceptual difference hash (dhash) for each image.
        with Image.open(os.path.join(base_dir, file)) as image:
            comp_hash = str(imagehash.dhash(image, hash_size=hash_size))
        if comp_hash not in hash_dict:
            hash_dict[comp_hash] = file
        else:
            # An earlier file already produced this hash, so this one is a duplicate.
            print('Duplicate file:', file)
            duplicates.append(file)

    print('\nTotal duplicate files:', len(duplicates))
    print("-----" * 10)
    return hash_dict, duplicates

def remove_duplicates(base_dir):
    """Delete every file that find_similar_images flagged as a duplicate."""
    _, duplicates = find_similar_images(base_dir, hash_size=12)
    if not duplicates:
        print('No duplicates found!')
    else:
        print("Removing duplicates...")
        for dup_file in duplicates:
            file_path = os.path.join(base_dir, dup_file)
            if os.path.exists(file_path):
                os.remove(file_path)
            else:
                print('Filepath:', file_path, 'does not exist.')

        print('All duplicates removed!')

    print('***' * 10, '\n')
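
# Optional variant (a sketch, not part of the original script): comparing hash
# strings for exact equality only catches images whose hashes match
# bit-for-bit. imagehash objects support subtraction, which returns the
# Hamming distance between two hashes, so visually similar images can be
# caught by allowing a small bit difference. The function name and the
# max_distance threshold below are illustrative choices, not values taken
# from the original code.
def find_near_duplicates(base_dir, hash_size=8, max_distance=4):
    snapshots_files = sorted(os.listdir(base_dir))
    seen_hashes = []        # (hash, filename) pairs kept for comparison
    near_duplicates = []

    for file in snapshots_files:
        with Image.open(os.path.join(base_dir, file)) as image:
            comp_hash = imagehash.dhash(image, hash_size=hash_size)
        # Compare against every hash seen so far; a small Hamming distance
        # means the two images are visually very similar.
        match = next((kept for kept, _ in seen_hashes
                      if comp_hash - kept <= max_distance), None)
        if match is None:
            seen_hashes.append((comp_hash, file))
        else:
            near_duplicates.append(file)

    return near_duplicates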

if __name__ == "__main__":
    remove_duplicates('sample_1')