Update Image.py

Add support for difference hash (dHash) calculation. A dHash value can be used to quickly estimate the similarity of two pictures.
This commit is contained in:
dust-to-dust 2022-04-08 10:19:27 +08:00 committed by GitHub
parent 4996f84fb3
commit 998116bf08
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -2692,6 +2692,78 @@ class Image:
if not ImageQt.qt_is_installed:
raise ImportError("Qt bindings are not installed")
return ImageQt.toqpixmap(self)
def dhash(self) -> str:
    """
    Calculate the dHash (difference hash) value of the picture.

    The boolean sequence returned by ``_difference`` is packed eight values
    at a time into bytes. Within each group the first value becomes the
    least significant bit. Every byte is written as two lowercase
    hexadecimal digits, so the 64 comparisons produced by ``_difference``
    give a 16-character string. The dHash value can be used to quickly
    determine the similarity of two pictures.

    :return: hexadecimal dHash string
    """
    bits = self._difference(self)
    hex_bytes = []
    byte_value = 0
    for index, bit in enumerate(bits):
        if bit:
            # set the bit for this position inside the current byte
            byte_value |= 1 << (index % 8)
        if index % 8 == 7:
            # a full group of eight values has been consumed: emit the byte
            hex_bytes.append(format(byte_value, "02x"))
            byte_value = 0
    return "".join(hex_bytes)
def is_similar_to(self, image, hamming_distance: int = 3) -> bool:
    """
    Check whether this picture is similar to another picture or dHash value.

    The two are considered similar when their Hamming distance (see
    ``hamming_distance()``) does not exceed the given threshold.

    :param image: PIL.Image / dHash(str)
    :param hamming_distance: largest Hamming distance, from 0 to 64, that
        still counts as similar
    :return: bool
    :raises ValueError: if ``hamming_distance`` is outside the range 0-64
    """
    # Validate explicitly: an ``assert`` would be stripped when Python runs
    # with -O, silently accepting out-of-range thresholds.
    if not 0 <= hamming_distance <= 64:
        raise ValueError("hamming_distance must be between 0 and 64")
    # bool() keeps the annotated return type even if the distance is a
    # NumPy integer, whose comparison would yield a NumPy bool.
    return bool(self.hamming_distance(image) <= hamming_distance)
def hamming_distance(self, image) -> int:
    """
    Calculate the Hamming distance between two pictures (based on dHash algorithm)

    :param image: PIL.Image / dHash(str)
    :return: hamming distance. The larger the value, the greater the
        difference between the two pictures; otherwise, the more similar
        they are.
    """
    if isinstance(image, str):
        # a precomputed dHash string is compared against this picture's hash
        return self._hamming_distance_with_hash(image)
    own_bits = self._difference(self)
    other_bits = self._difference(image)
    # XOR marks the positions where the two sequences disagree; int() turns
    # the NumPy sum into the builtin int promised by the annotation.
    return int((own_bits ^ other_bits).sum())
@staticmethod
def _difference(image):
"""
*Private method*
Calculate difference sequence
:param image: PIL.Image
:return: bool list
"""
from numpy import array
resize_width = 9
resize_height = 8
difference = [] # 64-bit boolean sequence
# convert to 9*8 grayscale image
data = array(image.resize((resize_width, resize_height)).convert('L'))
for row in range(resize_height):
for col in range(resize_width - 1):
difference.append(data[row, col] > data[row, col + 1])
return array(difference)
def _hamming_distance_with_hash(self, dhash: str) -> int:
    """
    *Private method*
    Calculate the Hamming distance between this picture's dHash and a
    given dHash string.

    :param dhash: hexadecimal dHash string to compare against
    :return: hamming_distance(int)
    """
    own_value = int(self.dhash(), 16)
    other_value = int(dhash, 16)
    # XOR leaves a 1-bit at every position where the two hashes disagree
    differing_bits = format(own_value ^ other_value, "b")
    return differing_bits.count("1")
# --------------------------------------------------------------------