天天看點

python查找相似圖檔

pip install nanopq                

Ks(每個子空間的碼字/聚類中心數)這裡取8的倍數,而且必須小於訓練向量的數量 N(nanopq 在 fit 時會檢查 Ks < N),否則會報錯

向量長度(維度 D)需要是 M(子空間數)的倍數
預設按照距離排序,非常不錯


import datetime
import heapq

import os

import cv2
import nanopq
import numpy as np


import nanopq
import numpy as np
import shutil


def dHash(gray,height):
    """Compute a difference hash of a grayscale image as a '0'/'1' string.

    The top-left ``height`` x ``height`` region is scanned row by row. Each
    position contributes '1' when the pixel is strictly greater than its
    right-hand neighbour and '0' otherwise (equal pixels give '0').

    Args:
        gray: 2-D array indexable as ``gray[i, j]`` (e.g. a NumPy array).
            It needs at least ``height`` rows and ``height + 1`` columns,
            because the last compared pixel of each row still needs a
            neighbour to its right.
        height: Number of rows scanned, and also the number of comparisons
            made per row.

    Returns:
        A string of ``height * height`` characters, each '0' or '1'.
    """
    # Build the hash with a single join rather than repeated `+=`, which
    # re-copies the growing string on every pixel.
    return ''.join(
        '1' if gray[row, col] > gray[row, col + 1] else '0'
        for row in range(height)
        for col in range(height)
    )

def pq_dis():
    """Demo of nearest-neighbour search with nanopq on random vectors.

    Generates 10,000 random 128-dim float32 vectors and one random query,
    trains a product quantizer on the first 1,000 vectors, encodes all of
    them, then prints the 5 smallest asymmetric distances to the query, the
    indices of those vectors, the distances looked up by index, and the
    elapsed search time in microseconds.
    """
    N, D = 10000, 128
    top_k = 5
    X = np.random.random((N, D)).astype(np.float32)  # 10,000 128-dim vectors
    query = np.random.random((D,)).astype(np.float32)  # a 128-dim vector

    # Instantiate with M=8 sub-spaces
    pq = nanopq.PQ(M=8,Ks=256)

    # Train with the top 1000 vectors
    pq.fit(X[:1000])

    # Encode to PQ-codes
    X_code = pq.encode(X)  # (10000, 8) with dtype=np.uint8

    time1=datetime.datetime.now()
    # Results: create a distance table online, and compute Asymmetric Distance to each PQ-code
    dists = pq.dtable(query).adist(X_code)

    # Rank by distance with a stable argsort. Looking each distance up with
    # list.index() returned the same (first) index for tied distances and
    # rebuilt the Python list on every lookup.
    indexs = np.argsort(dists, kind="stable")[:top_k].tolist()
    nsmallestList = dists[indexs].tolist()
    print(nsmallestList)
    print(indexs)
    print(dists[indexs])

    # timedelta.microseconds is only the sub-second component; report the
    # whole elapsed duration in microseconds instead.
    elapsed = datetime.datetime.now() - time1
    print("time", elapsed // datetime.timedelta(microseconds=1))

if __name__ == '__main__':
    # Find the images in `path` most similar to the first readable image and
    # copy them into `out_dir`, prefixing each copy with its PQ distance.
    path=r"C:\Users\Administrator\Documents\Tencent Files\441648051\FileRecv\pic_error"
    out_dir = "out"
    top_k = 54  # number of nearest images to copy

    # `files` and `datas` are filled in lockstep so that row i of the feature
    # matrix always corresponds to files[i], even when some directory entries
    # are skipped.
    files = []
    datas = []
    for file in os.listdir(path):
        img_1 = cv2.imread(os.path.join(path, file), 0)
        if img_1 is None:
            # cv2.imread returns None for anything it cannot decode
            # (sub-directories, non-image files, corrupt images).
            print("skip unreadable file:", file)
            continue
        # dsize is (width, height): 65 columns x 64 rows, so every one of the
        # 64 compared pixels per row has a right-hand neighbour.
        img1 = cv2.resize(img_1, (65, 64), interpolation=cv2.INTER_LINEAR)
        dhash = dHash(img1, 64)
        datas.append(list(map(int, dhash)))
        files.append(file)

    if not datas:
        raise SystemExit("no readable images found in " + path)

    # One 64*64 = 4096-dim 0/1 vector per image; nanopq requires float32.
    datas = np.asarray(datas, dtype=np.float32)

    # The first readable image is used as the query.
    query = datas[0]

    # Instantiate with M=8 sub-spaces (4096 is divisible by 8).
    # NOTE(review): nanopq appears to require more training vectors than Ks,
    # so this needs more than 48 readable images - confirm against nanopq.
    pq = nanopq.PQ(M=8,Ks=48)

    # Train on all image vectors
    pq.fit(datas)

    # Encode every image to its PQ-code
    X_code = pq.encode(datas)

    time1=datetime.datetime.now()
    # Results: create a distance table online, and compute Asymmetric Distance to each PQ-code
    dists = pq.dtable(query).adist(X_code)

    # Rank by distance with a stable argsort. Looking distances up with
    # list.index() mapped every tied distance (e.g. duplicate images) to the
    # same first index, so the same file was copied repeatedly.
    indexs = np.argsort(dists, kind="stable")[:top_k].tolist()
    nsmallestList = dists[indexs].tolist()
    print(nsmallestList)
    print(indexs)

    # shutil.copy does not create the destination directory.
    os.makedirs(out_dir, exist_ok=True)
    for i in indexs:
        print(files[i])
        shutil.copy(
            os.path.join(path, files[i]),
            os.path.join(out_dir, str(dists[i]) + "_" + files[i]),
        )

    # timedelta.microseconds is only the sub-second component; report the
    # whole elapsed duration in microseconds instead.
    elapsed = datetime.datetime.now() - time1
    print("time", elapsed // datetime.timedelta(microseconds=1))