天天看點

【推薦系統】UserCF:基於使用者的協同過濾

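# MovieLens dataset. The code only considers whether a user is interested in an item and ignores the rating value: any rating at all counts as interest.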
import pandas as pd
from collections import defaultdict
import numpy as np

# Load the ratings table, then split it into a training sample and the
# held-out remainder.
ratings = pd.read_csv('./movielens/ratings.csv')
# 75% of the rows are sampled for training; the fixed seed keeps the split
# reproducible from run to run.
trainset = ratings.sample(frac=0.75, random_state=0, axis=0)
# Every row that was not sampled for training becomes a test row.
testset = ratings.drop(index=trainset.index)

# Build the two lookup tables from the training rows:
#   user_item: user id  -> list of movie ids that user rated
#   item_user: movie id -> list of user ids that rated it (inverted index)
user_item = defaultdict(list)
item_user = defaultdict(list)
# Only the two id columns are needed; the rating value itself is ignored.
for raw_user, raw_movie in zip(trainset["userId"], trainset["movieId"]):
    userId = int(raw_user)
    movieId = int(raw_movie)
    user_item[userId].append(movieId)
    item_user[movieId].append(userId)

# Build the user-user co-occurrence matrix: w[u][v] is the number of training
# items rated by both user u and user v.
# User ids are used directly as row/column indices, so the 611x611 matrix
# covers ids 0..610.
w = np.zeros([611, 611])
# Rebuild the inverted index as a plain dict ordered by movie id.
item_user = dict(sorted(item_user.items(), key=lambda x: x[0]))
for users in item_user.values():
    # Visit each unordered pair of users exactly once. Iterating over all
    # ordered pairs while also updating both symmetric cells would count
    # every shared item twice and double every similarity derived from w.
    for pos, u in enumerate(users):
        for v in users[pos + 1:]:
            if u != v:
                w[u][v] += 1
                w[v][u] += 1

# Turn the co-occurrence counts in w into similarities, in place:
#   sim(i, j) = count(i, j) / sqrt(|items of i| * |items of j|)
# Pairs where either user has no training items get similarity 0.
liked_counts = [len(user_item[uid]) for uid in range(len(w))]
for row, row_count in enumerate(liked_counts):
    # Only the lower triangle is visited; each value is mirrored to keep
    # the matrix symmetric. The diagonal is left untouched.
    for col in range(row):
        norm = row_count * liked_counts[col]
        similarity = w[row][col] / np.sqrt(norm) if norm != 0 else 0.0
        w[row][col] = similarity
        w[col][row] = similarity


# Maps a user id to the list of movie ids recommended to that user.
recommendations = defaultdict(list)


def get_recommendations(uid, k=10, n=10):
    """Compute the top-*n* recommendations for *uid* and store them.

    The *k* users most similar to *uid* (by the module-level similarity
    matrix ``w``) are taken as neighbours. Every item a neighbour has in
    ``user_item`` that *uid* does not have is scored by the sum of the
    similarities of the neighbours who have it; the *n* best-scoring items
    are kept.

    Args:
        uid: User id; also the row index into ``w``.
        k: Number of most-similar users to use as neighbours.
        n: Maximum number of items to recommend.

    Returns:
        The recommended item ids, best first. The same list is stored in
        ``recommendations[uid]``, replacing any earlier result so that
        repeated calls do not accumulate duplicates.
    """
    # A set makes the "already seen" check O(1) inside the inner loop.
    seen = set(user_item.get(uid, ()))
    # Indices of the k largest similarities, most similar first.
    neighbours = np.argsort(w[uid])[::-1][:k]
    scores = defaultdict(float)
    for neighbour in neighbours:
        neighbour = int(neighbour)
        similarity = float(w[uid][neighbour])
        # Skip the user themself and padding "neighbours" that share
        # nothing with uid; they carry no evidence about uid's taste.
        if neighbour == uid or similarity <= 0:
            continue
        for item in user_item.get(neighbour, ()):
            if item not in seen:
                # Only the selected neighbours contribute to the score,
                # not every user who happens to have the item.
                scores[item] += similarity
    # sorted() is stable, so equally scored items keep discovery order.
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    top_items = [item for item, _ in ranked[:n]]
    recommendations[uid] = top_items
    return top_items

# Fill the recommendations table for every user id from 1 to 610.
for user_id in range(1, 611):
    get_recommendations(user_id)

def cal_precision(num_users=610):
    """Return the mean per-user precision of the stored recommendations.

    For each user id in ``1..num_users`` the precision is the fraction of
    that user's recommended items that also appear for that user in
    ``testset``. The per-user values are averaged over ``num_users``.

    Args:
        num_users: Highest user id to evaluate; ids start at 1.

    Returns:
        The average precision as a float. A user without any
        recommendations contributes 0 instead of raising
        ``ZeroDivisionError``.
    """
    # user id -> set of movie ids that user rated in the test split.
    liked_in_test = defaultdict(set)
    for raw_user, raw_movie in zip(testset["userId"], testset["movieId"]):
        liked_in_test[int(raw_user)].add(int(raw_movie))

    total = 0.0
    for uid in range(1, num_users + 1):
        # .get() avoids creating empty entries in the shared table.
        est = set(recommendations.get(uid, ()))
        if not est:
            # Nothing was recommended: precision counts as 0 for this user.
            continue
        real = liked_in_test.get(uid, set())
        total += len(real & est) / len(est)
    return total / num_users if num_users else 0.0

# Print the metric: a bare call discards the return value when this file is
# run as a script rather than as the last expression of a notebook cell.
print("precision:", cal_precision())

# Export the recommendations as CSV: one row per user with the user id,
# a comma, and then that user's recommended movie ids, each followed by a
# single space.
with open("./movielens/usercf.csv", "w") as f:
    f.write("userId,recommendation\n")
    # Use the same 1..610 id range as the recommendation and evaluation
    # loops. An upper bound of 620 wrote rows for ids that were never
    # processed and created empty entries for them in the table.
    for i in range(1, 611):
        items = "".join(str(x) + " " for x in recommendations[i])
        f.write(str(i) + "," + items + "\n")
           

繼續閱讀