你的努力将成就更好的自己。
矩陣分解并作出推薦
# Source file encoding declaration
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import tensorflow as tf
def data_process(data_dir="F:\\ml-latest-small"):
    """Load the MovieLens CSV files, add a dense movie row index, and save processed copies.

    Reads ``ratings.csv`` and ``movies.csv`` from ``data_dir``, adds a
    ``movieRow`` column to the movies (their positional index), and replaces
    ``movieId`` in the ratings by that ``movieRow`` via an inner join, so
    ratings whose ``movieId`` is missing from ``movies.csv`` are dropped.
    The processed frames are written back to ``data_dir`` as
    ``moviesProcessed.csv`` and ``ratingsProcessed.csv``, and a short preview
    of each is printed.

    Args:
        data_dir: Directory holding the input CSV files; the processed files
            are written to the same directory. Defaults to the location the
            script was originally hard-wired to.

    Returns:
        A tuple ``(ratings, movies)`` where ``ratings`` has the columns
        ``userId, movieRow, rating`` and ``movies`` has the columns
        ``movieRow, movieId, title``.
    """
    import os  # local import so the block does not depend on file-level imports

    ratings = pd.read_csv(os.path.join(data_dir, "ratings.csv"))
    movies = pd.read_csv(os.path.join(data_dir, "movies.csv"))

    # The positional index serves as a contiguous row id for each movie,
    # which is what the rating matrix is indexed by later on.
    movies['movieRow'] = movies.index
    movies = movies[['movieRow', 'movieId', 'title']]
    movies.to_csv(os.path.join(data_dir, "moviesProcessed.csv"),
                  index=False, header=True, encoding='utf-8')
    print(movies.tail())

    # Inner join: attach movieRow to every rating, then keep only the
    # three columns the model needs.
    ratings = pd.merge(ratings, movies, on='movieId')
    ratings = ratings[['userId', 'movieRow', 'rating']]
    ratings.to_csv(os.path.join(data_dir, "ratingsProcessed.csv"),
                   index=False, header=True, encoding='utf-8')
    print(ratings.head())
    return ratings, movies
#建構矩陣
def build_matrix(ratings):
    """Build the dense movie-by-user rating matrix and its "was rated" mask.

    Args:
        ratings: DataFrame with the columns ``userId``, ``movieRow`` and
            ``rating``. ``userId`` and ``movieRow`` are used directly as
            column and row indices.

    Returns:
        A tuple ``(rate, record, user_no, movies_no)``:
        ``rate`` is a float array of shape ``(movies_no, user_no)`` holding
        the rating at ``[movieRow, userId]`` and 0 elsewhere; ``record`` is an
        integer array of the same shape with 1 where ``rate > 0`` and 0
        elsewhere; ``user_no`` and ``movies_no`` are the largest ``userId`` /
        ``movieRow`` plus one.
    """
    # Ids are used as indices, so each axis needs max id + 1 slots.
    user_no = ratings['userId'].max() + 1
    movies_no = ratings['movieRow'].max() + 1

    rate = np.zeros((movies_no, user_no))
    print(rate.shape)
    print(np.shape(ratings))

    # Fill every cell in one vectorized assignment instead of a Python-level
    # row loop. NumPy does not guarantee the write order for repeated
    # indices, so drop duplicates first, keeping the last occurrence -- the
    # same outcome a sequential fill would produce.
    unique = ratings.drop_duplicates(subset=['movieRow', 'userId'], keep='last')
    movie_rows = np.asarray(unique['movieRow'], dtype=int)
    user_cols = np.asarray(unique['userId'], dtype=int)
    rate[movie_rows, user_cols] = np.asarray(unique['rating'])

    # 1 where a rating exists (any value > 0), 0 otherwise.
    record = (rate > 0).astype(int)
    print(record)
    return rate, record, user_no, movies_no
# Build the model
def normalizeRatings(rate, record):
    """Mean-normalize each row of the rating matrix over its rated entries.

    Args:
        rate: 2-D array of ratings, one row per movie.
        record: Array of the same shape; a non-zero entry marks a cell of
            ``rate`` that holds a real rating.

    Returns:
        A tuple ``(rating_norm, rating_mean)``. ``rating_mean`` has shape
        ``(m, 1)`` and holds the mean of the rated entries of each row; rows
        without any rated entry get a mean of 0 (rather than the NaN plus
        RuntimeWarning that averaging an empty selection produces).
        ``rating_norm`` has the shape of ``rate`` and holds
        ``rate - rating_mean`` at rated cells and 0 everywhere else.
    """
    rated = record != 0
    counts = rated.sum(axis=1, keepdims=True)
    totals = np.where(rated, rate, 0.0).sum(axis=1, keepdims=True)

    # Divide only where at least one rating exists; other rows keep mean 0.
    rating_mean = np.zeros((rate.shape[0], 1))
    np.divide(totals, counts, out=rating_mean, where=counts > 0)

    # Unrated cells stay at 0 so they do not look like below-average ratings.
    rating_norm = np.where(rated, rate - rating_mean, 0.0)
    return rating_norm, rating_mean
def build_model(rate, record, movies_no, user_no):
    """Define the matrix-factorization variables, loss and training op.

    Args:
        rate: Rating matrix, one row per movie and one column per user.
        record: Array of the same shape as ``rate``; non-zero where a rating
            exists. It masks the loss so only rated cells contribute.
        movies_no: Number of rows of the movie factor matrix.
        user_no: Number of rows of the user factor matrix.

    Returns:
        A tuple ``(x, theta, train, loss, rating_mean)``: the movie factor
        variable of shape ``[movies_no, 12]``, the user factor variable of
        shape ``[user_no, 12]``, the Adam training op, the scalar loss
        tensor, and the per-movie mean rating with NaN replaced by 0.
    """
    rating_norm, rating_mean = normalizeRatings(rate, record)
    # The per-row mean may be NaN for rows without any rating; map those to 0
    # so they can be added back onto the predictions later.
    rating_mean = np.nan_to_num(rating_mean)

    num_features = 12
    x = tf.Variable(tf.random_normal([movies_no, num_features], stddev=0.35))
    theta = tf.Variable(tf.random_normal([user_no, num_features], stddev=0.35))

    # Prediction error on the mean-normalized ratings, zeroed at unrated cells.
    residual = (tf.matmul(x, theta, transpose_b=True) - rating_norm) * record

    # Use the float literal 0.5 rather than 1/2: under integer division
    # 1/2 evaluates to 0, which would silently remove the term it scales.
    data_term = 0.5 * tf.reduce_sum(residual ** 2)
    # L2 penalty on both factor matrices (overall weight 0.5 * 0.5).
    reg_term = 0.5 * (0.5 * (tf.reduce_sum(x ** 2) + tf.reduce_sum(theta ** 2)))
    loss = data_term + reg_term

    train = tf.train.AdamOptimizer(1e-3).minimize(loss)
    return x, theta, train, loss, rating_mean
#訓練模型
def Train(loss):
    """Set up summary logging for the training loss.

    Args:
        loss: Scalar loss tensor to record under the tag ``train_loss``.

    Returns:
        A tuple ``(summaryMerged, writer)``: the merged summary op and the
        ``tf.summary.FileWriter`` that writes to the fixed log path below.
    """
    log_path = "F:\\ml-latest-small\\movie_tensorborad.csv"

    # Register the scalar before merging so the merged op includes it.
    tf.summary.scalar('train_loss', loss)
    merged_op = tf.summary.merge_all()

    event_writer = tf.summary.FileWriter(log_path)
    return merged_op, event_writer
def recommend(movies, scores=None):
    """Prompt for a user number and print that user's 20 highest-scored movies.

    Args:
        movies: DataFrame with a ``title`` column whose row positions match
            the rows of the score matrix.
        scores: Optional 2-D array of predicted scores, one row per movie and
            one column per user. When omitted, the module-level ``predicts``
            computed by the script entry point is used, so existing
            ``recommend(movies)`` calls keep working.

    Raises:
        ValueError: If the entered text is not an integer.
        IndexError: If the entered user number is not a valid column of the
            score matrix. Negative numbers are rejected too; they would
            otherwise silently select a column counted from the end.
    """
    if scores is None:
        scores = predicts

    user_id = input(u'您要想哪位使用者進行推薦?請輸入使用者編号:')
    user_col = int(user_id)  # parse once instead of on every use
    if not 0 <= user_col < scores.shape[1]:
        raise IndexError('user number %d is out of range [0, %d)'
                         % (user_col, scores.shape[1]))

    user_scores = scores[:, user_col]
    # argsort() yields indices in ascending order of value; reversing it
    # gives the movie rows ordered from highest to lowest score.
    sortedResult = user_scores.argsort()[::-1]

    # center() pads the heading with '=' on both sides up to a width of 80.
    print(u'為該使用者推薦的評分最高的20部電影是:'.center(80, '='))
    for i in sortedResult[:20]:
        # NOTE(review): the displayed score is the prediction minus 2; the
        # reason for this offset is not documented -- confirm it is intended.
        print(u'評分: %.2f, 電影名: %s' % (user_scores[i] - 2, movies.iloc[i]['title']))
#評估模型
if __name__ == "__main__":
    # Pipeline: load data -> build rating matrix -> define model -> train ->
    # evaluate on the known ratings -> interactive recommendation.
    ratings, movies = data_process()
    rate, record, user_no, movies_no = build_matrix(ratings)
    x, theta, train, loss, rating_mean = build_model(rate, record, movies_no, user_no)
    summaryMerged, writer = Train(loss)
    init = tf.global_variables_initializer()
    try:
        with tf.Session() as sess:
            sess.run(init)
            for i in range(2000):
                # One optimizer step plus the loss summary for this step.
                _, movie_summary = sess.run([train, summaryMerged])
                writer.add_summary(movie_summary, i)
            # Fetch the learned factors while the session is still open.
            current_x, current_theta = sess.run([x, theta])
    finally:
        # Close the summary writer so pending events are flushed to disk
        # even when training is interrupted.
        writer.close()

    # The model was fitted on mean-normalized ratings, so add the per-movie
    # mean back. `predicts` stays a module-level name: recommend() reads it.
    predicts = np.dot(current_x, current_theta.T) + rating_mean
    # Square root of the summed squared error over the rated cells only
    # (not divided by the number of ratings).
    error = np.sqrt(np.sum(((predicts - rate) * record) ** 2))
    print(u'模型評估errors:', error)
    recommend(movies)
複制
結果如下:
movieRow movieId title
9737 9737 193581 Black Butler: Book of the Atlantic (2017)
9738 9738 193583 No Game No Life: Zero (2017)
9739 9739 193585 Flint (2017)
9740 9740 193587 Bungo Stray Dogs: Dead Apple (2018)
9741 9741 193609 Andrew Dice Clay: Dice Rules (1991)
userId movieRow rating
0 1 0 4.0
1 5 0 4.0
2 7 0 4.5
3 15 0 2.5
4 17 0 4.5
(9742, 611)
(100836, 3)
[[0 1 0 ... 1 1 1]
[0 0 0 ... 1 0 0]
[0 1 0 ... 1 0 0]
...
[0 0 0 ... 0 0 0]
[0 0 0 ... 0 0 0]
[0 0 0 ... 0 0 0]]
模型評估errors: 151.9801784601805
您要想哪位使用者進行推薦?請輸入使用者編号:1
==============================為該使用者推薦的評分最高的20部電影是:===============================
評分: 5.39, 電影名: Now You See Me (2013)
評分: 4.43, 電影名: Postman, The (Postino, Il) (1994)
評分: 4.40, 電影名: My Neighbor Totoro (Tonari no Totoro) (1988)
評分: 4.35, 電影名: Color Purple, The (1985)
評分: 4.23, 電影名: The Revenant (2015)
評分: 4.21, 電影名: Smoke (1995)
評分: 4.19, 電影名: Big Sleep, The (1946)
評分: 4.19, 電影名: Drugstore Cowboy (1989)
評分: 4.16, 電影名: Whale Rider (2002)
評分: 4.12, 電影名: Gandhi (1982)
評分: 4.10, 電影名: Murder in the First (1995)
評分: 4.10, 電影名: Lone Star (1996)
評分: 4.03, 電影名: Lifeboat (1944)
評分: 4.00, 電影名: Planes, Trains & Automobiles (1987)
評分: 3.99, 電影名: Moonstruck (1987)
評分: 3.93, 電影名: Remains of the Day, The (1993)
評分: 3.91, 電影名: To Kill a Mockingbird (1962)
評分: 3.90, 電影名: Crash (1996)
評分: 3.89, 電影名: Grave of the Fireflies (Hotaru no haka) (1988)
評分: 3.88, 電影名: Wallace & Gromit: The Best of Aardman Animation (1996)
複制