基於物品的協同過濾算法:給使用者推薦與他之前喜歡的物品相似的物品;
基於使用者的協同過濾算法:給使用者推薦與他興趣相似的使用者喜歡的物品;
實作協同過濾算法,大致有以下幾個關鍵步驟:
1:根據歷史資料收集使用者偏好
2:過濾噪音
3:計算相似度(餘弦相似度計算、歐氏距離計算、Jaccard係數、皮爾森相關係數),根據特定的業務進行計算
4:找到與目標最相鄰的K個相似的使用者(基於使用者userCF)或物品(基於物品itemCF)
5:根據特定的業務對資料進行過濾後再進行推薦
userCF和itemCF差別
userCF推薦是與使用者興趣相同的使用者們喜歡的物品,哪怕使用者根本不認識這些人
itemCF推薦是與使用者之前感興趣的物品相似的物品
userCF更橫向更社會化
itemCF更縱向更個性化
userCF要維護一個使用者相似度矩陣,適合使用者較少的場合
itemCF要維護一個物品相似度矩陣,適合物品較少的場合
處理使用者
/** Pair of weights attached to a user/item interaction or to an item/item relation.
  *
  * @param hot   primary weight
  * @param score secondary weight carried alongside `hot`
  */
case class Weight(
  hot: Double,
  score: Double
)

/** One interaction record: which user touched which video, and with what weight.
  *
  * @param userId    identifier of the user
  * @param videoCode identifier of the video (the "item")
  * @param weight    weight of this interaction
  */
case class UserBean(
  userId: String,
  videoCode: String,
  weight: Weight
)

/** A video together with a weight.
  *
  * @param videoCode identifier of the video
  * @param weight    weight associated with the video
  */
case class ItemBean(
  videoCode: String,
  weight: Weight
)
// User -> items index: for each user id, the videos that user interacted with
// and the weight of each interaction.
val userMap: mutable.HashMap[String, mutable.HashMap[String, Weight]] =
  mutable.HashMap.empty
// Item -> users index: for each video code, the set of user ids that interacted with it.
val itemMap: mutable.HashMap[String, mutable.HashSet[String]] =
  mutable.HashMap.empty
/** Indexes interaction records into the shared `userMap` and `itemMap`.
  *
  * Records are added on top of whatever the two maps already contain. A later
  * record for the same (user, video) pair overwrites the earlier weight.
  *
  * @param userArray interaction records to index
  * @return the shared `userMap` (user id -> video code -> weight) after the update
  */
def readData(userArray: Array[UserBean]): mutable.HashMap[String, mutable.HashMap[String, Weight]] = {
  userArray.foreach { record =>
    // user -> video -> weight
    val videosOfUser = userMap.getOrElseUpdate(record.userId, mutable.HashMap[String, Weight]())
    videosOfUser(record.videoCode) = record.weight

    // video -> users
    val usersOfVideo = itemMap.getOrElseUpdate(record.videoCode, mutable.HashSet[String]())
    usersOfVideo += record.userId
  }
  userMap
}
itemCF資料計算
/** Builds the item-item similarity matrix from the shared `userMap`.
  *
  * Two modes are supported:
  *
  *  - Co-occurrence mode (the default, and what the original hard-coded flag selected).
  *    Every user that has both items adds `1 / log(1 + n)` to the pair's `hot` value,
  *    where `n` is the number of items that user has, so users with many items
  *    contribute less. The sum is then divided by `sqrt(count(i) * count(j))`, where
  *    `count(x)` is the number of users that have item `x`. `score` is 0 in the result.
  *  - Weighted mode. Every user that has both items adds `min / max` of the two `hot`
  *    weights to `hot` and `min / max` of the two `score` weights to `score`.
  *    No normalisation step is applied in this mode.
  *
  * Items that never co-occur with another item still get an (empty) entry.
  *
  * @param useCooccurrence `true` for co-occurrence mode, `false` for weighted mode
  * @return item -> (related item -> similarity weight)
  */
def itemResult(useCooccurrence: Boolean = true): mutable.HashMap[String, mutable.HashMap[String, Weight]] = {
  // Smaller value divided by the larger one. Returns 0.0 when the larger value is 0
  // so that two zero weights do not turn the accumulated similarity into NaN.
  def ratio(a: Double, b: Double): Double = {
    val larger = math.max(a, b)
    if (larger == 0.0) 0.0 else math.min(a, b) / larger
  }

  val itemMatrix = mutable.HashMap[String, mutable.HashMap[String, Weight]]()
  // Number of users that have each item.
  val itemCount = mutable.HashMap[String, Double]()

  userMap.foreach { case (_, items) =>
    // Contribution of one co-occurrence coming from this user (loop-invariant per user).
    val penalty = 1.0 / math.log1p(items.size)
    items.foreach { case (item, itemWeight) =>
      val related = itemMatrix.getOrElseUpdate(item, mutable.HashMap[String, Weight]())
      itemCount(item) = itemCount.getOrElse(item, 0.0) + 1
      items.foreach { case (other, otherWeight) =>
        if (item != other) {
          val acc = related.getOrElse(other, Weight(0.0, 0.0))
          related(other) =
            if (useCooccurrence) {
              Weight(acc.hot + penalty, acc.score)
            } else {
              // Compare like with like: hot against hot, score against score.
              // Other distance measures (cosine, Euclidean, Pearson, ...) could be used here instead.
              Weight(
                acc.hot + ratio(itemWeight.hot, otherWeight.hot),
                acc.score + ratio(itemWeight.score, otherWeight.score)
              )
            }
        }
      }
    }
  }

  if (useCooccurrence) {
    itemMatrix.foreach { case (item, related) =>
      // Walk a snapshot of the keys so the map is never written to while it is being iterated.
      related.keys.toList.foreach { other =>
        val denominator = math.sqrt(itemCount.getOrElse(item, 0.0) * itemCount.getOrElse(other, 0.0))
        related(other) = Weight(related(other).hot / denominator, 0.0)
      }
    }
  }
  itemMatrix
}
itemCF對使用者進行推薦
/** Prints the item-based recommendations for one user.
  *
  * For every video in the user's history, the `k` most similar videos (by `hot`) are
  * looked up in `itemMatrix`. Videos the user already has are skipped. The `hot`
  * similarities of the remaining candidates are summed across the history, and the
  * ten candidates with the highest total are printed on one line, separated by `;`.
  * The user's own interaction weight is not part of the sum.
  *
  * @param itemMatrix item -> (related item -> similarity), e.g. the result of `itemResult()`
  * @param userArray  the interaction records of a single user; `null` is treated as an
  *                   empty history, so an empty line is printed
  * @param k          how many of the most similar items to consider per history entry
  */
def getItemRecommend(itemMatrix: mutable.HashMap[String, mutable.HashMap[String, Weight]], userArray: Array[UserBean],
                     k: Int): Unit = {
  // Callers look users up with `getOrElse(id, null)`; do not crash on an unknown user.
  val history = Option(userArray).getOrElse(Array.empty[UserBean])
  val seen = history.map(_.videoCode).toSet
  val recommends = mutable.HashMap[String, Weight]()

  history.foreach { record =>
    itemMatrix.get(record.videoCode).foreach { related =>
      // Take the k most similar items first, then drop the ones the user already has.
      related.toList.sortBy(_._2.hot).reverse.take(k).foreach { case (candidate, similarity) =>
        if (!seen.contains(candidate)) {
          val soFar = recommends.getOrElse(candidate, Weight(0.0, 0.0))
          // `hot` accumulates across history entries; `score` is the squared score of
          // the latest match and is not accumulated.
          recommends(candidate) = Weight(soFar.hot + similarity.hot, similarity.score * similarity.score)
        }
      }
    }
  }

  println(recommends.toList.sortBy(_._2.hot).reverse.take(10).mkString(";"))
}
userCF資料計算
/** Builds the user-user similarity matrix from the shared `itemMap`.
  *
  * Every item that two users have in common adds `1 / log(1 + n)` to the pair's
  * similarity, where `n` is the number of users that have that item, so items shared by
  * many users contribute less. The sum is then divided by `sqrt(count(u) * count(v))`,
  * where `count(x)` is the number of items user `x` has.
  *
  * Users that share no item with anyone still get an (empty) entry.
  *
  * @return user -> (other user -> similarity)
  */
def userResult(): mutable.HashMap[String, mutable.HashMap[String, Double]] = {
  // Number of items each user has.
  val userCount = mutable.HashMap[String, Double]()
  val userMatrix = mutable.HashMap[String, mutable.HashMap[String, Double]]()

  itemMap.foreach { case (_, users) =>
    // Contribution of one shared item (loop-invariant per item).
    val penalty = 1.0 / math.log1p(users.size * 1.0)
    users.foreach { user =>
      val similar = userMatrix.getOrElseUpdate(user, mutable.HashMap[String, Double]())
      userCount(user) = userCount.getOrElse(user, 0.0) + 1
      users.foreach { other =>
        if (user != other) similar(other) = penalty + similar.getOrElse(other, 0.0)
      }
    }
  }

  userMatrix.foreach { case (user, similar) =>
    // Walk a snapshot of the keys so the map is never written to while it is being iterated.
    similar.keys.toList.foreach { other =>
      val denominator = math.sqrt(userCount.getOrElse(user, 0.0) * userCount.getOrElse(other, 0.0))
      similar(other) = similar(other) / denominator
    }
  }
  userMatrix
}
/** Prints the user-based recommendations for one user.
  *
  * The user id is taken from the first record of `userArray`. The `k` most similar
  * users are looked up in `userMatrix`, and every video they have (per the shared
  * `userMap`) that the target user does not have is credited with that neighbour's
  * similarity. The ten videos with the highest total are printed as
  * `<userId> : <video,score>;...`.
  *
  * Nothing is printed when `userArray` is `null` or empty, or when the user has no
  * entry in `userMatrix`.
  *
  * @param userMatrix user -> (other user -> similarity), e.g. the result of `userResult()`
  * @param userArray  the interaction records of a single user
  * @param k          how many of the most similar users to consider
  */
def getUserRecommend(userMatrix: mutable.HashMap[String, mutable.HashMap[String, Double]], userArray: Array[UserBean],
                     k: Int): Unit = {
  // Callers look users up with `getOrElse(id, null)`; guard against null and empty input
  // instead of failing on `userArray(0)`.
  Option(userArray).flatMap(_.headOption).foreach { first =>
    val userId = first.userId
    userMatrix.get(userId).foreach { neighbours =>
      val seen = userArray.map(_.videoCode).toSet
      val recommends = mutable.HashMap[String, Double]()

      neighbours.toList.sortBy(_._2).reverse.take(k).foreach { case (neighbour, similarity) =>
        userMap.get(neighbour).foreach { videos =>
          videos.foreach { case (video, _) =>
            // The video's weight could be factored in here in several ways; currently
            // only the neighbour's similarity is accumulated.
            if (!seen.contains(video)) {
              recommends(video) = recommends.getOrElse(video, 0.0) + similarity
            }
          }
        }
      }

      val top = recommends.toList.sortBy(_._2).reverse.take(10).mkString(";")
      println(s"$userId : $top")
    }
  }
}
/** Demo entry point: indexes a small hand-written data set, then prints the itemCF and
  * userCF recommendations for users "1" to "4".
  *
  * @param args command-line arguments (unused)
  */
def main(args: Array[String]): Unit = {
  val userArray = Array(
    UserBean("1", "A", Weight(2.0, 0.0)),
    UserBean("2", "B", Weight(3.0, 0.0)),
    UserBean("1", "B", Weight(4.0, 0.0)),
    UserBean("1", "C", Weight(1.0, 0.0)),
    UserBean("3", "C", Weight(1.0, 0.0)),
    UserBean("4", "H", Weight(1.0, 0.0)),
    UserBean("4", "A", Weight(1.0, 0.0)),
    UserBean("4", "B", Weight(1.0, 0.0)),
    UserBean("5", "E", Weight(1.0, 0.0)),
    UserBean("5", "A", Weight(1.0, 0.0))
  )
  readData(userArray)

  // Records grouped per user; each group is the "history" handed to the recommenders.
  val recordsByUser = userArray.groupBy(_.userId)
  val targetUsers = List("1", "2", "3", "4")

  println("=======itemCF")
  val itemMatrix = itemResult()
  targetUsers.foreach { id =>
    getItemRecommend(itemMatrix, recordsByUser.getOrElse(id, null), 20)
  }

  println("=======userCF")
  val userMatrix = userResult()
  targetUsers.foreach { id =>
    getUserRecommend(userMatrix, recordsByUser.getOrElse(id, null), 20)
  }
}