天天看點

協同過濾算法代碼(itemCF\userCF)

基于物品的協同過濾算法:給使用者推薦與他之前喜歡的物品相似的物品;

基于使用者的協同過濾算法:給使用者推薦與他興趣相似的使用者喜歡的物品;

實作協同過濾算法,大致有以下幾個關鍵步驟:

1:根據歷史資料收集使用者偏好

2:過濾噪音

3:計算相似度(餘弦相似度、歐氏距離、Jaccard系數、皮爾森相關系數),具體採用哪一種依特定的業務而定

4:找到與目標最相近的K個使用者(基于使用者userCF)或物品(基于物品itemCF)

5:根據特定的業務對資料進行過濾後再做推薦

userCF和itemCF差別

userCF推薦是與使用者興趣相同的使用者們喜歡的物品,哪怕使用者根本不認識這些人

itemCF推薦是與使用者之前感興趣的物品相似的物品

userCF更橫向更社會化

itemCF更縱向更個性化

userCF要維護一個使用者相似度矩陣,適合使用者較少的場合

itemCF要維護一個物品相似度矩陣,適合物品較少的場合

處理使用者

/**
 * A pair of numeric weights attached to a user/item relation.
 *
 * @param hot   primary weight; it is the value the recommenders sort by
 * @param score secondary weight carried alongside `hot`
 */
final case class Weight(hot: Double, score: Double)

/**
 * One observed interaction: user `userId` touched video `videoCode` with the given weight.
 *
 * @param userId    identifier of the user
 * @param videoCode identifier of the video (the "item")
 * @param weight    weight of this interaction
 */
final case class UserBean(userId: String, videoCode: String, weight: Weight)

/**
 * A video together with a weight.
 *
 * @param videoCode identifier of the video
 * @param weight    weight associated with the video
 */
final case class ItemBean(videoCode: String, weight: Weight)

  // User index: userId -> (videoCode -> weight of that user's interaction).
  val userMap: mutable.HashMap[String, mutable.HashMap[String, Weight]] =
    mutable.HashMap.empty[String, mutable.HashMap[String, Weight]]
  // Item index: videoCode -> ids of the users who interacted with it.
  val itemMap: mutable.HashMap[String, mutable.HashSet[String]] =
    mutable.HashMap.empty[String, mutable.HashSet[String]]

  /**
   * Loads interactions into the two file-level indexes `userMap` and `itemMap`.
   *
   * For every record the video and its weight are stored under the user in `userMap`
   * (a later record for the same user/video overwrites the earlier weight), and the
   * user id is added to the video's user set in `itemMap`.
   *
   * @param userArray interaction records to index
   * @return the shared `userMap` instance after it has been updated
   */
  def readData(userArray: Array[UserBean]): mutable.HashMap[String,mutable.HashMap[String,Weight]] = {
    for (record <- userArray) {
      // Fetch the user's video map, registering a fresh one on first sight of the user.
      val videosOfUser = userMap.getOrElseUpdate(record.userId, mutable.HashMap[String, Weight]())
      videosOfUser += (record.videoCode -> record.weight)

      // Same for the video's set of users.
      val usersOfVideo = itemMap.getOrElseUpdate(record.videoCode, mutable.HashSet[String]())
      usersOfVideo += record.userId
    }

    userMap
  }

itemCF資料計算

/**
 * Builds the item-to-item similarity matrix from the file-level `userMap`.
 *
 * For every user and every ordered pair of distinct items (i, j) held by that user,
 * a contribution is accumulated into `matrix(i)(j)`:
 *
 *  - co-occurrence mode: `hot` grows by `1 / log1p(number of items the user holds)`,
 *    `score` is carried over unchanged. Afterwards every `hot` is divided by
 *    `sqrt(N(i) * N(j))`, where `N(x)` is the number of users holding item `x`, and
 *    `score` is reset to 0.0.
 *  - weighted mode: `hot` and `score` each grow by the min/max ratio of the user's two
 *    weights for i and j; no normalisation pass is run.
 *
 * @param useCooccurrence `true` (the default) selects co-occurrence mode, `false` weighted mode
 * @return item id -> (related item id -> accumulated weight)
 */
def itemResult(useCooccurrence: Boolean = true): mutable.HashMap[String,mutable.HashMap[String,Weight]] = {
  val itemMatrix = mutable.HashMap[String, mutable.HashMap[String, Weight]]()
  // Number of users that hold each item, i.e. N(item).
  val itemCount = mutable.HashMap[String, Double]()

  // min/max ratio of two weights; 0.0 when the larger one is 0 so that 0/0 cannot yield NaN.
  def ratio(a: Double, b: Double): Double = {
    val larger = math.max(a, b)
    if (larger == 0.0) 0.0 else math.min(a, b) / larger
  }

  userMap.foreach { case (_, items) =>
    // Same for every pair of this user, so compute it once per user.
    val activityPenalty = 1.0 / math.log1p(items.size)

    items.foreach { case (itemI, weightI) =>
      val related = itemMatrix.getOrElseUpdate(itemI, mutable.HashMap[String, Weight]())
      itemCount += (itemI -> (itemCount.getOrElse(itemI, 0.0) + 1))

      items.foreach { case (itemJ, weightJ) =>
        if (itemI != itemJ) {
          val acc = related.getOrElse(itemJ, Weight(0.0, 0.0))
          // Many weighting schemes are possible (cosine similarity, Euclidean distance,
          // Pearson correlation, ...); these two are the ones implemented here.
          val updated =
            if (useCooccurrence) {
              Weight(activityPenalty + acc.hot, acc.score)
            } else {
              // When users carry ratings/preferences, compare the two weights directly.
              Weight(
                ratio(weightI.hot, weightJ.hot) + acc.hot,
                ratio(weightI.score, weightJ.score) + acc.score)
            }
          related += (itemJ -> updated)
        }
      }
    }
  }

  if (useCooccurrence) {
    itemMatrix.foreach { case (itemI, related) =>
      val countI = itemCount.getOrElse(itemI, 0.0)
      // Iterate over a snapshot so the map is not being updated while it is traversed.
      related.toList.foreach { case (itemJ, w) =>
        // Divide by sqrt(N(i) * N(j)) to damp pairs involving items held by many users.
        val normalised = w.hot / math.sqrt(countI * itemCount.getOrElse(itemJ, 0.0))
        related += (itemJ -> Weight(normalised, 0.0))
      }
    }
  }

  itemMatrix
}

itemCF對使用者進行推薦

/**
 * Prints item-based recommendations for one user to standard output.
 *
 * For each video in the user's history the `k` most similar videos (by `hot`) are taken
 * from `itemMatrix`; those the user has not already seen accumulate their `hot` value
 * into a candidate table. The ten candidates with the highest accumulated `hot` are
 * printed on one line, separated by `;`.
 *
 * @param itemMatrix item id -> (related item id -> weight), as produced by `itemResult`
 * @param userArray  the interaction records of a single user; `null` is treated as empty
 * @param k          how many similar items to consider per history item
 */
def getItemRecommend(itemMatrix:mutable.HashMap[String,mutable.HashMap[String,Weight]],userArray: Array[UserBean],
                     k:Int): Unit = {
  // Callers look the user up with a null fallback, so tolerate a missing history.
  val history = Option(userArray).getOrElse(Array.empty[UserBean])
  val seen = history.map(_.videoCode).toSet

  val recommends = mutable.HashMap[String, Weight]()

  history.foreach { record =>
    itemMatrix.get(record.videoCode).foreach { similar =>
      // Keep only the k most similar items for this history entry.
      similar.toList.sortBy(_._2.hot).reverse.take(k).foreach { case (code, w) =>
        if (!seen.contains(code)) {
          // Candidate weight = sum of the similarity weights that lead to it.
          // (An alternative is to also multiply by the user's own weight for the
          // history item, i.e. record.weight.hot.)
          val soFar = recommends.getOrElse(code, Weight(0.0, 0.0)).hot
          recommends += (code -> Weight(soFar + w.hot, (0.0 + w.score) * w.score))
        }
      }
    }
  }

  println(recommends.toList.sortBy(_._2.hot).reverse.take(10).mkString(";"))
}

userCF資料計算

/**
 * Builds the user-to-user similarity matrix from the file-level `itemMap`.
 *
 * For every item and every ordered pair of distinct users (u, v) that share it,
 * `matrix(u)(v)` grows by `1 / log1p(number of users holding the item)`, so items held
 * by many users contribute less. Each entry is then divided by `sqrt(N(u) * N(v))`,
 * where `N(x)` is the number of items user `x` holds.
 *
 * @return user id -> (other user id -> similarity)
 */
def userResult(): mutable.HashMap[String,mutable.HashMap[String,Double]] = {
  // Number of items each user holds, i.e. N(user).
  val userCount = mutable.HashMap[String, Double]()
  val userMatrix = mutable.HashMap[String, mutable.HashMap[String, Double]]()

  itemMap.foreach { case (_, holders) =>
    // Same for every pair sharing this item, so compute it once per item.
    val popularityPenalty = 1.0 / math.log1p(holders.size * 1.0)

    holders.foreach { u =>
      val neighbours = userMatrix.getOrElseUpdate(u, mutable.HashMap[String, Double]())
      userCount += (u -> (userCount.getOrElse(u, 0.0) + 1))

      holders.foreach { v =>
        if (u != v) {
          neighbours += (v -> (popularityPenalty + neighbours.getOrElse(v, 0.0)))
        }
      }
    }
  }

  userMatrix.foreach { case (u, neighbours) =>
    val countU = userCount.getOrElse(u, 0.0)
    // Iterate over a snapshot so the map is not being updated while it is traversed.
    neighbours.toList.foreach { case (v, raw) =>
      neighbours += (v -> raw / math.sqrt(countU * userCount.getOrElse(v, 0.0)))
    }
  }

  userMatrix
}
/**
 * Prints user-based recommendations for one user to standard output.
 *
 * The user id is taken from the first record of `userArray`. The `k` most similar users
 * are looked up in `userMatrix`; every video they hold (per the file-level `userMap`)
 * that the target user has not seen accumulates that neighbour's similarity. The ten
 * best candidates are printed as `<userId> : <candidates separated by ;>`.
 *
 * Nothing is printed when `userArray` is `null` or empty, or when the user has no row
 * in `userMatrix`.
 *
 * @param userMatrix user id -> (other user id -> similarity), as produced by `userResult`
 * @param userArray  the interaction records of a single user
 * @param k          how many similar users to consider
 */
def getUserRecommend(userMatrix : mutable.HashMap[String,mutable.HashMap[String,Double]],userArray: Array[UserBean],
                     k:Int): Unit = {
  for {
    history    <- Option(userArray)
    first      <- history.headOption
    neighbours <- userMatrix.get(first.userId)
  } {
    val userId = first.userId
    val seen = history.map(_.videoCode).toSet
    val recommends = mutable.HashMap[String, Double]()

    // Draw candidates from the k most similar users only.
    neighbours.toList.sortBy(_._2).reverse.take(k).foreach { case (neighbourId, similarity) =>
      userMap.get(neighbourId).foreach { videos =>
        videos.foreach { case (code, _) =>
          if (!seen.contains(code)) {
            // Candidate weight = sum of the similarities of the neighbours holding it.
            // (Alternatives: sum the neighbours' own `hot` weights for the video, or
            // the product of similarity and that `hot` weight.)
            recommends += (code -> (recommends.getOrElse(code, 0.0) + similarity))
          }
        }
      }
    }

    println(userId +" : "+recommends.toList.sortBy(_._2).reverse.take(10).mkString(";"))
  }
}
/**
 * Demo entry point: indexes a small hard-coded data set, then prints itemCF and userCF
 * recommendations for users "1" to "4".
 *
 * @param args command-line arguments (unused)
 */
def main(args: Array[String]): Unit = {
  val userArray = Array(
    UserBean("1","A",Weight(2.0,0.0)),
    UserBean("2","B",Weight(3.0,0.0)),
    UserBean("1","B",Weight(4.0,0.0)),
    UserBean("1","C",Weight(1.0,0.0)),
    UserBean("3","C",Weight(1.0,0.0)),
    UserBean("4","H",Weight(1.0,0.0)),
    UserBean("4","A",Weight(1.0,0.0)),
    UserBean("4","B",Weight(1.0,0.0)),
    UserBean("5","E",Weight(1.0,0.0)),
    UserBean("5","A",Weight(1.0,0.0))
  )

  readData(userArray)
  val byUser = userArray.groupBy(_.userId)
  val targets = List("1", "2", "3", "4")

  println("=======itemCF")
  val itemMatrix = itemResult()
  // Look each user up as an Option so an unknown id is skipped instead of passing null.
  targets.foreach(id => byUser.get(id).foreach(getItemRecommend(itemMatrix, _, 20)))

  println("=======userCF")
  val userMatrix = userResult()
  targets.foreach(id => byUser.get(id).foreach(getUserRecommend(userMatrix, _, 20)))
}
上一篇: 什麼是GC

繼續閱讀