檔案中資料格式:1^A1002^A1522718982.606^time_local=03/Apr/2018:09:29:42 +0800&http_referer=-&http_user_agent=Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36&-=master1^A/do?t=99b61873a98742a3a29a4a6d64bc043f&en=pv&ct=1521163377&v=1&pl=pc
源代碼:
import org.apache.spark._
/**
 * Spark job that reads a text log file, splits every line into fields and
 * prints them.
 *
 * Fields are separated by the literal two-character text `^A`. Where a
 * separator is immediately followed by `/do?`, that prefix is consumed
 * together with the separator.
 */
object LogTransfer {

  /**
   * Reads the file at `args(0)`, splits each line into fields and prints each
   * record preceded by a dashed separator line.
   *
   * @param args command-line arguments; `args(0)` is the path of the log file
   * @throws IllegalArgumentException if no path is supplied
   */
  def transfer(args: Array[String]): Unit = {
    // Validate before creating the context so a bad invocation never leaves
    // a SparkContext running.
    require(args.nonEmpty, "usage: LogTransfer <log-file-path>")

    // Alternation is tried left to right, so `^A/do?` wins over a plain `^A`.
    // NOTE(review): this matches a literal caret followed by 'A'. If the real
    // logs use the \u0001 control character (often displayed as ^A), the
    // pattern will not match - confirm against actual input.
    val fieldSeparator = """\^A/do\?|\^A"""

    val sparkConf = new SparkConf().setAppName("LogTransfer").setMaster("local")
    val sc = new SparkContext(sparkConf)
    try {
      val logFile = sc.textFile(args(0))
      val logLines = logFile.map(line => line.split(fieldSeparator))
      // logLines.first.foreach(println)
      // `foreach` is needed here rather than `map`: `map` is a lazy
      // transformation that only describes a new RDD, so its body never runs
      // unless an action (such as `foreach`) is invoked afterwards.
      logLines.foreach { log =>
        println("---------------------------------")
        log.foreach(println)
        // business-data processing goes here
      }
    } finally {
      // Always release the context, even when reading or processing fails.
      sc.stop()
    }
  }

  /**
   * Program entry point.
   *
   * @param args command-line arguments; the first one is the log file path
   */
  def main(args: Array[String]): Unit = {
    transfer(args)
  }
}