以下是本人做的测试,如果有错误请及时指正,有问题欢迎一起讨论。
情景:
需要读取HDFS上的数据,处理之后,写入到MySQL数据库里面去。
实现:
1、版本
spark版本:1.2.1
MySQL版本:5.6.21
2、数据准备
HDFS上(文本文件,每行一条记录,两个字段之间以 ^ 分隔,第二个字段为整数):
MySQL里(spark_test 数据库中的 info 表,包含 info1(字符串)和 info2(整数)两列):
3、spark代码
package sparkTest
import java.sql.{DriverManager, PreparedStatement, Connection}
import org.apache.spark.{SparkContext, SparkConf}
/**
 * Spark job that reads `^`-delimited text records from the path given as the first
 * command-line argument and inserts them into the MySQL table `info`.
 */
object sparkToMysql {

  /** Record with the same two fields as the inserted rows; not referenced by the job itself. */
  case class info(info1: String, info2: Int)

  /**
   * Inserts every `(info1, info2)` pair of one partition into the `info` table.
   *
   * One connection and one prepared statement are opened per call and reused for all
   * rows, then closed in reverse order even when an insert fails.
   *
   * @param iterator the records of a single partition
   */
  def toMySQL(iterator: Iterator[(String, Int)]): Unit = {
    val sql = "insert into info(info1, info2) values (?, ?)"
    try {
      Class.forName("com.mysql.jdbc.Driver")
      val conn: Connection =
        DriverManager.getConnection("jdbc:mysql://localhost:3306/spark_test", "root", "111111")
      try {
        // Prepare once and rebind per row: preparing inside the loop leaked every
        // statement except the last one, because only the last reference was closed.
        val ps: PreparedStatement = conn.prepareStatement(sql)
        try {
          iterator.foreach { case (first, second) =>
            ps.setString(1, first)
            ps.setInt(2, second)
            ps.executeUpdate()
          }
        } finally {
          ps.close()
        }
      } finally {
        // Runs even if closing the statement throws, so the connection is never leaked.
        conn.close()
      }
    } catch {
      // NOTE(review): failures are only printed, so the caller (and Spark) never sees the
      // error and rows after the failing one are skipped. Kept as the original best-effort
      // behaviour; confirm this is intended before using it outside of a test.
      case e: Exception => e.printStackTrace()
    }
  }

  /**
   * Entry point.
   *
   * @param args `args(0)` is the input path; each line must look like `text^number`
   */
  def main(args: Array[String]): Unit = {
    require(args.nonEmpty, "usage: sparkToMysql <input-path>")
    // The master is hard-coded here; a value set on SparkConf takes precedence over the
    // --master option of spark-submit.
    val conf = new SparkConf().setAppName("sparkToMysql").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val dataFromHDFS = sc.textFile(args(0))
        .map(_.split("\\^"))
        .map(line => (line(0), line(1).toInt))
      dataFromHDFS.foreachPartition(toMySQL)
    } finally {
      // Release the context even when the job fails.
      sc.stop()
    }
  }
}
4、运行
/home/lenovo/spark-1.2.1/bin/spark-submit \
--class sparkTest.sparkToMysql \
--driver-class-path /home/lenovo/mysql-connector-java-5.0.8.jar \
/home/lenovo/sparkTest/sparkToMysql.jar \
hdfs://master:9000/sparkToMysql
5、结果