OLAP stacks built on Hadoop + Spark + MPP are increasingly common these days, and the first step in learning Spark is setting up a test environment.
I. Prerequisites
A minimal Spark (or, for that matter, Hadoop) cluster is three machines:
192.168.206.27 master
192.168.206.33 slave1
192.168.203.19 slave2
1. Set the hostnames
vi /etc/sysconfig/network
vi /etc/hosts
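For reference, a minimal sketch of what the two files should end up containing, using the addresses above (on each slave, HOSTNAME becomes slave1 or slave2):
# /etc/sysconfig/network on master
NETWORKING=yes
HOSTNAME=master
# /etc/hosts, identical on all three machines
192.168.206.27 master
192.168.206.33 slave1
192.168.203.19 slave2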
2. Make sure the locale is consistent
/etc/sysconfig/i18n
A quick note here: I usually set this to zh_CN.UTF-8 because some messages are easier to follow in Chinese, but many people prefer en_US.UTF-8. If you need Chinese language support, install:
yum groupinstall chinese-support
yum install fonts-chinese.noarch
yum install m17n-db-common-cjk
yum install m17n-db-chinese
vi /etc/sysconfig/i18n
LANG="zh_CN.UTF-8"
SYSFONT="latarcyrheb-sun16"
LC_ALL="zh_CN.UTF-8"
vi /etc/profile
export LC_ALL="zh_CN.UTF-8"
# if that doesn't take effect, try a reboot; if it still fails, run localedef -v -c -i zh_CN -f UTF-8 zh_CN.UTF-8
# one unusual case I hit was fixed with yum -y install fontforge
3. Keep the clocks in sync with NTP: crontab -e
I'm in the habit of using ntpdate:
0 */2 * * * /usr/sbin/ntpdate asia.pool.ntp.org && /sbin/hwclock --systohc
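Before trusting the cron job, it's worth checking that the machine can reach the pool at all; ntpdate's query-only flag does that without touching the clock:
/usr/sbin/ntpdate -q asia.pool.ntp.org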
II. Installing the Hadoop Cluster
1. Install the JDK on all three machines
# upload the JDK to /usr/local
cd /usr/local
rpm -ivh jdk-7u80-linux-x64.rpm
vi /etc/profile
export JAVA_HOME=/usr/java/jdk1.7.0_80
export PATH=$PATH:$JAVA_HOME/bin
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export JAVA_HOME JAVA_BIN PATH CLASSPATH
PATH=$PATH:$HOME/bin
export PATH
source /etc/profile
java -version
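If the profile took effect, this should print something like:
java version "1.7.0_80"
Java(TM) SE Runtime Environment (build 1.7.0_80-b15)
Java HotSpot(TM) 64-Bit Server VM (build 24.80-b11, mixed mode)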
# ssh to slave1 and slave2 and repeat the same JDK setup there
2. Install Scala
For Spark 1.6 we install the 2.11 line:
# http://www.scala-lang.org/download/2.11.8.html
mkdir -p /home/scala
tar -xzvf scala-2.11.8.tgz -C /home/scala/
vi /etc/profile
export SCALA_HOME=/home/scala/scala-2.11.8
PATH=$PATH:$HOME/bin:$SCALA_HOME/bin
export PATH
source /etc/profile
scala -version
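Expected output, give or take the copyright year:
Scala code runner version 2.11.8 -- Copyright 2002-2016, LAMP/EPFL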
scp -r /home/scala root@slave1:/home
scp -r /home/scala root@slave2:/home
# after ssh-ing into each slave, set /etc/profile there the same way
3. Install Hadoop
mkdir -p /home/hadoop
tar xzvf hadoop-2.7.2.tar.gz -C /home/hadoop/
cd /home/hadoop/hadoop-2.7.2/etc/hadoop
# 3.1 set JAVA_HOME in hadoop-env.sh
export JAVA_HOME=/usr/java/jdk1.7.0_80
# 3.2 set JAVA_HOME in yarn-env.sh
export JAVA_HOME=/usr/java/jdk1.7.0_80
# 3.3 list the slave nodes by IP or hostname in slaves
slave1
slave2
# 3.4 edit core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000/</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/hadoop-2.7.2/tmp</value>
</property>
</configuration>
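With core-site.xml in place, a quick way to confirm Hadoop actually parses your configuration is hdfs getconf, run from the hadoop root (the expected value assumes the fs.defaultFS set above):
bin/hdfs getconf -confKey fs.defaultFS
# expected output: hdfs://master:9000/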
# 3.5 edit hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:9001</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/hadoop/hadoop-2.7.2/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/home/hadoop/hadoop-2.7.2/dfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
</configuration>
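One thing to watch: dfs.replication is set to 3, but this cluster only has two DataNodes (slave1 and slave2), so HDFS will flag every block as under-replicated; 2 is arguably the better value here. Once the cluster is up, you can check replication and DataNode health with:
hdfs dfsadmin -report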
# 3.6 edit mapred-site.xml
cp mapred-site.xml.template mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
# 3.7 edit yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8035</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>master:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
</configuration>
4. Distribute and verify
# 4.1 distribute
scp -r hadoop/ root@slave1:/home
scp -r hadoop/ root@slave2:/home
# 4.2 format the namenode
# add the environment variables first
vi /etc/profile
export HADOOP_HOME=/home/hadoop/hadoop-2.7.2
PATH=$PATH:$HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin
export PATH
# do the same on slave1 and slave2
cd $HADOOP_HOME
bin/hdfs namenode -format
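A successful format should end with a log line like "Storage directory /home/hadoop/hadoop-2.7.2/dfs/name has been successfully formatted."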
# == if the error "Unable to load native-hadoop library" appears:
cd lib/native
file libhadoop.so.1.0.0  # it reports 64-bit, so this is not a 32/64-bit problem
# download: wget http://dl.bintray.com/sequenceiq/sequenceiq-bin/hadoop-native-64-2.7.0.tar
tar xvf hadoop-native-64-2.7.0.tar -C $HADOOP_HOME/lib/native/
# replacing hadoop's bundled native libraries with these builds fixes it
# 4.3 start
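Note that start-dfs.sh and start-yarn.sh log into the slaves over SSH, so the master needs passwordless SSH to both slaves (and to itself). If that isn't set up yet, a minimal sketch, assuming the root account used in the scp commands above:
ssh-keygen -t rsa        # on master, accept the defaults
ssh-copy-id root@master
ssh-copy-id root@slave1
ssh-copy-id root@slave2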
sbin/start-dfs.sh
sbin/start-yarn.sh
jps
# on master, jps should show the following processes:
SecondaryNameNode
NameNode
ResourceManager
Jps
# on the slaves:
NodeManager
Jps
DataNode
# browser: http://master:8088 — the YARN web UI
# http://master:50070 — the HDFS web UI
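A quick smoke test from the master to prove reads and writes actually work (the /tmp/smoke path is just an example):
hdfs dfs -mkdir -p /tmp/smoke
hdfs dfs -put /etc/hosts /tmp/smoke/
hdfs dfs -cat /tmp/smoke/hosts
yarn node -list          # should list slave1 and slave2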
5. Install Spark
# be sure to pick the package pre-built for Hadoop 2.6 and later
mkdir -p /home/spark
tar xzvf spark-1.6.1-bin-hadoop2.6.tgz -C /home/spark/
cd /home/spark/
mv spark-1.6.1-bin-hadoop2.6/ spark-1.6.1/
cd /home/spark/spark-1.6.1/conf
# 5.1 configure
cp spark-env.sh.template spark-env.sh
vi spark-env.sh
export SCALA_HOME=/home/scala/scala-2.11.8
export JAVA_HOME=/usr/java/jdk1.7.0_80
export HADOOP_HOME=/home/hadoop/hadoop-2.7.2
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
SPARK_MASTER_IP=master                     # where the standalone Master runs
SPARK_LOCAL_DIRS=/home/spark/spark-1.6.1   # scratch space for shuffle/spill files
# example sizing below; adjust to your machines
SPARK_DRIVER_MEMORY=1G
SPARK_WORKER_INSTANCES=1                   # Worker processes per slave
SPARK_WORKER_MEMORY=1024m                  # memory each Worker can hand to executors
cp slaves.template slaves
vi slaves
slave1
slave2
# 5.2 distribute
scp -r /home/spark/ root@slave1:/home
scp -r /home/spark/ root@slave2:/home
# 5.3 start
sbin/start-all.sh
# jps check: master now has an extra Master process
Jps
SecondaryNameNode
Master
NameNode
ResourceManager
# the slaves now have an extra Worker process
DataNode
Worker
Jps
NodeManager
# open the Spark web UI: http://master:8080
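To confirm the standalone cluster actually accepts work, attach a shell and run something small (assuming the Master's default port 7077):
./bin/spark-shell --master spark://master:7077
scala> sc.parallelize(1 to 1000).sum()   # should print res0: Double = 500500.0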
# 5.4 for convenience, add Spark to /etc/profile as well
# the complete set:
export JAVA_HOME=/usr/java/jdk1.7.0_80
export PATH=$PATH:$JAVA_HOME/bin
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export JAVA_HOME JAVA_BIN PATH CLASSPATH
export SCALA_HOME=/home/scala/scala-2.11.8
export HADOOP_HOME=/home/hadoop/hadoop-2.7.2
export SPARK_HOME=/home/spark/spark-1.6.1
PATH=$PATH:$HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$SPARK_HOME/bin:$SPARK_HOME/sbin
export PATH
# 5.5 run the examples
# local mode, two threads (run-example in Spark 1.6 reads the MASTER env variable)
MASTER=local[2] ./bin/run-example SparkPi 10
# run on the Spark standalone cluster
./bin/spark-submit \
--class org.apache.spark.examples.SparkPi \
--master spark://master:7077 \
lib/spark-examples-1.6.1-hadoop2.6.0.jar \
100
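If the submit succeeds, the driver output includes a line like "Pi is roughly 3.14…"; the trailing argument (100 here) is the number of partitions the sampling is spread over.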
# run on a Spark on YARN cluster in yarn-cluster mode (--master can also be yarn-client)
./bin/spark-submit \
--class org.apache.spark.examples.SparkPi \
--master yarn-cluster \
lib/spark-examples*.jar \
100
# Note that Spark on YARN supports two deploy modes, yarn-cluster and yarn-client. Broadly speaking, yarn-cluster suits production,
# while yarn-client suits interactive use and debugging, i.e. when you want to see the application's output quickly.
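In yarn-cluster mode the driver itself runs inside YARN, so the "Pi is roughly …" line does not appear on your terminal; it ends up in the container logs, which you can pull back with the standard YARN command (assuming log aggregation is enabled):
yarn logs -applicationId <application id from the submit output>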
spark-shell --version
That's it for the installation notes; application notes to follow…