天天看點

hadoop3.1.3-spark-2.4.6-bin-hadoop2.7-hive-3.1.2單機版本安裝

環境:centos7.4

vim /etc/profile

export JAVA_HOME=/usr/java/java8
export JRE_HOME=/usr/java/java8/jre
export CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export HIVE_HOME=/usr/java/hive
export HBASE_HOME=/usr/java/hbase
export SPARK_HOME=/usr/java/spark
export SCALA_HOME=/usr/java/scala
export HADOOP_HOME=/usr/java/hadoop
export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin:$HADOOP_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/sbin:$HBASE_HOME/bin:$HIVE_HOME/bin:$SPARK_HOME/bin:$SPARK_HOME/bin:$SPARK_HOME/sbin
           

一、安裝hadoop

修改配置檔案:core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl" target="_blank" rel="external nofollow"  target="_blank" rel="external nofollow"  target="_blank" rel="external nofollow" ?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
       <property>
		<name>fs.defaultFS</name>
		<value>hdfs://bigdata:8020</value>
	</property>
	<!-- 臨時檔案存儲目錄 -->
	<property>
		<name>hadoop.tmp.dir</name>
		<value>/usr/java/hadoop/datas/tmp</value>
	</property>
    <!--  緩沖區大小,實際工作中根據伺服器性能動态調整 -->
	<property>
		<name>io.file.buffer.size</name>
		<value>8192</value>
	</property>
    <!--  開啟hdfs的垃圾桶機制,删除掉的資料可以從垃圾桶中回收,機關分鐘 -->
	<property>
		<name>fs.trash.interval</name>
		<value>10080</value>
	</property>
	<property>
		<name>hadoop.proxyuser.hadoop.hosts</name>
		<value>*</value>
	</property>
	<property>
		<name>hadoop.proxyuser.hadoop.groups</name>
		<value>root</value>
	</property>
</configuration>
           

修改hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl" target="_blank" rel="external nofollow"  target="_blank" rel="external nofollow"  target="_blank" rel="external nofollow" ?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
	<property>
		<name>dfs.namenode.name.dir</name>
		<value>file:///usr/java/hadoop/datas/datas/namenode/namenodedatas</value>
	</property>
	<property>
		<name>dfs.blocksize</name>
		<value>134217728</value>
	</property>
	<property>
		<name>dfs.namenode.handler.count</name>
		<value>10</value>
	</property>
	<property>
		<name>dfs.datanode.data.dir</name>
		<value>file:///usr/java/hadoop/datas/datas/datanode/datanodeDatas</value>
	</property>
	<property>
		<name>dfs.namenode.http-address</name>
		<value>bigdata:50070</value>
	</property>
	<property>
		<name>dfs.replication</name>
		<value>1</value>
	</property>
	<property>
		<name>dfs.permissions.enabled</name>
		<value>false</value>
	</property>
	<property>
		<name>dfs.namenode.checkpoint.edits.dir</name>
		<value>file:///usr/java/hadoop/datas/dfs/nn/snn/edits</value>
	</property>
	<property>
		<name>dfs.namenode.secondary.http-address</name>
		<value>bigdata:50090</value>
	</property>
	<property>
		<name>dfs.namenode.edits.dir</name>
		<value>file:///usr/java/hadoop/datas/datas/dfs/nn/edits</value>
	</property>
	<property>
		<name>dfs.namenode.checkpoint.dir</name>
		<value>file:///usr/java/hadoop/datas/datas/dfs/snn/name</value>
	</property>
</configuration>
           

修改mapred-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl" target="_blank" rel="external nofollow"  target="_blank" rel="external nofollow"  target="_blank" rel="external nofollow" ?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
	<property>
		<name>mapreduce.framework.name</name>
		<value>yarn</value>
	</property>
	<property>
		<name>mapreduce.map.memory.mb</name>
		<value>1024</value>
	</property>
	<property>
		<name>mapreduce.map.java.opts</name>
		<value>-Xmx512M</value>
	</property>
	<property>
		<name>mapreduce.reduce.memory.mb</name>
		<value>1024</value>
	</property>
	<property>
		<name>mapreduce.reduce.java.opts</name>
		<value>-Xmx512M</value>
	</property>
	<property>
		<name>mapreduce.task.io.sort.mb</name>
		<value>256</value>
	</property>
	<property>
		<name>mapreduce.task.io.sort.factor</name>
		<value>100</value>
	</property>
	<property>
		<name>mapreduce.reduce.shuffle.parallelcopies</name>
		<value>25</value>
	</property>
	<property>
		<name>mapreduce.jobhistory.address</name>
		<value>bigdata:10020</value>
	</property>
	<property>
		<name>mapreduce.jobhistory.webapp.address</name>
		<value>bigdata:19888</value>
	</property>
	<property>
		<name>mapreduce.jobhistory.intermediate-done-dir</name>
		<value>/usr/java/hadoop/datas/jobhsitory/intermediateDoneDatas</value>
	</property>
	<property>
		<name>mapreduce.jobhistory.done-dir</name>
		<value>/usr/java/hadoop/datas/jobhsitory/DoneDatas</value>
	</property>
	<property>
	  <name>yarn.app.mapreduce.am.env</name>
	  <value>HADOOP_MAPRED_HOME=/usr/java/hadoop</value>
	</property>
	<property>
	  <name>mapreduce.map.env</name>
	  <value>HADOOP_MAPRED_HOME=/usr/java/hadoop/</value>
	</property>
	<property>
	  <name>mapreduce.reduce.env</name>
	  <value>HADOOP_MAPRED_HOME=/usr/java/hadoop</value>
	</property>
</configuration>
           

修改yarn-site.xml

<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>
	<property>
		<name>dfs.namenode.handler.count</name>
		<value>100</value>
	</property>
	<property>
		<name>yarn.log-aggregation-enable</name>
		<value>true</value>
	</property>
	<property>
		<name>yarn.resourcemanager.address</name>
		<value>bigdata:8032</value>
	</property>
	<property>
		<name>yarn.resourcemanager.scheduler.address</name>
		<value>bigdata:8030</value>
	</property>
	<property>
		<name>yarn.resourcemanager.resource-tracker.address</name>
		<value>bigdata:8031</value>
	</property>
	<property>
		<name>yarn.resourcemanager.admin.address</name>
		<value>bigdata:8033</value>
	</property>
	<property>
		<name>yarn.resourcemanager.webapp.address</name>
		<value>bigdata:8088</value>
	</property>
	<property>
		<name>yarn.resourcemanager.hostname</name>
		<value>bigdata</value>
	</property>
	<property>
		<name>yarn.scheduler.minimum-allocation-mb</name>
		<value>1024</value>
	</property>
	<property>
		<name>yarn.scheduler.maximum-allocation-mb</name>
		<value>2048</value>
	</property>
	<property>
		<name>yarn.nodemanager.vmem-pmem-ratio</name>
		<value>2.1</value>
	</property>
	<!-- 設定不檢查虛拟記憶體的值,不然記憶體不夠會報錯 -->
	<property>
		<name>yarn.nodemanager.vmem-check-enabled</name>
		<value>false</value>
	</property>
	<property>
		<name>yarn.nodemanager.resource.memory-mb</name>
		<value>1024</value>
	</property>
	<property>
		<name>yarn.nodemanager.resource.detect-hardware-capabilities</name>
		<value>true</value>
	</property>
	<property>
		<name>yarn.nodemanager.local-dirs</name>
		<value>file:///usr/java/hadoop/datas/nodemanager/nodemanagerDatas</value>
	</property>
	<property>
		<name>yarn.nodemanager.log-dirs</name>
		<value>file:///usr/java/hadoop/datas/nodemanager/nodemanagerLogs</value>
	</property>
	<property>
		<name>yarn.nodemanager.log.retain-seconds</name>
		<value>10800</value>
	</property>
	<property>
		<name>yarn.nodemanager.remote-app-log-dir</name>
		<value>/usr/java/hadoop/datas/remoteAppLog/remoteAppLogs</value>
	</property>
	<property>
		<name>yarn.nodemanager.remote-app-log-dir-suffix</name>
		<value>logs</value>
	</property>
	<property>
		<name>yarn.nodemanager.aux-services</name>
		<value>mapreduce_shuffle</value>
	</property>
	<property>
		<name>yarn.log-aggregation.retain-seconds</name>
		<value>18144000</value>
	</property>
	<property>
		<name>yarn.log-aggregation.retain-check-interval-seconds</name>
		<value>86400</value>
	</property>
	<!-- yarn上面運作一個任務,最少需要1.5G記憶體,虛拟機沒有這麼大的記憶體就調小這個值,不然會報錯 -->
	<property>
        <name>yarn.app.mapreduce.am.resource.mb</name>
        <value>1024</value>
	</property>
</configuration>
           

二、安裝Hive

(1) 複制mysql驅動程式到hive的lib目錄下。

(2) 配置hive-site.xml

vim hive-site.xml

<configuration>
<property>
	<name>javax.jdo.option.ConnectionPassword</name>
	<value>root</value>
	<description>password to use against metastore database</description>
</property>
<property>
	<name>javax.jdo.option.ConnectionUserName</name>
	<value>root</value>
	<description>Username to use against metastore database</description>
</property>
<property>
	<name>javax.jdo.option.ConnectionURL</name>
	<value>jdbc:mysql://127.0.0.1:3306/hive</value>
</property>
<property>
	<name>javax.jdo.option.ConnectionDriverName</name>
	<value>com.mysql.jdbc.Driver</value>
	<description>Driver class name for a JDBC metastore</description>
</property>
</configuration>
           

(3) 在msyql中建立存放hive資訊的資料庫

create database if not exists hive;

insert into mysql.user(Host,User,Password)values('%','hadoop',password('hadoop'));

grant all privileges on hivedb.* to 'hadoop'@'%' identified by 'hadoop';

(4) 初始化hive的中繼資料(表結構)到mysql中

cd /soft/hive/bin

schematool -dbType mysql -initSchema

不同版本的hadoop或hive裡面的guava包版本都不同,重點是hadoop裡的包比hive裡的版本高。

解決問題:

删除hive中低版本的guava包,把hadoop裡的複制到hive的lib目錄下即可。

三、安裝spark

vim spark-env.sh
export SCALA_HOME=/usr/java/scala
export JAVA_HOME=/usr/java/java8
export HADOOP_HOME=/usr/java/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_LOG_DIR=/usr/java/spark/logs
export SPARK_PID_DIR=/usr/java/spark
export SPARK_DRIVER_MEMORY=512M
export SPARK_MASTER_HOST=bigdata

vim spark-defaults.conf
spark.yarn.jars=hdfs://ns/spark-lib/*.jar
spark.yarn.am.memory=512M
spark.driver.memory=512M
spark.executor.memory=512M
spark.driver.extraClassPath=/usr/java/spark/lib/*