# hadoop安装
# 本内容你将获得
- hadoop安装环境准备
- Hadoop安装包上传
- CentOS7上安装Hadoop
- 验证Hadoop安装
# 软件安装
# 说明
- hadoop依赖 Java环境
# 环境准备
# 设置免密登录
#安装时进行文件分发
cd /root
ssh-keygen -t rsa
ssh-copy-id -i ~/.ssh/id_rsa.pub root@localhost
# 设置hosts
cd /etc
vi hosts
192.168.17.149 hadoopmaster
保存后,ping hadoopmaster 进行网络测试
# 设置hostname
cd /etc
vi hostname
hadoopmaster
保存后reboot服务器使设置的主机名生效
# 安装包上传
# 上传hadoop-3.3.4.tar.gz到服务器/root/tools目录
解压安装包
tar -xzvf hadoop-3.3.4.tar.gz
# 上传jdk-8u333-linux-x64.tar.gz到服务器/root/tools目录
解压JDK包
tar -xzvf jdk-8u333-linux-x64.tar.gz
配置JAVA_HOME环境变量
vi /etc/profile
export JAVA_HOME=/root/tools/jdk1.8.0_333
export JRE_HOME=$JAVA_HOME/jre
export PATH=$PATH:$JAVA_HOME/bin
export CLASSPATH=.:$JAVA_HOME/jre/lib:$JAVA_HOME/lib:$JAVA_HOME/lib/tools.jar
保存后,source /etc/profile 使配置生效
# 安装开始
# 配置hadoop环境变量
vi /etc/profile
# HADOOP env variables
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export HADOOP_HOME=/root/tools/hadoop-3.3.4
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
保存后,source /etc/profile 使配置生效
# 修改core-site.xml,在<configuration>中增加如下<property>
cd /root/tools/hadoop-3.3.4/etc/hadoop
vi core-site.xml
<property>
<name>fs.defaultFS</name> <!-- fs.default.name 为 Hadoop 1.x 旧名,已弃用,Hadoop 2/3 使用 fs.defaultFS -->
<value>hdfs://192.168.17.149:9000</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
<description>Allow the superuser root to impersonate members of any group</description>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
<description>The superuser root can connect from any host to impersonate a user</description>
</property>
# 修改 hdfs-site.xml,在<configuration>中增加如下<property>
cd /root/tools/hadoop-3.3.4/etc/hadoop
vi hdfs-site.xml
<property>
<name>dfs.namenode.name.dir</name>
<value>/root/hadoop_store/hdfs/namenode2</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/root/hadoop_store/hdfs/datanode2</value>
</property>
<property>
<name>dfs.namenode.http-address</name> <!-- dfs.http.address 已弃用,新名为 dfs.namenode.http-address -->
<value>0.0.0.0:50070</value>
</property>
保存后,创建对应的目录
mkdir -p /root/hadoop_store/hdfs/namenode2
mkdir -p /root/hadoop_store/hdfs/datanode2
# 修改mapred-site.xml,在<configuration>中增加如下<property>
cd /root/tools/hadoop-3.3.4/etc/hadoop
vi mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- 注意:mapreduce.job.tracker 是 Hadoop 1.x 的 JobTracker 配置,
     Hadoop 3.x 已改用 YARN,该配置不再生效,可以移除 -->
<property>
<name>mapreduce.job.tracker</name>
<value>hdfs://192.168.17.149:8001</value>
<final>true</final>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/root/tools/hadoop-3.3.4</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/root/tools/hadoop-3.3.4</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/root/tools/hadoop-3.3.4</value>
</property>
<!-- 开启jobhistory服务-->
<property>
<name>mapreduce.jobhistory.address</name>
<value>192.168.17.149:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>192.168.17.149:19888</value>
</property>
# 修改yarn-site.xml,在<configuration>中增加如下<property>
cd /root/tools/hadoop-3.3.4/etc/hadoop
vi yarn-site.xml
<property>
<!-- 为mr程序提供shuffle服务 -->
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>192.168.17.149:8025</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>192.168.17.149:8030</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>192.168.17.149:8050</value>
</property>
<!--日志聚集 -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- 日志信息保存在文件系统上的最长时间 秒为单位-->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value> <!-- 7天(604800秒);原值640800疑为笔误 -->
</property>
<!-- Site specific YARN configuration properties -->
<!-- resource,manager主节点所在机器 -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>192.168.17.149</value>
</property>
<!-- 一台NodeManager的总可用内存资源 -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>12288</value>
</property>
<!-- 一台NodeManager的总可用(逻辑)cpu核数 -->
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>4</value>
</property>
<!-- 是否检查容器的虚拟内存使用超标情况 -->
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<!-- 容器的虚拟内存使用上限:与物理内存的比率 -->
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>2.1</value>
</property>
<!-- container内存按照默认大小配置,即为最小1G,最大8G -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>1024</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>8192</value>
</property>
<!-- 开启jobhistory服务-->
<property>
<name>yarn.log.server.url</name>
<value>http://192.168.17.149:19888/jobhistory/logs/</value>
</property>
# 修改hadoop-env.sh
cd /root/tools/hadoop-3.3.4/etc/hadoop
vi hadoop-env.sh
# JAVA_HOME=/usr/java/testing hdfs dfs -ls #设置JAVA_HOME
export JAVA_HOME=/root/tools/jdk1.8.0_333
# export HADOOP_HEAPSIZE_MAX= #设置HADOOP_HEAPSIZE
#<!-- 设置hadoop heap 大小(Hadoop 3.x 中 HADOOP_HEAPSIZE 已弃用,改用 HADOOP_HEAPSIZE_MAX,纯数字单位为MB)-->
export HADOOP_HEAPSIZE_MAX=6000
# 修改yarn-env.sh
cd /root/tools/hadoop-3.3.4/etc/hadoop
vi yarn-env.sh
YARN_HEAPSIZE=6000 #在文件最后增加YARN_HEAPSIZE配置
# 在nameNode服务器上进行格式化
[root@hadoopmaster hadoop]# hdfs namenode -format #日志中出现如下提示,说明格式化成功
2022-09-14 16:00:06,267 INFO common.Storage: Storage directory /root/hadoop_store/hdfs/namenode2 has been successfully formatted
# 启停
cd /root/tools/hadoop-3.3.4/sbin
./start-all.sh --启动hadoop服务
./stop-all.sh --停止hadoop服务
mapred --daemon start historyserver --启动jobhistory服务(Hadoop 3.x推荐命令;旧脚本mr-jobhistory-daemon.sh已弃用)
mapred --daemon stop historyserver --停止jobhistory服务
# 验证安装
访问前端界面,验证安装是否成功
# 开放防火墙端口
如服务器开启防火墙,开放hadoop如下端口
--hadoop前端端口
firewall-cmd --zone=public --add-port=50070/tcp --permanent
firewall-cmd --zone=public --add-port=8088/tcp --permanent
firewall-cmd --zone=public --add-port=19888/tcp --permanent
--刷新防火墙
firewall-cmd --reload
# hadoop前端
网址: http://ip:50070
# yarn前端
网址: http://ip:8088
# yarn历史前端
网址: http://ip:19888
# 其他
- 无