Install SSH and configure passwordless SSH login

cd ~/.ssh/                     # If this directory does not exist, run ssh localhost once first
ssh-keygen -t rsa              # Press Enter at every prompt
cat id_rsa.pub >> authorized_keys  # Authorize the key
chmod 600 ./authorized_keys    # Fix the file permissions
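
A quick way to confirm the setup: a second login should no longer prompt for a password.

ssh localhost   # should log in without a password prompt
exit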

Change the hostname

hostnamectl set-hostname hadoop1

# Verify
hostname

# Edit the host list: add a line of the form
vi /etc/hosts
<IP address> <hostname>
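
For example, if the machine's address were 192.168.1.10 (a hypothetical address; substitute your own), the entry would read:

192.168.1.10   hadoop1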

Install the JDK

yum install java-1.8.0-openjdk* -y
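
A quick check that the JDK is in place:

java -version   # should report openjdk version "1.8.0_..."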

Install Hadoop

# Create the HDFS data directories and unpack Hadoop
mkdir -p /home/hadoop/hdfs/tmp /home/hadoop/hdfs/data /home/hadoop/hdfs/name
cd /home/hadoop
tar -zxvf hadoop-3.3.6.tar.gz

# Add environment variables
vi /etc/profile

export HADOOP_HOME=/home/hadoop/hadoop-3.3.6
export PATH=$PATH:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin

# Apply the changes
source /etc/profile
# Verify
echo $HADOOP_HOME
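
With the PATH in effect, the hadoop binary itself makes a good sanity check:

hadoop version   # should print Hadoop 3.3.6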

# Add environment variables to hadoop-env.sh
# (echo $JAVA_HOME shows the JDK path if it is already set)
vi /home/hadoop/hadoop-3.3.6/etc/hadoop/hadoop-env.sh

export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.412.b08-1.el7_9.x86_64/jre   # adjust to the JDK path on your machine
export HADOOP_HOME=/home/hadoop/hadoop-3.3.6
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
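
If JAVA_HOME is not set, one way to locate the JDK that yum installed (assuming java is on the PATH):

readlink -f $(which java)   # prints e.g. /usr/lib/jvm/java-1.8.0-openjdk-.../jre/bin/java; drop the trailing /bin/java for JAVA_HOME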

# Edit core-site.xml: add the following inside the <configuration> tag
vi /home/hadoop/hadoop-3.3.6/etc/hadoop/core-site.xml

<!-- NameNode address -->
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoop1:9090</value>
</property>

<!-- Hadoop data storage directory -->
<property>
    <name>hadoop.tmp.dir</name>
    <value>/home/hadoop/hdfs/tmp</value>
</property>

<property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
</property>

<property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
</property>
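
Once the file is saved, hdfs getconf reads back the resolved configuration, which makes a convenient check:

hdfs getconf -confKey fs.defaultFS   # should print hdfs://hadoop1:9090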

# Edit hdfs-site.xml: add the following inside the <configuration> tag
vi /home/hadoop/hadoop-3.3.6/etc/hadoop/hdfs-site.xml

<property>
    <name>dfs.replication</name>
    <value>1</value>
    <description>Number of replicas; the default is 3, and it must not exceed the number of DataNodes</description>
</property>

<property>
    <name>dfs.namenode.name.dir</name>  
    <value>/home/hadoop/hdfs/name</value>  
    <final>true</final>
</property>  

<property>
    <name>dfs.datanode.data.dir</name>  
    <value>/home/hadoop/hdfs/data</value>  
    <final>true</final>
</property>  

<property>
    <name>dfs.http.address</name>
    <value>0.0.0.0:50070</value>
    <description>Bind to 0.0.0.0 instead of the loopback address so that port 50070 is reachable from outside the host</description>
</property>

<property>
    <name>dfs.permissions</name>  
    <value>false</value>
</property>
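
Note that dfs.http.address and dfs.permissions are the Hadoop 2.x property names; Hadoop 3.3.6 still honors them through its deprecation mapping (with a warning at startup), but the current names are dfs.namenode.http-address and dfs.permissions.enabled, so an equivalent non-deprecated form would be:

<property>
    <name>dfs.namenode.http-address</name>
    <value>0.0.0.0:50070</value>
</property>

<property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
</property>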

# Edit mapred-site.xml: add the following inside the <configuration> tag
vi /home/hadoop/hadoop-3.3.6/etc/hadoop/mapred-site.xml

<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
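
On Hadoop 3.x, MapReduce jobs submitted to YARN can fail with classpath errors unless the MapReduce classpath is also configured; the upstream single-node guide adds a property along these lines, which is worth including here if jobs fail to start:

<property>
    <name>mapreduce.application.classpath</name>
    <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
</property>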

# Edit yarn-site.xml: add the following inside the <configuration> tag
vi /home/hadoop/hadoop-3.3.6/etc/hadoop/yarn-site.xml

<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
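
The same upstream guide also whitelists the environment variables that YARN containers inherit; if containers cannot find Hadoop at runtime, this property (copied from that guide) may help:

<property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ,HADOOP_MAPRED_HOME</value>
</property>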

# Format HDFS (first run only; reformatting wipes the NameNode metadata)
hdfs namenode -format

# Stop the firewall
systemctl stop firewalld

# Start all daemons
cd /home/hadoop/hadoop-3.3.6/sbin
./start-all.sh   # deprecated in Hadoop 3 in favor of start-dfs.sh + start-yarn.sh, but still works
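
Once the script returns, jps (bundled with the JDK) should list all five daemons; if one is missing, check the logs under $HADOOP_HOME/logs:

jps
# Expected processes (PIDs will differ):
# NameNode
# DataNode
# SecondaryNameNode
# ResourceManager
# NodeManager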

# Web UIs
http://<IP>:50070   # HDFS NameNode
http://<IP>:8088    # YARN ResourceManager
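
As a final smoke test, a small round trip through HDFS (the paths here are arbitrary examples):

hdfs dfs -mkdir -p /user/root
hdfs dfs -put /etc/hosts /user/root/
hdfs dfs -ls /user/root   # should list the uploaded hosts file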