February 10, 2020

[Note] Installing Hadoop 2 on Ubuntu 18


Environment: Ubuntu 18 64-bit, Java 8 installed, host name=bigdata
This is a note on installing Hadoop2.


# optional: create user:group for hadoop.  I use hduser:hadoop

# set up password-less ssh, and do this:
ssh 0.0.0.0

# download the Hadoop 2 tarball, then uncompress it and install it in /opt/hadoop
wget http://apache.mirrors.tds.net/hadoop/common/hadoop-2.10.0/hadoop-2.10.0.tar.gz

# vi ~/.bashrc and add:
export HADOOP_HOME=/opt/hadoop
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=${HADOOP_HOME}
export HADOOP_COMMON_HOME=${HADOOP_HOME}

export HADOOP_HDFS_HOME=${HADOOP_HOME}
export YARN_HOME=${HADOOP_HOME}
export YARN_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"


# vi /opt/hadoop/etc/hadoop/hadoop-env.sh, and add:
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64

# vi /opt/hadoop/etc/hadoop/hdfs-site.xml, and add:
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>

    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/data/hadoop/name_node</value>
        <final>true</final>
    </property>

    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/data/hadoop/data_node</value>
        <final>true</final>
    </property>
</configuration>


# create the data folders, then give them the ownership, group and permissions the Hadoop process needs
# (e.g. sudo chown -R hduser:hadoop /data/hadoop, if you created the hduser:hadoop user)
sudo mkdir -p /data/hadoop/name_node
sudo mkdir -p /data/hadoop/data_node
sudo mkdir -p /data/hadoop/tmp

# vi /opt/hadoop/etc/hadoop/core-site.xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/data/hadoop/tmp</value>
    </property>
</configuration>


# vi /opt/hadoop/etc/hadoop/yarn-site.xml
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
</configuration>


# vi /opt/hadoop/etc/hadoop/mapred-site.xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>


Time to run:

$ . ~/.bashrc
$ hdfs namenode -format 
$ start-dfs.sh
$ start-yarn.sh
$ jps


UI

  • NameNode, http://bigdata:50070/
  • Data node, http://bigdata:50075/datanode.html
  • Resource Manager, http://bigdata:8088/

Resources

Hadoop Commands

Tools



No comments:

Post a Comment