  • [Hadoop/Hive] Installation/configuration: how to install Hadoop and Hive
    📚 Databases/Big Data 2022. 8. 2. 05:05

    1. Download Hadoop files

    2. Update necessary config files

    3. Download Hive files

    4. Update Hive config file

    5. Install Hive metastore 

     

    /* Update the system and install Java */
    sudo apt update
    
    sudo apt install openjdk-8-jdk -y
    
    java -version; javac -version
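    If the installation succeeded, both commands should report an OpenJDK 1.8 build, along these lines (the exact build number will vary):

    -- openjdk version "1.8.0_xxx"
    -- javac 1.8.0_xxx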
    
    
    /* Install OpenSSH */
    sudo apt install openssh-server openssh-client -y
    
    ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
    
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
    
    chmod 0600 ~/.ssh/authorized_keys
    
    ssh localhost
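    A quick way to confirm that key-based login is working is to run a single command over SSH; it should complete without a password prompt (the echoed text is arbitrary):

    ssh localhost 'echo passwordless ssh OK'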
    
    /* Install Hadoop */
    
    
    wget https://dlcdn.apache.org/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz -P /disk/hadoop
    
    cd /disk/hadoop
    
    tar xzf hadoop-3.3.1.tar.gz
    
    --/disk/hadoop/hadoop-3.3.1
    
    cd ~
    
    nano .bashrc
    
    # Hadoop-related options: append at the end of the .bashrc file
    export HADOOP_HOME=/disk/hadoop/hadoop-3.3.1
    export HADOOP_INSTALL=$HADOOP_HOME
    export HADOOP_MAPRED_HOME=$HADOOP_HOME
    export HADOOP_COMMON_HOME=$HADOOP_HOME
    export HADOOP_HDFS_HOME=$HADOOP_HOME
    export YARN_HOME=$HADOOP_HOME
    export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
    export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
    export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
    
    
    source ~/.bashrc
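    Before moving on, it is worth verifying that the new variables took effect; both commands below depend only on what was just appended to .bashrc:

    echo $HADOOP_HOME
    --/disk/hadoop/hadoop-3.3.1

    hadoop version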
    
    
    which javac
    --/usr/bin/javac
    
    readlink -f /usr/bin/javac
    --/usr/lib/jvm/java-8-openjdk-amd64/bin/javac
    
    sudo nano $HADOOP_HOME/etc/hadoop/hadoop-env.sh
    -- Update (or uncomment) the JAVA_HOME line:
    	export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
    
    sudo nano $HADOOP_HOME/etc/hadoop/core-site.xml
    
    <configuration>
    <property>
      <name>hadoop.tmp.dir</name>
      <value>/disk/ubuntuhive/tmpdata</value>
    </property>
    <property>
      <name>fs.defaultFS</name>
      <value>hdfs://127.0.0.1:9000</value>
    </property>
    </configuration>
    
    mkdir -p /disk/ubuntuhive/tmpdata
    mkdir -p /disk/ubuntuhive/dfsdata/namenode
    mkdir -p /disk/ubuntuhive/dfsdata/datanode
    
    
    sudo nano $HADOOP_HOME/etc/hadoop/hdfs-site.xml
    <configuration>
    <property>
      <name>dfs.namenode.name.dir</name>
      <value>/disk/ubuntuhive/dfsdata/namenode</value>
    </property>
    <property>
      <name>dfs.datanode.data.dir</name>
      <value>/disk/ubuntuhive/dfsdata/datanode</value>
    </property>
    <property>
      <name>dfs.replication</name>
      <value>1</value>
    </property>
    </configuration>
    
    
    sudo nano $HADOOP_HOME/etc/hadoop/mapred-site.xml
    <configuration> 
    <property> 
      <name>mapreduce.framework.name</name> 
      <value>yarn</value> 
    </property> 
    </configuration>
    
    
    sudo nano $HADOOP_HOME/etc/hadoop/yarn-site.xml
    
    <configuration>
    <property>
      <name>yarn.nodemanager.aux-services</name>
      <value>mapreduce_shuffle</value>
    </property>
    <property>
      <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
      <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
      <name>yarn.resourcemanager.hostname</name>
      <value>127.0.0.1</value>
    </property>
    <property>
      <name>yarn.acl.enable</name>
      <value>0</value>
    </property>
    <property>
      <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
    </property>
    </configuration>
    
    /* The command below needs to be executed only once, when first installing Hadoop; re-formatting an existing namenode erases HDFS metadata */
    hdfs namenode -format
    
    /* The commands below need to be executed whenever you don't see all of the processes in the jps output, usually after restarting the VM */
    cd /disk/hadoop/hadoop-3.3.1/sbin/
    ./start-dfs.sh
    ./start-yarn.sh
    
    jps
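    If everything came up cleanly, jps should list the five Hadoop daemons in addition to Jps itself (the PIDs will differ):

    --NameNode
    --DataNode
    --SecondaryNameNode
    --ResourceManager
    --NodeManager

    /* Another quick check: with Hadoop 3.x defaults, the NameNode web UI is at http://localhost:9870 and the YARN ResourceManager UI at http://localhost:8088 */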
    /* Install Hive */
    cd ~
    wget https://dlcdn.apache.org/hive/hive-3.1.2/apache-hive-3.1.2-bin.tar.gz -P /disk/hive
    
    cd /disk/hive
    
    tar xzf apache-hive-3.1.2-bin.tar.gz
    
    cd ~
    
    nano .bashrc
    
    export HIVE_HOME="/disk/hive/apache-hive-3.1.2-bin"
    export PATH=$PATH:$HIVE_HOME/bin
    
    cd ~
    source ~/.bashrc
    
    
    
    sudo nano $HIVE_HOME/bin/hive-config.sh
    export HADOOP_HOME=/disk/hadoop/hadoop-3.3.1
    
    
    hdfs dfs -mkdir /tmp
    hdfs dfs -chmod g+w /tmp
    hdfs dfs -ls /
    
    hdfs dfs -mkdir -p /user/hive/warehouse
    hdfs dfs -chmod g+w /user/hive/warehouse
    hdfs dfs -ls /user/hive
    
    cd $HIVE_HOME/conf
    cp hive-default.xml.template hive-site.xml
    
    
    cd /disk/hive/apache-hive-3.1.2-bin/conf
    sudo nano hive-site.xml
    -- Add the two properties below inside the <configuration> section. They pin down the
    -- ${system:java.io.tmpdir} references left unresolved in the template, which otherwise
    -- cause a "java.net.URISyntaxException: Relative path in absolute URI" error at startup.
    <property><name>system:java.io.tmpdir</name><value>/tmp/hive/java</value></property>
    <property><name>system:user.name</name><value>${user.name}</value></property>
    
    Next, search hive-site.xml for the text below. In the stock template there is an invalid
    control character between "for" and "transactional"; delete that character, or Hive will
    fail to parse the file at startup:

      Ensures commands with OVERWRITE (such as INSERT OVERWRITE) acquire Exclusive locks for
      transactional tables.  This ensures that inserts (w/o overwrite) running concurrently
      are not hidden by the INSERT OVERWRITE.
    
    Also check that hive.metastore.schema.verification is set to false:
    <property>
      <name>hive.metastore.schema.verification</name>
      <value>false</value>
      <description>
        Enforce metastore schema version consistency.
        True: Verify that the version information stored in the metastore is compatible with the one from
        the Hive jars. Also disables automatic schema migration; users are required to migrate the schema
        manually after a Hive upgrade, which ensures proper metastore schema migration. (Default)
        False: Warn if the version information stored in the metastore doesn't match the one from the Hive jars.
      </description>
    </property>
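    One more known issue is worth fixing before initializing the metastore: with this exact pairing (Hive 3.1.2 on Hadoop 3.3.1), schematool and the hive CLI typically fail with "java.lang.NoSuchMethodError: com.google.common.base.Preconditions.checkArgument" because Hive bundles an older Guava than Hadoop. A common workaround (check both directories first, as the jar version numbers may differ in your downloads) is to replace Hive's Guava with Hadoop's:

    ls $HIVE_HOME/lib/guava-*.jar $HADOOP_HOME/share/hadoop/hdfs/lib/guava-*.jar
    rm $HIVE_HOME/lib/guava-19.0.jar
    cp $HADOOP_HOME/share/hadoop/hdfs/lib/guava-27.0-jre.jar $HIVE_HOME/lib/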
    
    
    
    
    
    cd $HIVE_HOME
    $HIVE_HOME/bin/schematool -dbType derby -initSchema
    hive
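    Note that schematool creates the embedded Derby metastore_db directory in the current working directory (hence the cd $HIVE_HOME above), and Derby allows only one open session at a time, so always start hive from that same directory. A quick sanity check:

    ls -d $HIVE_HOME/metastore_db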
    
    
    cd $HIVE_HOME/bin; hive
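    As a smoke test of the whole stack, the hive CLI can also run statements non-interactively with -e; the table name below is arbitrary:

    hive -e "CREATE TABLE smoke_test (id INT); SHOW TABLES; DROP TABLE smoke_test;"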
    
    jps
    
    
    
    /* Setup for Beeline -- 'ubuntuhive' in the proxyuser properties below is the OS user that Beeline will connect as; replace it with your own user name */
    sudo nano $HADOOP_HOME/etc/hadoop/core-site.xml
    <property>
         <name>hadoop.proxyuser.ubuntuhive.hosts</name>
         <value>*</value>
    </property>
    <property>
         <name>hadoop.proxyuser.ubuntuhive.groups</name>
         <value>*</value>
    </property>
    
    cd /disk/hadoop/hadoop-3.3.1/sbin/
    ./stop-dfs.sh
    ./stop-yarn.sh
    
    ./start-dfs.sh
    ./start-yarn.sh
    
    cd $HIVE_HOME/bin;
    hiveserver2
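    hiveserver2 stays in the foreground and occupies the terminal, so open a second terminal for beeline, or start it in the background instead (the log path here is just an example):

    nohup $HIVE_HOME/bin/hiveserver2 > /tmp/hiveserver2.log 2>&1 &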
    
    
    beeline -u jdbc:hive2://localhost:10000 -n ubuntuhive
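    HiveServer2 can take a minute or two before it accepts connections. Once it does, a statement can also be run non-interactively through beeline's -e option:

    beeline -u jdbc:hive2://localhost:10000 -n ubuntuhive -e "SHOW DATABASES;"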