[Hadoop/Hive] Installation/Configuration: How to Install Hadoop and Hive
써니(>_<)
2022. 8. 2. 05:05
1. Download Hadoop files
2. Update necessary config files
3. Download Hive files
4. Update Hive config file
5. Install Hive metastore
6. Set up HiveServer2/Beeline
/* Update the system and install Java */
sudo apt update
sudo apt install openjdk-8-jdk -y
java -version; javac -version
/* Install OpenSSH */
sudo apt install openssh-server openssh-client -y
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys
ssh localhost
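/* The login above should succeed without a password prompt; type exit to return to the original shell. An optional non-interactive check (BatchMode makes ssh fail instead of prompting): */
exit
ssh -o BatchMode=yes localhost 'echo passwordless ssh OK'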
/* Install Hadoop */
wget https://dlcdn.apache.org/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz -P /disk/hadoop
cd /disk/hadoop
tar xzf hadoop-3.3.1.tar.gz
--/disk/hadoop/hadoop-3.3.1
cd ~
nano ~/.bashrc
# Hadoop-related options: append at the end of the .bashrc file
export HADOOP_HOME=/disk/hadoop/hadoop-3.3.1
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
source ~/.bashrc
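/* Optional sanity check that the new variables took effect: */
echo $HADOOP_HOME
hadoop version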
which javac
--/usr/bin/javac
readlink -f /usr/bin/javac
--/usr/lib/jvm/java-8-openjdk-amd64/bin/javac
sudo nano $HADOOP_HOME/etc/hadoop/hadoop-env.sh
-- Uncomment and update the JAVA_HOME line:
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
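/* The path above comes from the readlink output earlier; if your JDK lives elsewhere, this one-liner derives it automatically (assumes javac is on the PATH): */
export JAVA_HOME=$(dirname $(dirname $(readlink -f $(which javac))))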
sudo nano $HADOOP_HOME/etc/hadoop/core-site.xml
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/disk/ubuntuhive/tmpdata</value>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://127.0.0.1:9000</value>
</property>
</configuration>
mkdir -p /disk/ubuntuhive/tmpdata
mkdir -p /disk/ubuntuhive/dfsdata/namenode
mkdir -p /disk/ubuntuhive/dfsdata/datanode
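/* If these directories were created as root (e.g. with sudo), hand them over to the account that runs Hadoop; here it is assumed to be the current user: */
sudo chown -R $USER:$USER /disk/ubuntuhive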
sudo nano $HADOOP_HOME/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>/disk/ubuntuhive/dfsdata/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/disk/ubuntuhive/dfsdata/datanode</value>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
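/* Optional sanity check: hdfs getconf reads the XML directly, so the value just set should come back: */
hdfs getconf -confKey dfs.replication
--1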
sudo nano $HADOOP_HOME/etc/hadoop/mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
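/* On Hadoop 3.x, MapReduce jobs can fail with "Could not find or load main class org.apache.hadoop.mapreduce.v2.app.MRAppMaster" unless the MapReduce classpath is set. The property below is taken from the official single-node setup guide and goes inside the <configuration> section of mapred-site.xml; optional if your jobs already run: */
<property>
<name>mapreduce.application.classpath</name>
<value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
</property>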
sudo nano $HADOOP_HOME/etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>127.0.0.1</value>
</property>
<property>
<name>yarn.acl.enable</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
</configuration>
/* The command below needs to be executed only once, when Hadoop is first installed; re-formatting an existing namenode wipes the HDFS metadata. */
hdfs namenode -format
/* The commands below need to be executed whenever jps does not show all of the Hadoop processes, usually after restarting the VMs. */
cd /disk/hadoop/hadoop-3.3.1/sbin/
./start-dfs.sh
./start-yarn.sh
jps
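/* jps should list NameNode, DataNode, SecondaryNameNode, ResourceManager, and NodeManager (plus Jps itself). The web UIs are another quick check on Hadoop 3.x: the NameNode at http://localhost:9870 and the ResourceManager at http://localhost:8088. */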
/* Install Hive */
cd ~
wget https://dlcdn.apache.org/hive/hive-3.1.2/apache-hive-3.1.2-bin.tar.gz -P /disk/hive
cd /disk/hive
tar xzf apache-hive-3.1.2-bin.tar.gz
cd ~
nano ~/.bashrc
# Append at the end of the .bashrc file:
export HIVE_HOME="/disk/hive/apache-hive-3.1.2-bin"
export PATH=$PATH:$HIVE_HOME/bin
source ~/.bashrc
sudo nano $HIVE_HOME/bin/hive-config.sh
export HADOOP_HOME=/disk/hadoop/hadoop-3.3.1
hdfs dfs -mkdir /tmp
hdfs dfs -chmod g+w /tmp
hdfs dfs -ls /
hdfs dfs -mkdir -p /user/hive/warehouse
hdfs dfs -chmod g+w /user/hive/warehouse
hdfs dfs -ls /user/hive
cd $HIVE_HOME/conf
cp hive-default.xml.template hive-site.xml
sudo nano hive-site.xml
-- Add at the beginning of the <configuration> section (the template references
-- ${system:java.io.tmpdir} and ${system:user.name}, which are otherwise undefined):
<property><name>system:java.io.tmpdir</name><value>/tmp/hive/java</value></property>
<property><name>system:user.name</name><value>${user.name}</value></property>
Check for this text (in the description of hive.txn.xlock.iow): the template contains an illegal
character entity (&#8;) between the words "for" and "transactional"; delete it, or Hive fails to parse hive-site.xml:
"Ensures commands with OVERWRITE (such as INSERT OVERWRITE) acquire Exclusive locks for transactional tables.
This ensures that inserts (w/o overwrite) running concurrently are not hidden by the INSERT OVERWRITE."
Then check for the following property and make sure its value is set to false:
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
<description>
Enforce metastore schema version consistency.
True: Verify that the version information stored in the metastore is compatible with the version from the Hive jars. Also disables the automatic
schema migration attempt. Users are required to manually migrate the schema after a Hive upgrade, which ensures
proper metastore schema migration. (Default)
False: Warn if the version information stored in the metastore doesn't match the version from the Hive jars.
</description>
</property>
cd $HIVE_HOME
$HIVE_HOME/bin/schematool -dbType derby -initSchema
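/* Known pitfall: with Hive 3.1.2 on Hadoop 3.3.x, schematool can fail with java.lang.NoSuchMethodError: com.google.common.base.Preconditions.checkArgument, a guava version clash. Replacing Hive's bundled guava with Hadoop's fixes it (the jar versions below match these releases but may differ on your install); remove any half-initialized Derby directory before retrying: */
rm $HIVE_HOME/lib/guava-19.0.jar
cp $HADOOP_HOME/share/hadoop/hdfs/lib/guava-27.0-jre.jar $HIVE_HOME/lib/
rm -rf $HIVE_HOME/metastore_db
$HIVE_HOME/bin/schematool -dbType derby -initSchema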
hive
-- or, equivalently, from the bin directory:
cd $HIVE_HOME/bin; hive
jps
/* Setup for Beeline. Note: the ubuntuhive segment in the proxyuser property names below is the OS user that runs HiveServer2; replace it with your own username. */
sudo nano $HADOOP_HOME/etc/hadoop/core-site.xml
<property>
<name>hadoop.proxyuser.ubuntuhive.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.ubuntuhive.groups</name>
<value>*</value>
</property>
cd /disk/hadoop/hadoop-3.3.1/sbin/
./stop-dfs.sh
./stop-yarn.sh
./start-dfs.sh
./start-yarn.sh
cd $HIVE_HOME/bin;
hiveserver2
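/* hiveserver2 runs in the foreground and can take a minute or so before it accepts connections; leave it in its own terminal, or background it. A sketch (the log path is arbitrary): */
nohup $HIVE_HOME/bin/hiveserver2 > /tmp/hiveserver2.log 2>&1 &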
beeline -u jdbc:hive2://localhost:10000 -n ubuntuhive
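/* A quick smoke test once beeline connects (the table name is arbitrary): */
show databases;
create table smoke_test (id int);
show tables;
drop table smoke_test;
!quit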