[Hadoop/Hive] Installation and configuration: how to install Hadoop and Hive
1. Download Hadoop files
2. Update necessary config files
3. Download Hive files
4. Update Hive config file
5. Initialize the Hive metastore and set up beeline
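
The whole walkthrough assumes a single-node setup where everything lives under /disk (Hadoop under /disk/hadoop, Hive under /disk/hive, HDFS data under /disk/ubuntuhive) and the commands are run as the ubuntuhive user. A minimal sketch for preparing that layout up front (the chown target is an assumption; adjust it to your own user):

/* Prepare the base directories used throughout this guide */
sudo mkdir -p /disk/hadoop /disk/hive /disk/ubuntuhive
sudo chown -R ubuntuhive:ubuntuhive /disk   # assumption: everything below runs as the ubuntuhive user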
/* Update the system and install Java */
sudo apt update
sudo apt install openjdk-8-jdk -y
java -version; javac -version

/* Install OpenSSH and set up passwordless SSH to localhost */
sudo apt install openssh-server openssh-client -y
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys
ssh localhost

/* Install Hadoop */
wget https://dlcdn.apache.org/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz -P /disk/hadoop
cd /disk/hadoop
tar xzf hadoop-3.3.1.tar.gz     # extracts to /disk/hadoop/hadoop-3.3.1
cd ~
sudo nano .bashrc

# Hadoop-related options -- append at the end of the .bashrc file
export HADOOP_HOME=/disk/hadoop/hadoop-3.3.1
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"

source ~/.bashrc

/* Find the Java installation path */
which javac                      # /usr/bin/javac
readlink -f /usr/bin/javac       # /usr/lib/jvm/java-8-openjdk-amd64/bin/javac

sudo nano $HADOOP_HOME/etc/hadoop/hadoop-env.sh
# Update:
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64

sudo nano $HADOOP_HOME/etc/hadoop/core-site.xml
<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/disk/ubuntuhive/tmpdata</value>
  </property>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://127.0.0.1:9000</value>
  </property>
</configuration>

mkdir -p /disk/ubuntuhive/tmpdata
mkdir -p /disk/ubuntuhive/dfsdata/namenode
mkdir -p /disk/ubuntuhive/dfsdata/datanode

sudo nano $HADOOP_HOME/etc/hadoop/hdfs-site.xml
<configuration>
  <property>
    <name>dfs.name.dir</name>
    <value>/disk/ubuntuhive/dfsdata/namenode</value>
  </property>
  <property>
    <name>dfs.data.dir</name>
    <value>/disk/ubuntuhive/dfsdata/datanode</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>

sudo nano $HADOOP_HOME/etc/hadoop/mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>

sudo nano $HADOOP_HOME/etc/hadoop/yarn-site.xml
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>127.0.0.1</value>
  </property>
  <property>
    <name>yarn.acl.enable</name>
    <value>0</value>
  </property>
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
  </property>
</configuration>

/* The command below needs to be executed only once, while installing Hadoop */
hdfs namenode -format

/* The commands below need to be executed whenever you don't see all the processes in the jps output, usually after restarting the VM */
cd /disk/hadoop/hadoop-3.3.1/sbin/
./start-dfs.sh
./start-yarn.sh
jps
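
Once jps lists the HDFS and YARN daemons (NameNode, DataNode, SecondaryNameNode, ResourceManager, NodeManager), a quick smoke test confirms that HDFS is actually writable. A minimal sketch, not part of the original setup; the file and directory names are just examples:

/* HDFS smoke test */
echo "hello hdfs" > /tmp/smoke.txt
hdfs dfs -mkdir -p /user/ubuntuhive/smoke
hdfs dfs -put /tmp/smoke.txt /user/ubuntuhive/smoke/
hdfs dfs -cat /user/ubuntuhive/smoke/smoke.txt   # should print: hello hdfs
hdfs dfs -rm -r /user/ubuntuhive/smoke           # clean up

The NameNode web UI (http://localhost:9870) and the ResourceManager UI (http://localhost:8088) are also handy for checking daemon status in Hadoop 3.x.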
/* Install Hive */
cd ~
wget https://dlcdn.apache.org/hive/hive-3.1.2/apache-hive-3.1.2-bin.tar.gz -P /disk/hive
cd /disk/hive
tar xzf apache-hive-3.1.2-bin.tar.gz
cd ~
sudo nano .bashrc

# Append at the end of the .bashrc file
export HIVE_HOME="/disk/hive/apache-hive-3.1.2-bin"
export PATH=$PATH:$HIVE_HOME/bin

cd ~
source ~/.bashrc

sudo nano $HIVE_HOME/bin/hive-config.sh
# Add:
export HADOOP_HOME=/disk/hadoop/hadoop-3.3.1

/* Create the HDFS directories Hive needs */
hdfs dfs -mkdir /tmp
hdfs dfs -chmod g+w /tmp
hdfs dfs -ls /
hdfs dfs -mkdir -p /user/hive/warehouse
hdfs dfs -chmod g+w /user/hive/warehouse
hdfs dfs -ls /user/hive

/* Create hive-site.xml from the template and edit it */
cd $HIVE_HOME/conf
cp hive-default.xml.template hive-site.xml
cd /disk/hive/apache-hive-3.1.2-bin/conf
sudo nano hive-site.xml

# Add these two properties right after the opening <configuration> tag:
<property><name>system:java.io.tmpdir</name><value>/tmp/hive/java</value></property>
<property><name>system:user.name</name><value>${user.name}</value></property>

# Search for the description text "Ensures commands with OVERWRITE (such as INSERT OVERWRITE) acquire Exclusive locks for transactional tables. This ensures that inserts (w/o overwrite) running concurrently are not hidden by the INSERT OVERWRITE." and remove the invalid (non-printable) character between "for" and "transactional"; it breaks XML parsing when Hive starts.

# Search for hive.metastore.schema.verification and set it to false:
<property>
  <name>hive.metastore.schema.verification</name>
  <value>false</value>
  <description>
    Enforce metastore schema version consistency.
    True: Verify that version information stored in the metastore is compatible with the one from Hive jars. Also disable automatic schema migration attempts. Users are required to manually migrate the schema after a Hive upgrade, which ensures proper metastore schema migration. (Default)
    False: Warn if the version information stored in the metastore doesn't match the one from the Hive jars.
  </description>
</property>

/* Initialize the Derby metastore and start Hive */
cd $HIVE_HOME
$HIVE_HOME/bin/schematool -dbType derby -initSchema
cd $HIVE_HOME/bin; hive
jps

/* Setup for beeline */
sudo nano $HADOOP_HOME/etc/hadoop/core-site.xml
# Add:
<property>
  <name>hadoop.proxyuser.ubuntuhive.hosts</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.ubuntuhive.groups</name>
  <value>*</value>
</property>

/* Restart HDFS and YARN so the proxy-user settings take effect */
cd /disk/hadoop/hadoop-3.3.1/sbin/
./stop-dfs.sh
./stop-yarn.sh
./start-dfs.sh
./start-yarn.sh

/* Start HiveServer2 and connect with beeline */
cd $HIVE_HOME/bin; hiveserver2
beeline -u jdbc:hive2://localhost:10000 -n ubuntuhive
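
With hiveserver2 running (it stays in the foreground, so run beeline from a second terminal), a short HiveQL round trip through beeline verifies that the metastore and the warehouse directory work together. A minimal sketch; the table name smoke_test is just an example:

/* Hive smoke test through beeline */
beeline -u jdbc:hive2://localhost:10000 -n ubuntuhive \
  -e "CREATE TABLE IF NOT EXISTS smoke_test (id INT, name STRING)" \
  -e "INSERT INTO smoke_test VALUES (1, 'hello')" \
  -e "SELECT * FROM smoke_test" \
  -e "DROP TABLE smoke_test"

The INSERT runs as a YARN job, so this also exercises the mapred-site.xml and yarn-site.xml settings above.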