Patch summary
  * Dockerfile: chain each component's download / extract / configure steps
    into a single RUN, copy the helper scripts (restart, init, colors) after
    the installs, delete the downloaded archives, and drop lines 5-7 of
    ~/.bashrc. The HDFS namenode/datanode directories are created with
    explicit paths so the step does not depend on shell brace expansion.
  * colors: new file defining escape-sequence variables (FG_*, BG_*, RESET).
  * init / restart: source colors, print progress messages, and discard
    command output by redirecting it to /dev/null.

diff --git a/Dockerfile b/Dockerfile
index d53818b..734318e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -16,9 +16,9 @@ RUN apt-get update && \
     apt-get install -y ssh openjdk-8-jdk neovim junit python-is-python3 nano curl python3-pip
 
 # Download and extract Hadoop
-RUN mkdir -p $HADOOP_HOME
-RUN wget -O hadoop.tar.gz https://downloads.apache.org/hadoop/common/stable/hadoop-3.3.6.tar.gz
-RUN tar -xzvf hadoop.tar.gz -C $HADOOP_HOME --strip-components=1
+RUN mkdir -p $HADOOP_HOME && \
+    wget -O hadoop.tar.gz https://downloads.apache.org/hadoop/common/stable/hadoop-3.3.6.tar.gz && \
+    tar -xzvf hadoop.tar.gz -C $HADOOP_HOME --strip-components=1
 
 # Configure SSH
 RUN ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa && \
@@ -27,8 +27,8 @@ RUN ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa && \
 
 RUN wget -O /usr/local/hadoop/lib/javax.activation-api-1.2.0.jar https://jcenter.bintray.com/javax/activation/javax.activation-api/1.2.0/javax.activation-api-1.2.0.jar
 
-RUN mkdir -p /home/hadoop/hdfs/{namenode,datanode}
-RUN chown -R $USER:$USER /home/hadoop/hdfs
+RUN mkdir -p /home/hadoop/hdfs/namenode /home/hadoop/hdfs/datanode && \
+    chown -R $USER:$USER /home/hadoop/hdfs
 
 # Hadoop configuration
 COPY core-site.xml $HADOOP_HOME/etc/hadoop/core-site.xml
@@ -55,100 +55,109 @@ RUN echo "HDFS_NAMENODE_USER=root" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh
     echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh && \
     echo "export HADOOP_CLASSPATH+=\" \$HADOOP_HOME/lib/*.jar\"" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh
 
-# Copy init and restart scripts
-COPY restart $HADOOP_HOME/bin/restart
-COPY init $HADOOP_HOME/bin/init
-RUN chmod +x $HADOOP_HOME/bin/restart
-RUN chmod +x $HADOOP_HOME/bin/init
-
 # Install pig
-RUN wget -O pig.tar.gz https://downloads.apache.org/pig/pig-0.17.0/pig-0.17.0.tar.gz
-RUN tar -xzvf pig.tar.gz
-RUN mv pig-0.17.0 /pig
-RUN echo "export PIG_HOME=/pig" >> ~/.bashrc && \
+RUN wget -O pig.tar.gz https://downloads.apache.org/pig/pig-0.17.0/pig-0.17.0.tar.gz && \
+    tar -xzvf pig.tar.gz && \
+    mv pig-0.17.0 /pig && \
+    echo "export PIG_HOME=/pig" >> ~/.bashrc && \
     echo "export PATH=\$PATH:/pig/bin" >> ~/.bashrc && \
     echo "export PIG_CLASSPATH=\$HADOOP_HOME/etc/hadoop" >> ~/.bashrc
 
 # Install hbase
-RUN wget http://apache.mirror.gtcomm.net/hbase/stable/hbase-2.5.7-bin.tar.gz
-RUN tar -xzvf hbase-2.5.7-bin.tar.gz
-RUN mv hbase-2.5.7 /usr/local/hbase
-RUN echo "export HBASE_HOME=/usr/local/hbase" >> ~/.bashrc && \
-    echo "export PATH=\$PATH:\$HBASE_HOME/bin" >> ~/.bashrc
-RUN echo "export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP=\"true\"" >> /usr/local/hbase/conf/hbase-env.sh && \
+RUN wget http://apache.mirror.gtcomm.net/hbase/stable/hbase-2.5.7-bin.tar.gz && \
+    tar -xzvf hbase-2.5.7-bin.tar.gz && \
+    mv hbase-2.5.7 /usr/local/hbase && \
+    echo "export HBASE_HOME=/usr/local/hbase" >> ~/.bashrc && \
+    echo "export PATH=\$PATH:\$HBASE_HOME/bin" >> ~/.bashrc && \
+    echo "export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP=\"true\"" >> /usr/local/hbase/conf/hbase-env.sh && \
     echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> /usr/local/hbase/conf/hbase-env.sh
 
 COPY hbase-site.xml ~/hbase-site.xml
-RUN mkdir -p /hadoop/zookeeper
-RUN chown -R $USER:$USER /hadoop/
+RUN mkdir -p /hadoop/zookeeper && \
+    chown -R $USER:$USER /hadoop/
 
 # Install Hive
-RUN wget https://dlcdn.apache.org/hive/hive-3.1.3/apache-hive-3.1.3-bin.tar.gz
-RUN tar -xzvf apache-hive-3.1.3-bin.tar.gz
-RUN mv apache-hive-3.1.3-bin /usr/local/hive
-RUN echo "export HIVE_HOME=/usr/local/hive" >> ~/.bashrc && \
-    echo "export PATH=\$PATH:\$HIVE_HOME/bin" >> ~/.bashrc
-RUN echo "HADOOP_HOME=/usr/local/hadoop" >> /usr/local/hive/bin/hive-config.sh
+RUN wget https://dlcdn.apache.org/hive/hive-3.1.3/apache-hive-3.1.3-bin.tar.gz && \
+    tar -xzvf apache-hive-3.1.3-bin.tar.gz && \
+    mv apache-hive-3.1.3-bin /usr/local/hive && \
+    echo "export HIVE_HOME=/usr/local/hive" >> ~/.bashrc && \
+    echo "export PATH=\$PATH:\$HIVE_HOME/bin" >> ~/.bashrc && \
+    echo "HADOOP_HOME=/usr/local/hadoop" >> /usr/local/hive/bin/hive-config.sh
 
 # Install Flume
-RUN wget https://archive.apache.org/dist/flume/1.9.0/apache-flume-1.9.0-bin.tar.gz
-RUN tar -xzvf apache-flume-1.9.0-bin.tar.gz
-RUN mv apache-flume-1.9.0-bin /usr/local/flume
-RUN echo "export FLUME_HOME=/usr/local/flume" >> ~/.bashrc && \
-    echo "export PATH=\$PATH:\$FLUME_HOME/bin" >> ~/.bashrc
-RUN sed -i '214c\ \$EXEC \$JAVA_HOME/java \$JAVA_OPTS \$FLUME_JAVA_OPTS "\${arr_java_props[@]}" -cp "\$FLUME_CLASSPATH" \\' /usr/local/flume/bin/flume-ng
+RUN wget https://archive.apache.org/dist/flume/1.9.0/apache-flume-1.9.0-bin.tar.gz && \
+    tar -xzvf apache-flume-1.9.0-bin.tar.gz && \
+    mv apache-flume-1.9.0-bin /usr/local/flume && \
+    echo "export FLUME_HOME=/usr/local/flume" >> ~/.bashrc && \
+    echo "export PATH=\$PATH:\$FLUME_HOME/bin" >> ~/.bashrc && \
+    sed -i '214c\ \$EXEC \$JAVA_HOME/java \$JAVA_OPTS \$FLUME_JAVA_OPTS "\${arr_java_props[@]}" -cp "\$FLUME_CLASSPATH" \\' /usr/local/flume/bin/flume-ng
 
 # Install Sqoop
-RUN wget https://archive.apache.org/dist/sqoop/1.4.7/sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz
-RUN tar -xzvf sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz
-RUN mv sqoop-1.4.7.bin__hadoop-2.6.0 /usr/local/sqoop
-RUN echo "export SQOOP_HOME=/usr/local/sqoop" >> ~/.bashrc && \
-    echo "export PATH=\$PATH:\$SQOOP_HOME/bin" >> ~/.bashrc
-RUN mv /usr/local/sqoop/conf/sqoop-env-template.sh /usr/local/sqoop/conf/sqoop-env.sh
-RUN echo "export HADOOP_COMMON_HOME=/usr/local/hadoop" >> /usr/local/sqoop/conf/sqoop-env.sh && \
+RUN wget https://archive.apache.org/dist/sqoop/1.4.7/sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz && \
+    tar -xzvf sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz && \
+    mv sqoop-1.4.7.bin__hadoop-2.6.0 /usr/local/sqoop && \
+    echo "export SQOOP_HOME=/usr/local/sqoop" >> ~/.bashrc && \
+    echo "export PATH=\$PATH:\$SQOOP_HOME/bin" >> ~/.bashrc && \
+    mv /usr/local/sqoop/conf/sqoop-env-template.sh /usr/local/sqoop/conf/sqoop-env.sh && \
+    echo "export HADOOP_COMMON_HOME=/usr/local/hadoop" >> /usr/local/sqoop/conf/sqoop-env.sh && \
     echo "export HADOOP_MAPRED_HOME=/usr/local/hadoop" >> /usr/local/sqoop/conf/sqoop-env.sh
 
 # Install Zookeeper
-RUN wget https://downloads.apache.org/zookeeper/zookeeper-3.9.1/apache-zookeeper-3.9.1-bin.tar.gz
-RUN tar -xvf apache-zookeeper-3.9.1-bin.tar.gz
-RUN mv apache-zookeeper-3.9.1-bin /usr/local/zookeeper
-RUN mv /usr/local/zookeeper/conf/zoo_sample.cfg /usr/local/zookeeper/conf/zoo.cfg
-RUN echo "export ZOOKEEPER_HOME=/usr/local/zookeeper" >> ~/.bashrc && \
+RUN wget https://downloads.apache.org/zookeeper/zookeeper-3.9.1/apache-zookeeper-3.9.1-bin.tar.gz && \
+    tar -xvf apache-zookeeper-3.9.1-bin.tar.gz && \
+    mv apache-zookeeper-3.9.1-bin /usr/local/zookeeper && \
+    mv /usr/local/zookeeper/conf/zoo_sample.cfg /usr/local/zookeeper/conf/zoo.cfg && \
+    echo "export ZOOKEEPER_HOME=/usr/local/zookeeper" >> ~/.bashrc && \
     echo "export PATH=\$PATH:\$ZOOKEEPER_HOME/bin" >> ~/.bashrc
 
 # Install Spark
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update && \
-    apt-get install -y scala git
-RUN wget https://archive.apache.org/dist/spark/spark-3.4.1/spark-3.4.1-bin-hadoop3.tgz
-RUN tar -xf spark-3.4.1-bin-hadoop3.tgz
-RUN mv spark-3.4.1-bin-hadoop3 /usr/local/spark
-RUN echo "export SPARK_HOME=/usr/local/spark" >> ~/.bashrc && \
+    apt-get install -y scala git && \
+    wget https://archive.apache.org/dist/spark/spark-3.4.1/spark-3.4.1-bin-hadoop3.tgz && \
+    tar -xf spark-3.4.1-bin-hadoop3.tgz && \
+    mv spark-3.4.1-bin-hadoop3 /usr/local/spark && \
+    echo "export SPARK_HOME=/usr/local/spark" >> ~/.bashrc && \
     echo "export PATH=\$PATH:\$SPARK_HOME/bin:\$SPARK_HOME/sbin" >> ~/.bashrc
 
 # Install Pyspark
 RUN pip install pyspark
 
 # Install Kafka
-RUN wget https://downloads.apache.org/kafka/3.6.1/kafka_2.13-3.6.1.tgz
-RUN tar -xzvf kafka_2.13-3.6.1.tgz
-RUN mv kafka_2.13-3.6.1 /usr/local/kafka
-RUN echo "export KAFKA_HOME=/usr/local/kafka" >> ~/.bashrc && \
+RUN wget https://downloads.apache.org/kafka/3.6.1/kafka_2.13-3.6.1.tgz && \
+    tar -xzvf kafka_2.13-3.6.1.tgz && \
+    mv kafka_2.13-3.6.1 /usr/local/kafka && \
+    echo "export KAFKA_HOME=/usr/local/kafka" >> ~/.bashrc && \
     echo "export PATH=\$PATH:\$KAFKA_HOME/bin" >> ~/.bashrc
 
 # Install Postgresql
 RUN apt-get install postgresql postgresql-contrib -y
 
 # Connect Postgresql with sqoop
-RUN wget https://jdbc.postgresql.org/download/postgresql-42.7.1.jar
-RUN mv postgresql-42.7.1.jar /usr/local/sqoop/lib/postgresql-42.7.1.jar
-RUN rm /usr/local/sqoop/lib/commons-lang3-3.4.jar
-RUN wget https://dlcdn.apache.org//commons/lang/binaries/commons-lang-2.6-bin.tar.gz
-RUN tar -xvf commons-lang-2.6-bin.tar.gz
-RUN mv commons-lang-2.6/* /usr/local/sqoop/lib
-RUN rm -rf commons-lang-2.6
-RUN mkdir /usr/local/sqoop/conf/manager.d
-RUN echo "org.postgresql.Driver=/usr/lib/sqoop/lib/postgresql-42.7.1.jar" > postgresql
+RUN wget https://jdbc.postgresql.org/download/postgresql-42.7.1.jar && \
+    mv postgresql-42.7.1.jar /usr/local/sqoop/lib/postgresql-42.7.1.jar && \
+    rm /usr/local/sqoop/lib/commons-lang3-3.4.jar && \
+    wget https://dlcdn.apache.org//commons/lang/binaries/commons-lang-2.6-bin.tar.gz && \
+    tar -xvf commons-lang-2.6-bin.tar.gz && \
+    mv commons-lang-2.6/* /usr/local/sqoop/lib && \
+    rm -rf commons-lang-2.6 && \
+    mkdir /usr/local/sqoop/conf/manager.d && \
+    echo "org.postgresql.Driver=/usr/lib/sqoop/lib/postgresql-42.7.1.jar" > postgresql
+
+# Copy init and restart scripts
+COPY restart $HADOOP_HOME/bin/restart
+COPY init $HADOOP_HOME/bin/init
+COPY colors $HADOOP_HOME/bin/colors
+RUN chmod +x $HADOOP_HOME/bin/restart && \
+    chmod +x $HADOOP_HOME/bin/colors && \
+    chmod +x $HADOOP_HOME/bin/init
+
+# Cleaning up archives
+RUN rm *.tar.gz && \
+    rm *.tgz
+
+# Remove code in .bashrc
+RUN sed -i 5,7d ~/.bashrc
 
 # Expose necessary ports
 EXPOSE 9870 8088 9000
diff --git a/colors b/colors
new file mode 100644
index 0000000..4f3947c
--- /dev/null
+++ b/colors
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+FG_RED="\e[31m"
+FG_GREEN="\e[32m"
+FG_BROWN="\e[33m"
+FG_BLUE="\e[34m"
+FG_PURPLE="\e[35m"
+FG_CYAN="\e[36m"
+FG_GRAY="\e[37m"
+
+BG_RED="\e[41m"
+BG_GREEN="\e[42m"
+BG_BROWN="\e[43m"
+BG_BLUE="\e[44m"
+BG_PURPLE="\e[45m"
+BG_CYAN="\e[46m"
+BG_GRAY="\e[47m"
+
+RESET="\e[0m"
diff --git a/init b/init
index ca1fb32..5781dcb 100644
--- a/init
+++ b/init
@@ -1,19 +1,41 @@
 #!/usr/bin/env bash
 
-service ssh restart # restart ssh service
-stop-hbase.sh # stop all processes
-stop-all.sh
-hdfs namenode -format # format namenodes
-rm *.tar.gz # delete all tar files to save space
+source 'colors'
 
-# start all
-start-all.sh
-start-hbase.sh
+read -p "Choose your PC name: " name
 
-# hive initialization
-hdfs dfs -mkdir /tmp
-hdfs dfs -chmod g+w /tmp
-hdfs dfs -mkdir -p /user/hive/warehouse
-hdfs dfs -chmod g+w /user/hive/warehouse
-schematool -dbType derby -initSchema
+echo "Choose prompt color:"
+echo -e "\e[0m1] \e[31muser@$name:/#\e[0m"
+echo -e "\e[0m2] \e[32muser@$name:/#\e[0m"
+echo -e "\e[0m3] \e[33muser@$name:/#\e[0m"
+echo -e "\e[0m4] \e[34muser@$name:/#\e[0m"
+echo -e "\e[0m5] \e[35muser@$name:/#\e[0m"
+echo -e "\e[0m6] \e[36muser@$name:/#\e[0m"
+echo -e "\e[0m7] \e[37muser@$name:/#\e[0m"
+read -p "Option: " num
+
+echo "PS1=\"\\e[3""$num""m\\u@$name:\\w# \\e[0m\"" >> ~/.bashrc
+
+printf "${FG_BROWN}Restarting SSH... "
+service ssh restart > /dev/null
+printf "${FG_GREEN}Done!\n${FG_BROWN}Stopping all processes... "
+stop-hbase.sh > /dev/null
+stop-all.sh > /dev/null
+printf "${FG_GREEN}Done!\n${FG_BROWN}Formatting HDFS... "
+hdfs namenode -format &> /dev/null
+
+printf "${FG_GREEN}Done!\n${FG_BROWN}Starting up all processes... "
+start-all.sh > /dev/null
+start-hbase.sh > /dev/null
+
+printf "${FG_GREEN}Done!\n${FG_BROWN}Setting up Hive... "
+hdfs dfs -mkdir /tmp > /dev/null
+hdfs dfs -chmod g+w /tmp > /dev/null
+hdfs dfs -mkdir -p /user/hive/warehouse > /dev/null
+hdfs dfs -chmod g+w /user/hive/warehouse > /dev/null
+schematool -dbType derby -initSchema &> /dev/null
+
+printf "${FG_GREEN}Done!\n${FG_BROWN}Checking processes... "
+printf "\n${FG_BLUE}$(jps | wc -l) processes.${RESET}\n"
+printf "Enter ${FG_BLUE}source ~/.bashrc${RESET} to see changes!\n"
 
diff --git a/restart b/restart
index d0162fc..ade74e3 100644
--- a/restart
+++ b/restart
@@ -1,7 +1,16 @@
 #!/usr/bin/env bash
-service ssh restart
-stop-hbase.sh
-stop-all.sh
-start-all.sh
-start-hbase.sh
-jps
+
+source 'colors'
+
+printf "${FG_BROWN}Restarting SSH... "
+service ssh restart > /dev/null
+printf "${FG_GREEN}Done!\n${FG_BROWN}Stopping all processes... "
+stop-hbase.sh > /dev/null
+stop-all.sh > /dev/null
+
+printf "${FG_GREEN}Done!\n${FG_BROWN}Starting up all processes... "
+start-all.sh > /dev/null
+start-hbase.sh > /dev/null
+
+printf "${FG_GREEN}Done!\n${FG_BROWN}Checking processes... "
+printf "\n${FG_BLUE}$(jps | wc -l) processes.${RESET}\n"
\ No newline at end of file