From bdc22df94fe70b98c98526e47819e3b62a104e35 Mon Sep 17 00:00:00 2001 From: Suraj B M Date: Tue, 6 Feb 2024 14:47:59 +0530 Subject: [PATCH 1/5] Modify Dockerfile --- Dockerfile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 734318e..f658d11 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,7 +13,7 @@ ENV YARN_RESOURCEMANAGER_USER=root # Install necessary dependencies RUN apt-get update && \ - apt-get install -y ssh openjdk-8-jdk neovim junit python-is-python3 nano curl python3-pip + apt-get install -y ssh openjdk-8-jdk neovim junit python-is-python3 nano curl python3-pip dos2unix # Download and extract Hadoop RUN mkdir -p $HADOOP_HOME && \ @@ -148,7 +148,10 @@ RUN wget https://jdbc.postgresql.org/download/postgresql-42.7.1.jar && \ COPY restart $HADOOP_HOME/bin/restart COPY init $HADOOP_HOME/bin/init COPY colors $HADOOP_HOME/bin/colors -RUN chmod +x $HADOOP_HOME/bin/restart && \ +RUN dos2unix $HADOOP_HOME/bin/restart && \ + dos2unix $HADOOP_HOME/bin/colors && \ + dos2unix$HADOOP_HOME/bin/init && \ + chmod +x $HADOOP_HOME/bin/restart && \ chmod +x $HADOOP_HOME/bin/colors && \ chmod +x $HADOOP_HOME/bin/init From ede6947e690c73aa230cd8bbd15c3f52bbf01274 Mon Sep 17 00:00:00 2001 From: Suraj B M Date: Wed, 7 Feb 2024 11:35:38 +0530 Subject: [PATCH 2/5] Fix space between dos2unix --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index f658d11..597e110 100644 --- a/Dockerfile +++ b/Dockerfile @@ -150,7 +150,7 @@ COPY init $HADOOP_HOME/bin/init COPY colors $HADOOP_HOME/bin/colors RUN dos2unix $HADOOP_HOME/bin/restart && \ dos2unix $HADOOP_HOME/bin/colors && \ - dos2unix$HADOOP_HOME/bin/init && \ + dos2unix $HADOOP_HOME/bin/init && \ chmod +x $HADOOP_HOME/bin/restart && \ chmod +x $HADOOP_HOME/bin/colors && \ chmod +x $HADOOP_HOME/bin/init From cf1799c6876320b74d1fa8cb6ed0f3190d7425a2 Mon Sep 17 00:00:00 2001 From: Suraj B M 
<100959814+silicoflare@users.noreply.github.com> Date: Wed, 6 Mar 2024 08:41:58 +0530 Subject: [PATCH 3/5] Update README.md --- README.md | 56 +------------------------------------------------------ 1 file changed, 1 insertion(+), 55 deletions(-) diff --git a/README.md b/README.md index ad5a5cb..02ad926 100644 --- a/README.md +++ b/README.md @@ -1,56 +1,2 @@ # Hadoop on Docker -Use this to get a quick version of Hadoop to run on Docker. - -1. Install Docker on your host PC -
- -2. Clone your platform specific branch of this repository -```bash -# Windows -git clone -b windows --single-branch https://github.com/silicoflare/docker-hadoop - -# Mac -git clone -b mac --single-branch https://github.com/silicoflare/docker-hadoop - -# Linux -git clone -b linux --single-branch https://github.com/silicoflare/docker-hadoop -``` -
- -3. Navigate to the directory -```bash -cd docker-hadoop -``` -
- -4. Build the docker image (you may need to use sudo) -```bash -docker build -t hadoop . -``` -
- -5. Wait for the build to finish -
- -6. Create a new container using the newly created image -```bash -docker run -it -p 9870:9870 -p 8088:8088 -p 9864:9864 --name anyname hadoop bash -``` -
- -7. Once the prompt appears, execute the following command to initialize everything: -```bash -init -``` -
- -8. From the next time, just run this to open the prompt. Use the same name that you used to create the container. -```bash -docker start anyname -docker exec -it anyname bash -``` - -Once in, execute: -```bash -restart -``` +Check the [wiki](https://github.com/silicoflare/docker-hadoop/wiki) for a full guide to use this docker image. From 8aa5fecb1399ccc355acb105a356ca73fd82da8d Mon Sep 17 00:00:00 2001 From: Suraj B M Date: Tue, 2 Apr 2024 13:35:17 +0530 Subject: [PATCH 4/5] fix: hadoop and hbase download links --- Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 597e110..060cb1c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,7 +17,7 @@ RUN apt-get update && \ # Download and extract Hadoop RUN mkdir -p $HADOOP_HOME && \ - wget -O hadoop.tar.gz https://downloads.apache.org/hadoop/common/stable/hadoop-3.3.6.tar.gz && \ + wget -O hadoop.tar.gz https://downloads.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz && \ tar -xzvf hadoop.tar.gz -C $HADOOP_HOME --strip-components=1 # Configure SSH @@ -64,9 +64,9 @@ RUN wget -O pig.tar.gz https://downloads.apache.org/pig/pig-0.17.0/pig-0.17.0.ta echo "export PIG_CLASSPATH=\$HADOOP_HOME/etc/hadoop" >> ~/.bashrc # Install hbase -RUN wget http://apache.mirror.gtcomm.net/hbase/stable/hbase-2.5.7-bin.tar.gz && \ - tar -xzvf hbase-2.5.7-bin.tar.gz && \ - mv hbase-2.5.7 /usr/local/hbase && \ +RUN wget http://apache.mirror.gtcomm.net/hbase/2.5.8/hbase-2.5.8-bin.tar.gz && \ + tar -xzvf hbase-2.5.8-bin.tar.gz && \ + mv hbase-2.5.8 /usr/local/hbase && \ echo "export HBASE_HOME=/usr/local/hbase" >> ~/.bashrc && \ echo "export PATH=\$PATH:\$HBASE_HOME/bin" >> ~/.bashrc && \ echo "export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP=\"true\"" >> /usr/local/hbase/conf/hbase-env.sh && \ From a5adc36add66c2d5d0ec58789b21489665719b13 Mon Sep 17 00:00:00 2001 From: Suraj B M Date: Tue, 2 Apr 2024 13:43:53 +0530 Subject: [PATCH 5/5] feat: add kafka cli --- Dockerfile | 5 
++++-
 kafka      | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 kafka

diff --git a/Dockerfile b/Dockerfile
index 060cb1c..3927be3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -148,12 +148,15 @@ RUN wget https://jdbc.postgresql.org/download/postgresql-42.7.1.jar && \
 COPY restart $HADOOP_HOME/bin/restart
 COPY init $HADOOP_HOME/bin/init
 COPY colors $HADOOP_HOME/bin/colors
+COPY kafka $HADOOP_HOME/bin/kafka
 RUN dos2unix $HADOOP_HOME/bin/restart && \
     dos2unix $HADOOP_HOME/bin/colors && \
     dos2unix $HADOOP_HOME/bin/init && \
+    dos2unix $HADOOP_HOME/bin/kafka && \
     chmod +x $HADOOP_HOME/bin/restart && \
     chmod +x $HADOOP_HOME/bin/colors && \
-    chmod +x $HADOOP_HOME/bin/init
+    chmod +x $HADOOP_HOME/bin/init && \
+    chmod +x $HADOOP_HOME/bin/kafka
 
 # Cleaning up archives
 RUN rm *.tar.gz && \
diff --git a/kafka b/kafka
new file mode 100644
index 0000000..0406e3d
--- /dev/null
+++ b/kafka
@@ -0,0 +1,34 @@
+#!/bin/bash
+#
+# kafka - thin wrapper around the Kafka CLI tools under $KAFKA_HOME.
+#
+# Usage: kafka <verb> <arg>
+#   start zookeeper      run ZooKeeper using config/zookeeper.properties
+#   start kafka          run the broker using config/server.properties
+#   create-topic <name>  create topic <name> via localhost:9092
+#   produce <name>       console producer for topic <name>
+#   consume <name>       console consumer for <name>, from the beginning
+#   submit <app>         spark-submit <app> with the spark-sql-kafka package
+#
+# Any other verb, or no verb at all, does nothing and exits 0.
+
+verb=$1
+arg=$2
+
+# Expansions are quoted so that a missing or multi-word argument cannot
+# break the tests or be word-split into several command arguments.
+if [[ "$verb" == "start" ]]; then
+    if [[ "$arg" == "zookeeper" ]]; then
+        "$KAFKA_HOME/bin/zookeeper-server-start.sh" "$KAFKA_HOME/config/zookeeper.properties"
+    elif [[ "$arg" == "kafka" ]]; then
+        "$KAFKA_HOME/bin/kafka-server-start.sh" "$KAFKA_HOME/config/server.properties"
+    fi
+elif [[ "$verb" == "create-topic" ]]; then
+    "$KAFKA_HOME/bin/kafka-topics.sh" --create --topic "$arg" --bootstrap-server localhost:9092
+elif [[ "$verb" == "produce" ]]; then
+    "$KAFKA_HOME/bin/kafka-console-producer.sh" --topic "$arg" --bootstrap-server localhost:9092
+elif [[ "$verb" == "consume" ]]; then
+    "$KAFKA_HOME/bin/kafka-console-consumer.sh" --topic "$arg" --from-beginning --bootstrap-server localhost:9092
+elif [[ "$verb" == "submit" ]]; then
+    spark-submit --packages org.apache.spark:spark-sql-kafka-0-10_2.12:3.0.1 "$arg"
+fi