diff --git a/Dockerfile b/Dockerfile index 20496bb..0725f63 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,11 +13,11 @@ ENV YARN_RESOURCEMANAGER_USER=root # Install necessary dependencies RUN apt-get update && \ - apt-get install -y ssh openjdk-8-jdk neovim junit python-is-python3 nano curl python3-pip + apt-get install -y ssh openjdk-8-jdk neovim junit python-is-python3 nano curl python3-pip dos2unix # Download and extract Hadoop RUN mkdir -p $HADOOP_HOME && \ - wget -O hadoop.tar.gz https://downloads.apache.org/hadoop/common/stable/hadoop-3.3.6.tar.gz && \ + wget -O hadoop.tar.gz https://downloads.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz && \ tar -xzvf hadoop.tar.gz -C $HADOOP_HOME --strip-components=1 # Configure SSH @@ -64,9 +64,9 @@ RUN wget -O pig.tar.gz https://downloads.apache.org/pig/pig-0.17.0/pig-0.17.0.ta echo "export PIG_CLASSPATH=\$HADOOP_HOME/etc/hadoop" >> ~/.bashrc # Install hbase -RUN wget http://apache.mirror.gtcomm.net/hbase/stable/hbase-2.5.7-bin.tar.gz && \ - tar -xzvf hbase-2.5.7-bin.tar.gz && \ - mv hbase-2.5.7 /usr/local/hbase && \ +RUN wget http://apache.mirror.gtcomm.net/hbase/2.5.8/hbase-2.5.8-bin.tar.gz && \ + tar -xzvf hbase-2.5.8-bin.tar.gz && \ + mv hbase-2.5.8 /usr/local/hbase && \ echo "export HBASE_HOME=/usr/local/hbase" >> ~/.bashrc && \ echo "export PATH=\$PATH:\$HBASE_HOME/bin" >> ~/.bashrc && \ echo "export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP=\"true\"" >> /usr/local/hbase/conf/hbase-env.sh && \ @@ -148,9 +148,15 @@ RUN wget https://jdbc.postgresql.org/download/postgresql-42.7.1.jar && \ COPY restart $HADOOP_HOME/bin/restart COPY init $HADOOP_HOME/bin/init COPY colors $HADOOP_HOME/bin/colors -RUN chmod +x $HADOOP_HOME/bin/restart && \ +COPY kafka $HADOOP_HOME/bin/kafka +RUN dos2unix $HADOOP_HOME/bin/restart && \ + dos2unix $HADOOP_HOME/bin/colors && \ + dos2unix $HADOOP_HOME/bin/init && \ + dos2unix $HADOOP_HOME/bin/kafka && \ + chmod +x $HADOOP_HOME/bin/restart && \ chmod +x $HADOOP_HOME/bin/colors && \ - 
chmod +x $HADOOP_HOME/bin/init + chmod +x $HADOOP_HOME/bin/init && \ + chmod +x $HADOOP_HOME/bin/kafka # Cleaning up archives RUN rm *.tar.gz && \ diff --git a/README.md b/README.md index ad5a5cb..02ad926 100644 --- a/README.md +++ b/README.md @@ -1,56 +1,2 @@ # Hadoop on Docker -Use this to get a quick version of Hadoop to run on Docker. - -1. Install Docker on your host PC -
- -2. Clone your platform specific branch of this repository -```bash -# Windows -git clone -b windows --single-branch https://github.com/silicoflare/docker-hadoop - -# Mac -git clone -b mac --single-branch https://github.com/silicoflare/docker-hadoop - -# Linux -git clone -b linux --single-branch https://github.com/silicoflare/docker-hadoop -``` -
- -3. Navigate to the directory -```bash -cd docker-hadoop -``` -
- -4. Build the docker image (you may need to use sudo) -```bash -docker build -t hadoop . -``` -
- -5. Wait for the build to finish -
- -6. Create a new container using the newly created image -```bash -docker run -it -p 9870:9870 -p 8088:8088 -p 9864:9864 --name anyname hadoop bash -``` -
- -7. Once the prompt appears, execute the following command to initialize everything: -```bash -init -``` -
-
-8. From the next time, just run this to open the prompt. Use the same name that you used to create the container.
-```bash
-docker start anyname
-docker exec -it anyname bash
-```
-
-Once in, execute:
-```bash
-restart
-```
+Check the [wiki](https://github.com/silicoflare/docker-hadoop/wiki) for a full guide to use this docker image.
diff --git a/kafka b/kafka
new file mode 100644
index 0000000..0406e3d
--- /dev/null
+++ b/kafka
@@ -0,0 +1,74 @@
+#!/bin/bash
+#
+# Thin wrapper around the Kafka command-line tools under $KAFKA_HOME.
+#
+# Usage: kafka <command> <argument>
+#   start zookeeper|kafka   run the ZooKeeper or Kafka server start script
+#   create-topic <topic>    create a topic
+#   produce <topic>         open a console producer on a topic
+#   consume <topic>         open a console consumer reading from the beginning
+#   submit <application>    spark-submit an application with the Kafka SQL package
+#
+# Topic commands talk to the broker at localhost:9092.
+
+readonly BOOTSTRAP_SERVER="localhost:9092"
+
+# Print usage to stderr and exit with status 2.
+usage() {
+  cat >&2 <<EOF
+Usage: ${0##*/} <command> <argument>
+
+Commands:
+  start zookeeper|kafka   run the ZooKeeper or Kafka server start script
+  create-topic <topic>    create a topic
+  produce <topic>         open a console producer on a topic
+  consume <topic>         open a console consumer reading from the beginning
+  submit <application>    spark-submit an application with the Kafka SQL package
+EOF
+  exit 2
+}
+
+# Run a script from $KAFKA_HOME/bin; aborts with a message if KAFKA_HOME is unset.
+kafka_tool() {
+  local tool=$1
+  shift
+  "${KAFKA_HOME:?KAFKA_HOME must be set}/bin/$tool" "$@"
+}
+
+verb=${1:-}
+arg=${2:-}
+
+# Every command takes one argument; reject a missing one up front rather than
+# passing an empty value on to the underlying tool.
+[[ -n "$verb" && -n "$arg" ]] || usage
+
+case "$verb" in
+  start)
+    case "$arg" in
+      zookeeper)
+        kafka_tool zookeeper-server-start.sh "$KAFKA_HOME/config/zookeeper.properties"
+        ;;
+      kafka)
+        kafka_tool kafka-server-start.sh "$KAFKA_HOME/config/server.properties"
+        ;;
+      *)
+        usage
+        ;;
+    esac
+    ;;
+  create-topic)
+    kafka_tool kafka-topics.sh --create --topic "$arg" --bootstrap-server "$BOOTSTRAP_SERVER"
+    ;;
+  produce)
+    kafka_tool kafka-console-producer.sh --topic "$arg" --bootstrap-server "$BOOTSTRAP_SERVER"
+    ;;
+  consume)
+    kafka_tool kafka-console-consumer.sh --topic "$arg" --from-beginning --bootstrap-server "$BOOTSTRAP_SERVER"
+    ;;
+  submit)
+    spark-submit --packages org.apache.spark:spark-sql-kafka-0-10_2.12:3.0.1 "$arg"
+    ;;
+  *)
+    usage
+    ;;
+esac