Compare commits

..

15 Commits

Author SHA1 Message Date
Suraj B M
dc9016b941 Merge pull request #14 from doanxem99/amd
Fix download URL and install script errors in Dockerfile
2026-02-05 15:10:16 +05:30
doanxem99
4d7ad91d1e fixed dead links, minor bugs 2026-02-05 15:40:07 +07:00
Aryan Anand
68dc43ed74 Upgrade HBase from 2.5.8 to 3.0.0-beta-1 2025-10-10 12:13:41 +05:30
Aryan Anand
d75780a3b6 Kafka in bg 2024-10-17 17:44:48 +05:30
Aryan Anand
ae2efbde2d Update README.md 2024-10-10 10:31:26 +05:30
Aryan Anand
70af45cd7e Update README.md 2024-10-09 09:34:16 +05:30
Aryan Anand
3ab5aa8119 Update README.md 2024-10-09 09:30:25 +05:30
Aryan Anand
788a45bf14 Update README.md 2024-10-09 09:27:57 +05:30
Aryan Anand
7cbf3acb05 Update README.md 2024-10-06 15:07:26 +05:30
Aryan Anand
98f15fec76 Link Shortened 2024-10-06 15:06:14 +05:30
Aryan Anand
720ae5cf98 Mutiple instances 2024-10-06 14:50:17 +05:30
Aryan Anand
c7ceedb27c Update README.md 2024-09-05 15:40:11 +05:30
Aryan Anand
79f329fa8e Update README.md 2024-08-28 19:13:24 +05:30
Suraj B M
d935f91810 Merge pull request #10 from aryan-212/patch-2
Automated Install and Uninstall on Linux Systems
2024-08-09 23:47:12 +05:30
Aryan Anand
62c67d1737 Update README.md 2024-08-09 23:44:06 +05:30
3 changed files with 81 additions and 27 deletions

View File

@@ -36,7 +36,7 @@ COPY hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml
COPY mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml
COPY yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml
RUN echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-arm64" >> ~/.bashrc && \
RUN echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> ~/.bashrc && \
echo "export HADOOP_HOME=/usr/local/hadoop" >> ~/.bashrc && \
echo "export HADOOP_INSTALL=\$HADOOP_HOME" >> ~/.bashrc && \
echo "export HADOOP_MAPRED_HOME=\$HADOOP_HOME" >> ~/.bashrc && \
@@ -52,7 +52,7 @@ RUN echo "HDFS_NAMENODE_USER=root" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh
echo "HDFS_SECONDARYNAMENODE_USER=root" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh && \
echo "YARN_NODEMANAGER_USER=root" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh && \
echo "YARN_RESOURCEMANAGER_USER=root" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh && \
echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-arm64" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh && \
echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh && \
echo "export HADOOP_CLASSPATH+=\" \$HADOOP_HOME/lib/*.jar\"" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh
# Install pig
@@ -64,20 +64,20 @@ RUN wget -O pig.tar.gz https://downloads.apache.org/pig/pig-0.17.0/pig-0.17.0.ta
echo "export PIG_CLASSPATH=\$HADOOP_HOME/etc/hadoop" >> ~/.bashrc
# Install hbase
RUN wget http://apache.mirror.gtcomm.net/hbase/2.5.8/hbase-2.5.8-bin.tar.gz && \
tar -xzvf hbase-2.5.8-bin.tar.gz && \
mv hbase-2.5.8 /usr/local/hbase && \
RUN wget https://archive.apache.org/dist/hbase/3.0.0-beta-1/hbase-3.0.0-beta-1-bin.tar.gz && \
tar -xzvf hbase-3.0.0-beta-1-bin.tar.gz && \
mv hbase-3.0.0-beta-1 /usr/local/hbase && \
echo "export HBASE_HOME=/usr/local/hbase" >> ~/.bashrc && \
echo "export PATH=\$PATH:\$HBASE_HOME/bin" >> ~/.bashrc && \
echo "export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP=\"true\"" >> /usr/local/hbase/conf/hbase-env.sh && \
echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-arm64/" >> /usr/local/hbase/conf/hbase-env.sh
echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> /usr/local/hbase/conf/hbase-env.sh
COPY hbase-site.xml ~/hbase-site.xml
RUN mkdir -p /hadoop/zookeeper && \
chown -R $USER:$USER /hadoop/
# Install Hive
RUN wget https://dlcdn.apache.org/hive/hive-3.1.3/apache-hive-3.1.3-bin.tar.gz && \
RUN wget http://archive.apache.org/dist/hive/hive-3.1.3/apache-hive-3.1.3-bin.tar.gz && \
tar -xzvf apache-hive-3.1.3-bin.tar.gz && \
mv apache-hive-3.1.3-bin /usr/local/hive && \
echo "export HIVE_HOME=/usr/local/hive" >> ~/.bashrc && \
@@ -103,7 +103,7 @@ RUN wget https://archive.apache.org/dist/sqoop/1.4.7/sqoop-1.4.7.bin__hadoop-2.6
echo "export HADOOP_MAPRED_HOME=/usr/local/hadoop" >> /usr/local/sqoop/conf/sqoop-env.sh
# Install Zookeeper
RUN wget https://downloads.apache.org/zookeeper/zookeeper-3.9.1/apache-zookeeper-3.9.1-bin.tar.gz && \
RUN wget https://archive.apache.org/dist/zookeeper/zookeeper-3.9.1/apache-zookeeper-3.9.1-bin.tar.gz && \
tar -xvf apache-zookeeper-3.9.1-bin.tar.gz && \
mv apache-zookeeper-3.9.1-bin /usr/local/zookeeper && \
mv /usr/local/zookeeper/conf/zoo_sample.cfg /usr/local/zookeeper/conf/zoo.cfg && \
@@ -121,10 +121,10 @@ RUN apt-get update && \
echo "export PATH=\$PATH:\$SPARK_HOME/bin:\$SPARK_HOME/sbin" >> ~/.bashrc
# Install Pyspark
RUN pip install pyspark
RUN pip install pyspark --break-system-packages
# Install Kafka
RUN wget https://downloads.apache.org/kafka/3.6.1/kafka_2.13-3.6.1.tgz && \
RUN wget https://archive.apache.org/dist/kafka/3.6.1/kafka_2.13-3.6.1.tgz && \
tar -xzvf kafka_2.13-3.6.1.tgz && \
mv kafka_2.13-3.6.1 /usr/local/kafka && \
echo "export KAFKA_HOME=/usr/local/kafka" >> ~/.bashrc && \

View File

@@ -1,2 +1,35 @@
# Hadoop on Docker
## Linux/WSL
### Installation
```bash
curl -fsSL https://bit.ly/hadock-install -o ~/hadoock && chmod +x ~/hadoock && sudo mv ~/hadoock /usr/bin/hadoock
```
If you're using WSL, make sure to update it:
```powershell
wsl.exe --update
```
### Usage
```bash
hadock
```
### Update
```bash
hadock --uninstall
curl -fsSL https://bit.ly/hadock-install | bash
```
#### Copying from host
```bash
dockcp /path/to/file
```
This command copies ```/path/to/file``` from the host to ```/home``` in your Docker container.
#### Running multiple instances
```bash
hadock_again
```
Use this command to open another shell in the same Docker instance.
### Uninstallation
```bash
hadock --uninstall
```
Check the [wiki](https://github.com/silicoflare/docker-hadoop/wiki) for a full guide to using this Docker image.

51
kafka
View File

@@ -1,20 +1,41 @@
#!/bin/bash
verb=$1
arg=$2
verb="$1"
arg="$2"
if [ $verb == "start" ]; then
if [ $arg == "zookeeper" ]; then
$KAFKA_HOME/bin/zookeeper-server-start.sh $KAFKA_HOME/config/zookeeper.properties
elif [ $arg == "kafka" ]; then
$KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_HOME/config/server.properties
# Check if KAFKA_HOME is set
if [ -z "$KAFKA_HOME" ]; then
echo "Error: KAFKA_HOME is not set"
exit 1
fi
elif [ $verb == "create-topic" ]; then
$KAFKA_HOME/bin/kafka-topics.sh --create --topic $arg --bootstrap-server localhost:9092
elif [ $verb == "produce" ]; then
$KAFKA_HOME/bin/kafka-console-producer.sh --topic $arg --bootstrap-server localhost:9092
elif [ $verb == "consume" ]; then
$KAFKA_HOME/bin/kafka-console-consumer.sh --topic $arg --from-beginning --bootstrap-server localhost:9092
elif [ $verb == "submit" ]; then
spark-submit --packages org.apache.spark:spark-sql-kafka-0-10_2.12:3.0.1 $arg
case "$verb" in
"start")
if [ "$arg" == "zookeeper" ]; then
"$KAFKA_HOME/bin/zookeeper-server-start.sh" "$KAFKA_HOME/config/zookeeper.properties" &
elif [ "$arg" == "kafka" ]; then
"$KAFKA_HOME/bin/kafka-server-start.sh" "$KAFKA_HOME/config/server.properties" &
else
echo "Error: Invalid argument for 'start'. Use 'zookeeper' or 'kafka'."
fi
;;
"create-topic")
"$KAFKA_HOME/bin/kafka-topics.sh" --create --topic "$arg" --bootstrap-server localhost:9092 &
;;
"produce")
"$KAFKA_HOME/bin/kafka-console-producer.sh" --topic "$arg" --bootstrap-server localhost:9092 &
;;
"consume")
"$KAFKA_HOME/bin/kafka-console-consumer.sh" --topic "$arg" --from-beginning --bootstrap-server localhost:9092 &
;;
"submit")
if [ -z "$arg" ]; then
echo "Error: Please provide a Spark script."
else
spark-submit --packages org.apache.spark:spark-sql-kafka-0-10_2.12:3.0.1 "$arg" &
fi
;;
*)
echo "Error: Unknown verb. Use 'start', 'create-topic', 'produce', 'consume', or 'submit'."
;;
esac