mirror of
https://github.com/silicoflare/docker-hadoop.git
synced 2026-05-26 20:17:59 +05:30
Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 530f545b89 | |||
| 0dd01b6e0e |
20
Dockerfile
20
Dockerfile
@@ -36,7 +36,7 @@ COPY hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml
|
||||
COPY mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml
|
||||
COPY yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml
|
||||
|
||||
RUN echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> ~/.bashrc && \
|
||||
RUN echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-arm64" >> ~/.bashrc && \
|
||||
echo "export HADOOP_HOME=/usr/local/hadoop" >> ~/.bashrc && \
|
||||
echo "export HADOOP_INSTALL=\$HADOOP_HOME" >> ~/.bashrc && \
|
||||
echo "export HADOOP_MAPRED_HOME=\$HADOOP_HOME" >> ~/.bashrc && \
|
||||
@@ -52,7 +52,7 @@ RUN echo "HDFS_NAMENODE_USER=root" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh
|
||||
echo "HDFS_SECONDARYNAMENODE_USER=root" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh && \
|
||||
echo "YARN_NODEMANAGER_USER=root" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh && \
|
||||
echo "YARN_RESOURCEMANAGER_USER=root" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh && \
|
||||
echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh && \
|
||||
echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-arm64" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh && \
|
||||
echo "export HADOOP_CLASSPATH+=\" \$HADOOP_HOME/lib/*.jar\"" >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh
|
||||
|
||||
# Install pig
|
||||
@@ -64,20 +64,20 @@ RUN wget -O pig.tar.gz https://downloads.apache.org/pig/pig-0.17.0/pig-0.17.0.ta
|
||||
echo "export PIG_CLASSPATH=\$HADOOP_HOME/etc/hadoop" >> ~/.bashrc
|
||||
|
||||
# Install hbase
|
||||
RUN wget https://archive.apache.org/dist/hbase/3.0.0-beta-1/hbase-3.0.0-beta-1-bin.tar.gz && \
|
||||
tar -xzvf hbase-3.0.0-beta-1-bin.tar.gz && \
|
||||
mv hbase-3.0.0-beta-1 /usr/local/hbase && \
|
||||
RUN wget http://apache.mirror.gtcomm.net/hbase/2.5.8/hbase-2.5.8-bin.tar.gz && \
|
||||
tar -xzvf hbase-2.5.8-bin.tar.gz && \
|
||||
mv hbase-2.5.8 /usr/local/hbase && \
|
||||
echo "export HBASE_HOME=/usr/local/hbase" >> ~/.bashrc && \
|
||||
echo "export PATH=\$PATH:\$HBASE_HOME/bin" >> ~/.bashrc && \
|
||||
echo "export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP=\"true\"" >> /usr/local/hbase/conf/hbase-env.sh && \
|
||||
echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> /usr/local/hbase/conf/hbase-env.sh
|
||||
echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-arm64/" >> /usr/local/hbase/conf/hbase-env.sh
|
||||
# NOTE(review): COPY does not apply shell tilde expansion, so "~" here is taken
# as a literal directory name relative to WORKDIR rather than the home
# directory — confirm the intended destination.
COPY hbase-site.xml ~/hbase-site.xml
|
||||
|
||||
RUN mkdir -p /hadoop/zookeeper && \
|
||||
chown -R $USER:$USER /hadoop/
|
||||
|
||||
# Install Hive
|
||||
RUN wget http://archive.apache.org/dist/hive/hive-3.1.3/apache-hive-3.1.3-bin.tar.gz && \
|
||||
RUN wget https://dlcdn.apache.org/hive/hive-3.1.3/apache-hive-3.1.3-bin.tar.gz && \
|
||||
tar -xzvf apache-hive-3.1.3-bin.tar.gz && \
|
||||
mv apache-hive-3.1.3-bin /usr/local/hive && \
|
||||
echo "export HIVE_HOME=/usr/local/hive" >> ~/.bashrc && \
|
||||
@@ -103,7 +103,7 @@ RUN wget https://archive.apache.org/dist/sqoop/1.4.7/sqoop-1.4.7.bin__hadoop-2.6
|
||||
echo "export HADOOP_MAPRED_HOME=/usr/local/hadoop" >> /usr/local/sqoop/conf/sqoop-env.sh
|
||||
|
||||
# Install Zookeeper
|
||||
RUN wget https://archive.apache.org/dist/zookeeper/zookeeper-3.9.1/apache-zookeeper-3.9.1-bin.tar.gz && \
|
||||
RUN wget https://downloads.apache.org/zookeeper/zookeeper-3.9.1/apache-zookeeper-3.9.1-bin.tar.gz && \
|
||||
tar -xvf apache-zookeeper-3.9.1-bin.tar.gz && \
|
||||
mv apache-zookeeper-3.9.1-bin /usr/local/zookeeper && \
|
||||
mv /usr/local/zookeeper/conf/zoo_sample.cfg /usr/local/zookeeper/conf/zoo.cfg && \
|
||||
@@ -121,10 +121,10 @@ RUN apt-get update && \
|
||||
echo "export PATH=\$PATH:\$SPARK_HOME/bin:\$SPARK_HOME/sbin" >> ~/.bashrc
|
||||
|
||||
# Install Pyspark
|
||||
RUN pip install pyspark --break-system-packages
|
||||
RUN pip install pyspark
|
||||
|
||||
# Install Kafka
|
||||
RUN wget https://archive.apache.org/dist/kafka/3.6.1/kafka_2.13-3.6.1.tgz && \
|
||||
RUN wget https://downloads.apache.org/kafka/3.6.1/kafka_2.13-3.6.1.tgz && \
|
||||
tar -xzvf kafka_2.13-3.6.1.tgz && \
|
||||
mv kafka_2.13-3.6.1 /usr/local/kafka && \
|
||||
echo "export KAFKA_HOME=/usr/local/kafka" >> ~/.bashrc && \
|
||||
|
||||
33
README.md
33
README.md
@@ -1,35 +1,2 @@
|
||||
# Hadoop on Docker
|
||||
## Linux/WSL
|
||||
### Installation
|
||||
```bash
|
||||
curl -fsSL https://bit.ly/hadock-install -o ~/hadoock && chmod +x ~/hadoock && sudo mv ~/hadoock /usr/bin/hadoock
|
||||
```
|
||||
If you're using WSL, make sure to update it:
|
||||
```powershell
|
||||
wsl.exe --update
|
||||
```
|
||||
### Usage
|
||||
```bash
|
||||
hadock
|
||||
```
|
||||
### Update
|
||||
```bash
|
||||
hadock --uninstall
|
||||
curl -fsSL https://bit.ly/hadock-install | bash
|
||||
```
|
||||
#### Copying from host
|
||||
```bash
|
||||
dockcp /path/to/file
|
||||
```
|
||||
This command copies `/path/to/file` from the host to `/home` in your Docker container.
|
||||
#### Running multiple instances
|
||||
```bash
|
||||
hadock_again
|
||||
```
|
||||
Use this command to open another shell in the same Docker instance.
|
||||
### Uninstallation
|
||||
```bash
|
||||
hadock --uninstall
|
||||
```
|
||||
|
||||
Check the [wiki](https://github.com/silicoflare/docker-hadoop/wiki) for a full guide to use this docker image.
|
||||
|
||||
55
kafka
55
kafka
@@ -1,41 +1,20 @@
|
||||
#!/bin/bash
|
||||
|
||||
verb="$1"
|
||||
arg="$2"
|
||||
verb=$1
|
||||
arg=$2
|
||||
|
||||
# Check if KAFKA_HOME is set
|
||||
if [ -z "$KAFKA_HOME" ]; then
|
||||
echo "Error: KAFKA_HOME is not set"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Dispatch on the requested verb. "$arg" is the service name, topic name or
# Spark script path, depending on the verb. Usage errors are reported on
# stderr with a non-zero exit status, matching the KAFKA_HOME check.
case "$verb" in
  start)
    # Servers are launched in the background so the caller gets the shell back.
    case "$arg" in
      zookeeper)
        "$KAFKA_HOME/bin/zookeeper-server-start.sh" "$KAFKA_HOME/config/zookeeper.properties" &
        ;;
      kafka)
        "$KAFKA_HOME/bin/kafka-server-start.sh" "$KAFKA_HOME/config/server.properties" &
        ;;
      *)
        echo "Error: Invalid argument for 'start'. Use 'zookeeper' or 'kafka'." >&2
        exit 1
        ;;
    esac
    ;;
  create-topic)
    "$KAFKA_HOME/bin/kafka-topics.sh" --create --topic "$arg" --bootstrap-server localhost:9092 &
    ;;
  produce)
    # Runs in the foreground: a command backgrounded from a non-interactive
    # script gets /dev/null as stdin, and the console producer is expected to
    # read the messages to send from standard input.
    "$KAFKA_HOME/bin/kafka-console-producer.sh" --topic "$arg" --bootstrap-server localhost:9092
    ;;
  consume)
    "$KAFKA_HOME/bin/kafka-console-consumer.sh" --topic "$arg" --from-beginning --bootstrap-server localhost:9092 &
    ;;
  submit)
    if [[ -z "$arg" ]]; then
      echo "Error: Please provide a Spark script." >&2
      exit 1
    fi
    spark-submit --packages org.apache.spark:spark-sql-kafka-0-10_2.12:3.0.1 "$arg" &
    ;;
  *)
    echo "Error: Unknown verb. Use 'start', 'create-topic', 'produce', 'consume', or 'submit'." >&2
    exit 1
    ;;
esac
|
||||
# Dispatch on the requested verb. "$arg" is the service name, topic name or
# Spark script path, depending on the verb. Every expansion is quoted so that
# a missing argument cannot break the tests ("unary operator expected") and a
# value containing spaces or glob characters is passed through as one word.
if [[ "$verb" == "start" ]]; then
  if [[ "$arg" == "zookeeper" ]]; then
    "$KAFKA_HOME/bin/zookeeper-server-start.sh" "$KAFKA_HOME/config/zookeeper.properties"
  elif [[ "$arg" == "kafka" ]]; then
    "$KAFKA_HOME/bin/kafka-server-start.sh" "$KAFKA_HOME/config/server.properties"
  else
    # Previously an unrecognised target was silently ignored.
    echo "Error: Invalid argument for 'start'. Use 'zookeeper' or 'kafka'." >&2
    exit 1
  fi
elif [[ "$verb" == "create-topic" ]]; then
  "$KAFKA_HOME/bin/kafka-topics.sh" --create --topic "$arg" --bootstrap-server localhost:9092
elif [[ "$verb" == "produce" ]]; then
  "$KAFKA_HOME/bin/kafka-console-producer.sh" --topic "$arg" --bootstrap-server localhost:9092
elif [[ "$verb" == "consume" ]]; then
  "$KAFKA_HOME/bin/kafka-console-consumer.sh" --topic "$arg" --from-beginning --bootstrap-server localhost:9092
elif [[ "$verb" == "submit" ]]; then
  spark-submit --packages org.apache.spark:spark-sql-kafka-0-10_2.12:3.0.1 "$arg"
else
  # Previously an unknown or missing verb fell through without any feedback.
  echo "Error: Unknown verb. Use 'start', 'create-topic', 'produce', 'consume', or 'submit'." >&2
  exit 1
fi
|
||||
Reference in New Issue
Block a user