mirror of
https://github.com/silicoflare/docker-hadoop.git
synced 2026-05-27 04:20:03 +05:30
Compare commits
15 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dc9016b941 | ||
|
|
4d7ad91d1e | ||
|
|
68dc43ed74 | ||
|
|
d75780a3b6 | ||
|
|
ae2efbde2d | ||
|
|
70af45cd7e | ||
|
|
3ab5aa8119 | ||
|
|
788a45bf14 | ||
|
|
7cbf3acb05 | ||
|
|
98f15fec76 | ||
|
|
720ae5cf98 | ||
|
|
c7ceedb27c | ||
|
|
79f329fa8e | ||
|
|
d935f91810 | ||
|
|
62c67d1737 |
14
Dockerfile
14
Dockerfile
@@ -64,9 +64,9 @@ RUN wget -O pig.tar.gz https://downloads.apache.org/pig/pig-0.17.0/pig-0.17.0.ta
|
|||||||
echo "export PIG_CLASSPATH=\$HADOOP_HOME/etc/hadoop" >> ~/.bashrc
|
echo "export PIG_CLASSPATH=\$HADOOP_HOME/etc/hadoop" >> ~/.bashrc
|
||||||
|
|
||||||
# Install hbase
|
# Install hbase
|
||||||
RUN wget http://apache.mirror.gtcomm.net/hbase/2.5.8/hbase-2.5.8-bin.tar.gz && \
|
RUN wget https://archive.apache.org/dist/hbase/3.0.0-beta-1/hbase-3.0.0-beta-1-bin.tar.gz && \
|
||||||
tar -xzvf hbase-2.5.8-bin.tar.gz && \
|
tar -xzvf hbase-3.0.0-beta-1-bin.tar.gz && \
|
||||||
mv hbase-2.5.8 /usr/local/hbase && \
|
mv hbase-3.0.0-beta-1 /usr/local/hbase && \
|
||||||
echo "export HBASE_HOME=/usr/local/hbase" >> ~/.bashrc && \
|
echo "export HBASE_HOME=/usr/local/hbase" >> ~/.bashrc && \
|
||||||
echo "export PATH=\$PATH:\$HBASE_HOME/bin" >> ~/.bashrc && \
|
echo "export PATH=\$PATH:\$HBASE_HOME/bin" >> ~/.bashrc && \
|
||||||
echo "export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP=\"true\"" >> /usr/local/hbase/conf/hbase-env.sh && \
|
echo "export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP=\"true\"" >> /usr/local/hbase/conf/hbase-env.sh && \
|
||||||
@@ -77,7 +77,7 @@ RUN mkdir -p /hadoop/zookeeper && \
|
|||||||
chown -R $USER:$USER /hadoop/
|
chown -R $USER:$USER /hadoop/
|
||||||
|
|
||||||
# Install Hive
|
# Install Hive
|
||||||
RUN wget https://dlcdn.apache.org/hive/hive-3.1.3/apache-hive-3.1.3-bin.tar.gz && \
|
RUN wget http://archive.apache.org/dist/hive/hive-3.1.3/apache-hive-3.1.3-bin.tar.gz && \
|
||||||
tar -xzvf apache-hive-3.1.3-bin.tar.gz && \
|
tar -xzvf apache-hive-3.1.3-bin.tar.gz && \
|
||||||
mv apache-hive-3.1.3-bin /usr/local/hive && \
|
mv apache-hive-3.1.3-bin /usr/local/hive && \
|
||||||
echo "export HIVE_HOME=/usr/local/hive" >> ~/.bashrc && \
|
echo "export HIVE_HOME=/usr/local/hive" >> ~/.bashrc && \
|
||||||
@@ -103,7 +103,7 @@ RUN wget https://archive.apache.org/dist/sqoop/1.4.7/sqoop-1.4.7.bin__hadoop-2.6
|
|||||||
echo "export HADOOP_MAPRED_HOME=/usr/local/hadoop" >> /usr/local/sqoop/conf/sqoop-env.sh
|
echo "export HADOOP_MAPRED_HOME=/usr/local/hadoop" >> /usr/local/sqoop/conf/sqoop-env.sh
|
||||||
|
|
||||||
# Install Zookeeper
|
# Install Zookeeper
|
||||||
RUN wget https://downloads.apache.org/zookeeper/zookeeper-3.9.1/apache-zookeeper-3.9.1-bin.tar.gz && \
|
RUN wget https://archive.apache.org/dist/zookeeper/zookeeper-3.9.1/apache-zookeeper-3.9.1-bin.tar.gz && \
|
||||||
tar -xvf apache-zookeeper-3.9.1-bin.tar.gz && \
|
tar -xvf apache-zookeeper-3.9.1-bin.tar.gz && \
|
||||||
mv apache-zookeeper-3.9.1-bin /usr/local/zookeeper && \
|
mv apache-zookeeper-3.9.1-bin /usr/local/zookeeper && \
|
||||||
mv /usr/local/zookeeper/conf/zoo_sample.cfg /usr/local/zookeeper/conf/zoo.cfg && \
|
mv /usr/local/zookeeper/conf/zoo_sample.cfg /usr/local/zookeeper/conf/zoo.cfg && \
|
||||||
@@ -121,10 +121,10 @@ RUN apt-get update && \
|
|||||||
echo "export PATH=\$PATH:\$SPARK_HOME/bin:\$SPARK_HOME/sbin" >> ~/.bashrc
|
echo "export PATH=\$PATH:\$SPARK_HOME/bin:\$SPARK_HOME/sbin" >> ~/.bashrc
|
||||||
|
|
||||||
# Install Pyspark
|
# Install Pyspark
|
||||||
RUN pip install pyspark
|
RUN pip install pyspark --break-system-packages
|
||||||
|
|
||||||
# Install Kafka
|
# Install Kafka
|
||||||
RUN wget https://downloads.apache.org/kafka/3.6.1/kafka_2.13-3.6.1.tgz && \
|
RUN wget https://archive.apache.org/dist/kafka/3.6.1/kafka_2.13-3.6.1.tgz && \
|
||||||
tar -xzvf kafka_2.13-3.6.1.tgz && \
|
tar -xzvf kafka_2.13-3.6.1.tgz && \
|
||||||
mv kafka_2.13-3.6.1 /usr/local/kafka && \
|
mv kafka_2.13-3.6.1 /usr/local/kafka && \
|
||||||
echo "export KAFKA_HOME=/usr/local/kafka" >> ~/.bashrc && \
|
echo "export KAFKA_HOME=/usr/local/kafka" >> ~/.bashrc && \
|
||||||
|
|||||||
33
README.md
33
README.md
@@ -1,2 +1,35 @@
|
|||||||
# Hadoop on Docker
|
# Hadoop on Docker
|
||||||
|
## Linux/WSL
|
||||||
|
### Installation
|
||||||
|
```bash
|
||||||
|
curl -fsSL https://bit.ly/hadock-install -o ~/hadoock && chmod +x ~/hadoock && sudo mv ~/hadoock /usr/bin/hadoock
|
||||||
|
```
|
||||||
|
If you're using WSL, make sure to update it first:
|
||||||
|
```powershell
|
||||||
|
wsl.exe --update
|
||||||
|
```
|
||||||
|
### Usage
|
||||||
|
```bash
|
||||||
|
hadock
|
||||||
|
```
|
||||||
|
### Update
|
||||||
|
```bash
|
||||||
|
hadock --uninstall
|
||||||
|
curl -fsSL https://bit.ly/hadock-install | bash
|
||||||
|
```
|
||||||
|
#### Copying from host
|
||||||
|
```bash
|
||||||
|
dockcp /path/to/file
|
||||||
|
```
|
||||||
|
This command copies `/path/to/file` from the host to `/home` in your Docker container.
|
||||||
|
#### Running multiple instances
|
||||||
|
```bash
|
||||||
|
hadock_again
|
||||||
|
```
|
||||||
|
Use this command to open another shell in the same Docker instance.
|
||||||
|
### Uninstallation
|
||||||
|
```bash
|
||||||
|
hadock --uninstall
|
||||||
|
```
|
||||||
|
|
||||||
Check the [wiki](https://github.com/silicoflare/docker-hadoop/wiki) for a full guide to use this docker image.
|
Check the [wiki](https://github.com/silicoflare/docker-hadoop/wiki) for a full guide to use this docker image.
|
||||||
|
|||||||
55
kafka
55
kafka
@@ -1,20 +1,41 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
verb=$1
|
verb="$1"
|
||||||
arg=$2
|
arg="$2"
|
||||||
|
|
||||||
if [ $verb == "start" ]; then
|
# Check if KAFKA_HOME is set
|
||||||
if [ $arg == "zookeeper" ]; then
|
if [ -z "$KAFKA_HOME" ]; then
|
||||||
$KAFKA_HOME/bin/zookeeper-server-start.sh $KAFKA_HOME/config/zookeeper.properties
|
echo "Error: KAFKA_HOME is not set"
|
||||||
elif [ $arg == "kafka" ]; then
|
exit 1
|
||||||
$KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_HOME/config/server.properties
|
fi
|
||||||
fi
|
|
||||||
elif [ $verb == "create-topic" ]; then
|
case "$verb" in
|
||||||
$KAFKA_HOME/bin/kafka-topics.sh --create --topic $arg --bootstrap-server localhost:9092
|
"start")
|
||||||
elif [ $verb == "produce" ]; then
|
if [ "$arg" == "zookeeper" ]; then
|
||||||
$KAFKA_HOME/bin/kafka-console-producer.sh --topic $arg --bootstrap-server localhost:9092
|
"$KAFKA_HOME/bin/zookeeper-server-start.sh" "$KAFKA_HOME/config/zookeeper.properties" &
|
||||||
elif [ $verb == "consume" ]; then
|
elif [ "$arg" == "kafka" ]; then
|
||||||
$KAFKA_HOME/bin/kafka-console-consumer.sh --topic $arg --from-beginning --bootstrap-server localhost:9092
|
"$KAFKA_HOME/bin/kafka-server-start.sh" "$KAFKA_HOME/config/server.properties" &
|
||||||
elif [ $verb == "submit" ]; then
|
else
|
||||||
spark-submit --packages org.apache.spark:spark-sql-kafka-0-10_2.12:3.0.1 $arg
|
echo "Error: Invalid argument for 'start'. Use 'zookeeper' or 'kafka'."
|
||||||
fi
|
fi
|
||||||
|
;;
|
||||||
|
"create-topic")
|
||||||
|
"$KAFKA_HOME/bin/kafka-topics.sh" --create --topic "$arg" --bootstrap-server localhost:9092 &
|
||||||
|
;;
|
||||||
|
"produce")
|
||||||
|
"$KAFKA_HOME/bin/kafka-console-producer.sh" --topic "$arg" --bootstrap-server localhost:9092 &
|
||||||
|
;;
|
||||||
|
"consume")
|
||||||
|
"$KAFKA_HOME/bin/kafka-console-consumer.sh" --topic "$arg" --from-beginning --bootstrap-server localhost:9092 &
|
||||||
|
;;
|
||||||
|
"submit")
|
||||||
|
if [ -z "$arg" ]; then
|
||||||
|
echo "Error: Please provide a Spark script."
|
||||||
|
else
|
||||||
|
spark-submit --packages org.apache.spark:spark-sql-kafka-0-10_2.12:3.0.1 "$arg" &
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "Error: Unknown verb. Use 'start', 'create-topic', 'produce', 'consume', or 'submit'."
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|||||||
Reference in New Issue
Block a user