Merge branch 'amd' into arm

This commit is contained in:
2024-04-02 13:46:08 +05:30
3 changed files with 34 additions and 62 deletions

View File

@@ -13,11 +13,11 @@ ENV YARN_RESOURCEMANAGER_USER=root
# Install necessary dependencies # Install necessary dependencies
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y ssh openjdk-8-jdk neovim junit python-is-python3 nano curl python3-pip apt-get install -y ssh openjdk-8-jdk neovim junit python-is-python3 nano curl python3-pip dos2unix
# Download and extract Hadoop # Download and extract Hadoop
RUN mkdir -p $HADOOP_HOME && \ RUN mkdir -p $HADOOP_HOME && \
wget -O hadoop.tar.gz https://downloads.apache.org/hadoop/common/stable/hadoop-3.3.6.tar.gz && \ wget -O hadoop.tar.gz https://downloads.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz && \
tar -xzvf hadoop.tar.gz -C $HADOOP_HOME --strip-components=1 tar -xzvf hadoop.tar.gz -C $HADOOP_HOME --strip-components=1
# Configure SSH # Configure SSH
@@ -64,9 +64,9 @@ RUN wget -O pig.tar.gz https://downloads.apache.org/pig/pig-0.17.0/pig-0.17.0.ta
echo "export PIG_CLASSPATH=\$HADOOP_HOME/etc/hadoop" >> ~/.bashrc echo "export PIG_CLASSPATH=\$HADOOP_HOME/etc/hadoop" >> ~/.bashrc
# Install hbase # Install hbase
RUN wget http://apache.mirror.gtcomm.net/hbase/stable/hbase-2.5.7-bin.tar.gz && \ RUN wget http://apache.mirror.gtcomm.net/hbase/2.5.8/hbase-2.5.8-bin.tar.gz && \
tar -xzvf hbase-2.5.7-bin.tar.gz && \ tar -xzvf hbase-2.5.8-bin.tar.gz && \
mv hbase-2.5.7 /usr/local/hbase && \ mv hbase-2.5.8 /usr/local/hbase && \
echo "export HBASE_HOME=/usr/local/hbase" >> ~/.bashrc && \ echo "export HBASE_HOME=/usr/local/hbase" >> ~/.bashrc && \
echo "export PATH=\$PATH:\$HBASE_HOME/bin" >> ~/.bashrc && \ echo "export PATH=\$PATH:\$HBASE_HOME/bin" >> ~/.bashrc && \
echo "export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP=\"true\"" >> /usr/local/hbase/conf/hbase-env.sh && \ echo "export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP=\"true\"" >> /usr/local/hbase/conf/hbase-env.sh && \
@@ -148,9 +148,15 @@ RUN wget https://jdbc.postgresql.org/download/postgresql-42.7.1.jar && \
COPY restart $HADOOP_HOME/bin/restart COPY restart $HADOOP_HOME/bin/restart
COPY init $HADOOP_HOME/bin/init COPY init $HADOOP_HOME/bin/init
COPY colors $HADOOP_HOME/bin/colors COPY colors $HADOOP_HOME/bin/colors
RUN chmod +x $HADOOP_HOME/bin/restart && \ COPY kafka $HADOOP_HOME/bin/kafka
RUN dos2unix $HADOOP_HOME/bin/restart && \
dos2unix $HADOOP_HOME/bin/colors && \
dos2unix $HADOOP_HOME/bin/init && \
dos2unix $HADOOP_HOME/bin/kafka && \
chmod +x $HADOOP_HOME/bin/restart && \
chmod +x $HADOOP_HOME/bin/colors && \ chmod +x $HADOOP_HOME/bin/colors && \
chmod +x $HADOOP_HOME/bin/init chmod +x $HADOOP_HOME/bin/init && \
chmod +x $HADOOP_HOME/bin/kafka
# Cleaning up archives # Cleaning up archives
RUN rm *.tar.gz && \ RUN rm *.tar.gz && \

View File

@@ -1,56 +1,2 @@
# Hadoop on Docker # Hadoop on Docker
Use this to get a quick version of Hadoop running on Docker. Check the [wiki](https://github.com/silicoflare/docker-hadoop/wiki) for a full guide to using this Docker image.
1. Install Docker on your host PC
<br>
2. Clone the platform-specific branch of this repository
```bash
# Windows
git clone -b windows --single-branch https://github.com/silicoflare/docker-hadoop
# Mac
git clone -b mac --single-branch https://github.com/silicoflare/docker-hadoop
# Linux
git clone -b linux --single-branch https://github.com/silicoflare/docker-hadoop
```
<br>
3. Navigate to the directory
```bash
cd docker-hadoop
```
<br>
4. Build the docker image (you may need to use sudo)
```bash
docker build -t hadoop .
```
<br>
5. Wait for the build to finish
<br>
6. Create a new container using the newly created image
```bash
docker run -it -p 9870:9870 -p 8088:8088 -p 9864:9864 --name anyname hadoop bash
```
<br>
7. Once the prompt appears, execute the following command to initialize everything:
```bash
init
```
<br>
8. From then on, just run the following to reopen the prompt. Use the same name that you used when creating the container.
```bash
docker start anyname
docker exec -it anyname bash
```
Once in, execute:
```bash
restart
```

20
kafka Normal file
View File

@@ -0,0 +1,20 @@
#!/bin/bash
#
# kafka - thin wrapper around the Kafka command-line scripts and spark-submit.
#
# Usage:
#   kafka start zookeeper          run zookeeper-server-start.sh with config/zookeeper.properties
#   kafka start kafka              run kafka-server-start.sh with config/server.properties
#   kafka create-topic <topic>     create <topic> via kafka-topics.sh
#   kafka produce <topic>          open kafka-console-producer.sh on <topic>
#   kafka consume <topic>          read <topic> from the beginning via kafka-console-consumer.sh
#   kafka submit <application>     spark-submit <application> with the spark-sql-kafka package
#
# Environment:
#   KAFKA_HOME  directory containing bin/ and config/ of the Kafka installation;
#               required by every verb except "submit".
#
# Exit status: that of the wrapped tool; 2 on a usage error; 1 if KAFKA_HOME is
# needed but unset.

# Print the usage summary to stderr.
kafka_usage() {
  printf '%s\n' \
    'Usage: kafka <verb> <argument>' \
    '  start zookeeper|kafka' \
    '  create-topic <topic>' \
    '  produce <topic>' \
    '  consume <topic>' \
    '  submit <application>' >&2
}

# Succeed only if KAFKA_HOME is set and non-empty; otherwise complain on stderr.
kafka_require_home() {
  if [[ -z "${KAFKA_HOME:-}" ]]; then
    printf 'kafka: KAFKA_HOME is not set\n' >&2
    return 1
  fi
}

# Dispatch on the verb ($1) and its single argument ($2).
kafka_main() {
  local verb="${1:-}"
  local arg="${2:-}"
  local -r bootstrap_server="localhost:9092"

  # Every verb takes exactly one argument; without it the wrapped tools would
  # be handed a malformed command line.
  if [[ -z "$verb" || -z "$arg" ]]; then
    kafka_usage
    return 2
  fi

  case "$verb" in
    start)
      case "$arg" in
        zookeeper)
          kafka_require_home || return
          "$KAFKA_HOME/bin/zookeeper-server-start.sh" \
            "$KAFKA_HOME/config/zookeeper.properties"
          ;;
        kafka)
          kafka_require_home || return
          "$KAFKA_HOME/bin/kafka-server-start.sh" \
            "$KAFKA_HOME/config/server.properties"
          ;;
        *)
          printf 'kafka: unknown service: %s\n' "$arg" >&2
          kafka_usage
          return 2
          ;;
      esac
      ;;
    create-topic)
      kafka_require_home || return
      "$KAFKA_HOME/bin/kafka-topics.sh" --create --topic "$arg" \
        --bootstrap-server "$bootstrap_server"
      ;;
    produce)
      kafka_require_home || return
      "$KAFKA_HOME/bin/kafka-console-producer.sh" --topic "$arg" \
        --bootstrap-server "$bootstrap_server"
      ;;
    consume)
      kafka_require_home || return
      "$KAFKA_HOME/bin/kafka-console-consumer.sh" --topic "$arg" \
        --from-beginning --bootstrap-server "$bootstrap_server"
      ;;
    submit)
      # spark-submit is resolved through PATH, so KAFKA_HOME is not needed here.
      spark-submit --packages org.apache.spark:spark-sql-kafka-0-10_2.12:3.0.1 "$arg"
      ;;
    *)
      printf 'kafka: unknown verb: %s\n' "$verb" >&2
      kafka_usage
      return 2
      ;;
  esac
}

kafka_main "$@"