commit 127fe5de0116b28ff527f78805fa3b7c77caa1da
Author: Suraj B M
Date:   Sun Jan 28 22:13:32 2024 +0530

    Initial commit

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..9604fac
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,90 @@
+# Single-node Hadoop image: Ubuntu + OpenJDK 8 + Hadoop, with the HDFS and
+# YARN daemons configured to run as root inside the container.
+
+# Pinned release instead of the moving "latest" tag so rebuilds are reproducible.
+FROM ubuntu:22.04
+
+# Hadoop release to install; override with --build-arg HADOOP_VERSION=x.y.z.
+ARG HADOOP_VERSION=3.3.6
+
+# Install location, set on its own so the next ENV instruction can expand it.
+ENV HADOOP_HOME=/usr/local/hadoop
+
+# Put the Hadoop CLI on PATH and name the user each HDFS/YARN daemon runs as.
+ENV PATH=$HADOOP_HOME/bin:$PATH \
+    HDFS_NAMENODE_USER=root \
+    HDFS_DATANODE_USER=root \
+    HDFS_SECONDARYNAMENODE_USER=root \
+    YARN_NODEMANAGER_USER=root \
+    YARN_RESOURCEMANAGER_USER=root
+
+# Install dependencies in a single layer. wget (with CA certificates for HTTPS)
+# is required by the download steps below and is not part of the base image.
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        ca-certificates \
+        neovim \
+        openjdk-8-jdk \
+        ssh \
+        wget && \
+    rm -rf /var/lib/apt/lists/*
+
+# Download, unpack and delete the Hadoop tarball in one layer so the archive
+# does not stay in the image. The versioned archive.apache.org path is used
+# because the "stable" directory on downloads.apache.org moves between releases.
+RUN mkdir -p "$HADOOP_HOME" && \
+    wget -q -O /tmp/hadoop.tar.gz \
+        "https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz" && \
+    tar -xzf /tmp/hadoop.tar.gz -C "$HADOOP_HOME" --strip-components=1 && \
+    rm /tmp/hadoop.tar.gz
+
+# Configure passwordless SSH to localhost for root.
+# NOTE(review): the generated private key is baked into the image; acceptable
+# for a local sandbox only.
+RUN ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa && \
+    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys && \
+    chmod 0600 ~/.ssh/authorized_keys
+
+# Add the javax.activation API jar to Hadoop's lib directory, fetched from
+# Maven Central rather than the retired JCenter/Bintray host.
+RUN wget -q -O "$HADOOP_HOME/lib/javax.activation-api-1.2.0.jar" \
+    https://repo1.maven.org/maven2/javax/activation/javax.activation-api/1.2.0/javax.activation-api-1.2.0.jar
+
+# Create the NameNode and DataNode storage directories named in hdfs-site.xml.
+# Paths are listed explicitly because RUN uses /bin/sh, which does not perform
+# {a,b} brace expansion. No chown is needed: $USER is unset during the build and
+# the directories are already owned by root.
+RUN mkdir -p /home/hadoop/hdfs/namenode /home/hadoop/hdfs/datanode
+
+# Hadoop configuration
+COPY core-site.xml hdfs-site.xml mapred-site.xml yarn-site.xml $HADOOP_HOME/etc/hadoop/
+
+# Hadoop environment for interactive root shells. Single quotes keep the
+# $VARIABLE references literal so they are expanded when ~/.bashrc is sourced.
+RUN { \
+        echo 'export HADOOP_HOME=/usr/local/hadoop'; \
+        echo 'export HADOOP_INSTALL=$HADOOP_HOME'; \
+        echo 'export HADOOP_MAPRED_HOME=$HADOOP_HOME'; \
+        echo 'export HADOOP_COMMON_HOME=$HADOOP_HOME'; \
+        echo 'export HADOOP_HDFS_HOME=$HADOOP_HOME'; \
+        echo 'export YARN_HOME=$HADOOP_HOME'; \
+        echo 'export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native'; \
+        echo 'export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin'; \
+        echo 'export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"'; \
+    } >> ~/.bashrc
+
+# Daemon users, JAVA_HOME and extra classpath for the Hadoop scripts.
+# JAVA_HOME follows the image architecture (amd64, arm64, ...) instead of
+# assuming amd64.
+RUN { \
+        echo 'HDFS_NAMENODE_USER=root'; \
+        echo 'HDFS_DATANODE_USER=root'; \
+        echo 'HDFS_SECONDARYNAMENODE_USER=root'; \
+        echo 'YARN_NODEMANAGER_USER=root'; \
+        echo 'YARN_RESOURCEMANAGER_USER=root'; \
+        echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)"; \
+        echo 'export HADOOP_CLASSPATH+=" $HADOOP_HOME/lib/*.jar"'; \
+    } >> "$HADOOP_HOME/etc/hadoop/hadoop-env.sh"
+
+# Expose necessary ports
+EXPOSE 9870 8088 9000
diff --git a/core-site.xml b/core-site.xml
new file mode 100644
index 0000000..9e664c0
--- /dev/null
+++ b/core-site.xml
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+    <property>
+        <name>fs.default.name</name>
+        <value>hdfs://0.0.0.0:9000</value>
+        <description>The default file system URI</description>
+    </property>
+</configuration>
diff --git a/hdfs-site.xml b/hdfs-site.xml
new file mode 100644
index 0000000..7ed838b
--- /dev/null
+++ b/hdfs-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+    <property>
+        <name>dfs.replication</name>
+        <value>1</value>
+    </property>
+
+    <property>
+        <name>dfs.name.dir</name>
+        <value>file:///home/hadoop/hdfs/namenode</value>
+    </property>
+
+    <property>
+        <name>dfs.data.dir</name>
+        <value>file:///home/hadoop/hdfs/datanode</value>
+    </property>
+</configuration>
\ No newline at end of file
diff --git a/mapred-site.xml b/mapred-site.xml
new file mode 100644
index 0000000..83d59e0
--- /dev/null
+++ b/mapred-site.xml
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+    <property>
+        <name>mapreduce.framework.name</name>
+        <value>yarn</value>
+    </property>
+
+    <property>
+        <name>yarn.app.mapreduce.am.env</name>
+        <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
+    </property>
+    <property>
+        <name>mapreduce.map.env</name>
+        <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
+    </property>
+
+    <property>
+        <name>mapreduce.reduce.env</name>
+        <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
+    </property>
+</configuration>
\ No newline at end of file
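diff --git a/slaves b/slaves
new file mode 100644
index 0000000..e69de29
diff --git a/yarn-site.xml b/yarn-site.xml
new file mode 100644
index 0000000..fc1c6bf
--- /dev/null
+++ b/yarn-site.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+    <property>
+        <name>yarn.nodemanager.aux-services</name>
+        <value>mapreduce_shuffle</value>
+    </property>
+</configuration>
\ No newline at end of file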