diff --git a/2.6.0/Dockerfile b/2.6.0/Dockerfile index 85fff43..d356231 100644 --- a/2.6.0/Dockerfile +++ b/2.6.0/Dockerfile @@ -70,6 +70,7 @@ ADD hdfs-site.xml $HADOOP_CONF_DIR/hdfs-site.xml # 50475 = dfs.datanode.https.address (HTTPS / Secure UI) # HDFS: Secondary NameNode (SNN) # 50090 = dfs.secondary.http.address (HTTP / Checkpoint for NameNode metadata) -EXPOSE 9000 50070 50010 50020 50075 50090 +# 50030 (HTTP for Job Tracker web UI), 9001 (Job Tracker address, mapred.job.tracker) +EXPOSE 9000 50070 50010 50020 50075 50090 50030 9001 CMD ["hdfs"] diff --git a/README.md b/README.md index aab9f9c..0a5b2f1 100644 --- a/README.md +++ b/README.md @@ -93,5 +93,63 @@ Each component provide its own web UI. Open you browser at one of the URLs below | HDFS NameNode | [http://dockerhost:50070](http://dockerhost:50070) | | HDFS DataNode | [http://dockerhost:50075](http://dockerhost:50075) | | HDFS Secondary NameNode | [http://dockerhost:50090](http://dockerhost:50090) | +## Running the MapReduce example +The general workflow for MapReduce is: `Input -> Map -> Reduce -> Output`. +Below are the steps to implement the workflow. + +### 1) Configuration + +Amend the following configuration in mapred-site.xml: + +Set the property mapred.job.tracker to hdfs-namenode:9001. 
+Remove the property mapreduce.framework.name. + + +### 2) Input Data + +### 2.1) Create a directory for the input file in HDFS + +hadoop fs -mkdir /WordCount +hadoop fs -mkdir /WordCount/Input + +### 2.2) Prepare the input file + +mkdir ~/hdp-ex/ +cd ~/hdp-ex/ + +touch in.txt + +In this example, in.txt contains the following words: + +hello world hello docker hello hadoop hello mapreduce h + +### 2.3) Copy the input file to HDFS for processing by MapReduce + +hadoop fs -copyFromLocal ~/hdp-ex/in.txt hdfs://hdfs-namenode:9000/WordCount/Input + +### 3) Run the MapReduce word count job + +hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar wordcount /WordCount/Input/in.txt /WordCount/Output/ + +### 4) Check the output + +hadoop fs -ls /WordCount/Output/ + +Found 2 items +-rw-r--r-- 2 root supergroup 0 2015-09-27 21:00 /WordCount/Output/_SUCCESS +-rw-r--r-- 2 root supergroup 50 2015-09-27 21:00 /WordCount/Output/part-r-00000 + +Read the output file: + +hadoop fs -cat /WordCount/Output/part-r-00000 + +------------ +docker 1 +h 1 +hadoop 1 +hello 4 +mapreduce 1 +world 1 +