From bae1d67fb920ee8f4d3e9f326cbf5a3478d2d7ab Mon Sep 17 00:00:00 2001 From: francois saab Date: Tue, 29 Sep 2015 00:43:55 +0300 Subject: [PATCH 1/5] Readme File for MapReduce instructions --- README.md | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/README.md b/README.md index aab9f9c..a5c7fe6 100644 --- a/README.md +++ b/README.md @@ -93,5 +93,64 @@ Each component provide its own web UI. Open you browser at one of the URLs below | HDFS NameNode | [http://dockerhost:50070](http://dockerhost:50070) | | HDFS DataNode | [http://dockerhost:50075](http://dockerhost:50075) | | HDFS Secondary NameNode | [http://dockerhost:50090](http://dockerhost:50090) | +##Running MapReduce example +The general workflow for MapReduce is: 'Input -> Map -> Reduce -> Output'. + +Below are steps to implement the workflow. + +###1) Configuration + +Amend the following configuration in mapred-site.xml: + +Set the property mapred.job.tracker to hdfs-namenode:9001. 
+Remove the property mapreduce.framework.name + + +###2) Input Data + +###2.1) create directory for the input file in HDFS + +hadoop fs -mkdir /usr +hadoop fs -mkdir /usr/WordCount +hadoop fs -mkdir /usr/WordCount/Input + +###2.2) Prepare the input file + +mkdir ~/hdp-ex/ +cd ~/hdp-ex/ + +touch in.txt + +In this example we are using the following words: + +hello world hello docker hello hadoop hello mapreduce h + +###2.3) copy the input file to HDFS for processing by map reduce + +hadoop fs -copyFromLocal ~/hdp-ex/in.txt hdfs://hdfs-namenode:9000/usr/WordCount/Input + +###3) run the mapreduce, word count + +hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar wordcount /usr/WordCount/Input/in.txt /usr/WordCount/Output/ + +###4) output: check the output + +hadoop fs -ls /usr/WordCount/Output/ + +Found 2 items +-rw-r--r-- 2 root supergroup 0 2015-09-27 21:00 /usr/WordCount/Output/_SUCCESS +-rw-r--r-- 2 root supergroup 50 2015-09-27 21:00 /usr/WordCount/Output/part-r-00000 + +Read the output file: + +hadoop fs -cat /usr/WordCount/Output/part-r-00000 + +------------ +docker 1 +h 1 +hadoop 1 +hello 4 +mapreduce 1 +world 1 From 7c5f1646c271a110660d8e4cf9701c952085b2ff Mon Sep 17 00:00:00 2001 From: francois saab Date: Tue, 29 Sep 2015 00:51:52 +0300 Subject: [PATCH 2/5] Changes in port number for docker file --- 2.6.0/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/2.6.0/Dockerfile b/2.6.0/Dockerfile index 85fff43..d356231 100644 --- a/2.6.0/Dockerfile +++ b/2.6.0/Dockerfile @@ -70,6 +70,7 @@ ADD hdfs-site.xml $HADOOP_CONF_DIR/hdfs-site.xml # 50475 = dfs.datanode.https.address (HTTPS / Secure UI) # HDFS: Secondary NameNode (SNN) # 50090 = dfs.secondary.http.address (HTTP / Checkpoint for NameNode metadata) -EXPOSE 9000 50070 50010 50020 50075 50090 +# 50030 (HTTP for Job Tracker web UI) +EXPOSE 9000 50070 50010 50020 50075 50090 50030 9001 CMD ["hdfs"] From 29f9bb0a627df78490f5d92612c6d52c47b7cc9f Mon Sep 
17 00:00:00 2001 From: francois saab Date: Tue, 29 Sep 2015 21:27:58 +0300 Subject: [PATCH 3/5] fixing path --- README.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index a5c7fe6..2e0e1d9 100644 --- a/README.md +++ b/README.md @@ -110,10 +110,9 @@ Remove the property mapreduce.framework.name ###2) Input Data ###2.1) create directory for the input file in HDFS - -hadoop fs -mkdir /usr -hadoop fs -mkdir /usr/WordCount -hadoop fs -mkdir /usr/WordCount/Input + +hadoop fs -mkdir /WordCount +hadoop fs -mkdir /WordCount/Input ###2.2) Prepare the input file @@ -128,15 +127,15 @@ hello world hello docker hello hadoop hello mapreduce h ###2.3) copy the input file to HDFS for processing by map reduce -hadoop fs -copyFromLocal ~/hdp-ex/in.txt hdfs://hdfs-namenode:9000/usr/WordCount/Input +hadoop fs -copyFromLocal ~/hdp-ex/in.txt hdfs://hdfs-namenode:9000/WordCount/Input ###3) run the mapreduce, word count -hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar wordcount /usr/WordCount/Input/in.txt /usr/WordCount/Output/ +hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar wordcount /WordCount/Input/in.txt /WordCount/Output/ ###4) output: check the output -hadoop fs -ls /usr/WordCount/Output/ +hadoop fs -ls /WordCount/Output/ Found 2 items -rw-r--r-- 2 root supergroup 0 2015-09-27 21:00 /usr/WordCount/Output/_SUCCESS @@ -144,7 +143,7 @@ Found 2 items Read the output file: -hadoop fs -cat /usr/WordCount/Output/part-r-00000 +hadoop fs -cat /WordCount/Output/part-r-00000 ------------ docker 1 From 6e49507798719710023cc70e461e5db6fcf83b06 Mon Sep 17 00:00:00 2001 From: francois saab Date: Tue, 29 Sep 2015 21:42:49 +0300 Subject: [PATCH 4/5] spaces --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2e0e1d9..1a4fba6 100644 --- a/README.md +++ b/README.md @@ -152,4 +152,4 @@ hadoop 1 hello 4 mapreduce 
1 world 1 - + From 54ec55c04188c481aba9799ca28317dc55cfa185 Mon Sep 17 00:00:00 2001 From: francois saab Date: Tue, 29 Sep 2015 21:45:24 +0300 Subject: [PATCH 5/5] updating log --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1a4fba6..0a5b2f1 100644 --- a/README.md +++ b/README.md @@ -138,8 +138,8 @@ hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2. hadoop fs -ls /WordCount/Output/ Found 2 items --rw-r--r-- 2 root supergroup 0 2015-09-27 21:00 /usr/WordCount/Output/_SUCCESS --rw-r--r-- 2 root supergroup 50 2015-09-27 21:00 /usr/WordCount/Output/part-r-00000 +-rw-r--r-- 2 root supergroup 0 2015-09-27 21:00 /WordCount/Output/_SUCCESS +-rw-r--r-- 2 root supergroup 50 2015-09-27 21:00 /WordCount/Output/part-r-00000 Read the output file: