Upload
piyali-gupta
View
265
Download
3
Embed Size (px)
Citation preview
7/24/2019 Cloudera Developer Exercise Instructions
1/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
1
Cloudera Developer Training for
Apache Hadoop:
Hands-On Exercises
General Notes ..... 3
Hands-On Exercise: Using HDFS ..... 5
Hands-On Exercise: Running a MapReduce Job ..... 11
Hands-On Exercise: Writing a MapReduce Java Program ..... 16
Hands-On Exercise: More Practice With MapReduce Java Programs ..... 24
Optional Hands-On Exercise: Writing a MapReduce Streaming Program ..... 26
Hands-On Exercise: Writing Unit Tests With the MRUnit Framework ..... 29
Hands-On Exercise: Using ToolRunner and Passing Parameters ..... 30
Optional Hands-On Exercise: Using a Combiner ..... 33
Hands-On Exercise: Testing with LocalJobRunner ..... 34
Optional Hands-On Exercise: Logging ..... 38
"#$%#&
7/24/2019 Cloudera Developer Exercise Instructions
2/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
2
Hands-On Exercise: Using Counters and a Map-Only Job ..... 41
Hands-On Exercise: Writing a Partitioner ..... 43
Hands-On Exercise: Implementing a Custom WritableComparable ..... 46
Hands-On Exercise: Using SequenceFiles and File Compression ..... 49
Hands-On Exercise: Creating an Inverted Index ..... 5[…]
[…] Calculating Word Co-Occurrence ..... […]
[…] Manipulating Data With Hive ..... 63
Hands-On Exercise: Running an Oozie Workflow ..... 69
Bonus Exercises ..... 71
Bonus Exercise: Exploring a Secondary Sort Example ..... 72
7/24/2019 Cloudera Developer Exercise Instructions
3/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
3
General Notes
Cloudera's training courses use a Virtual Machine running the CentOS […]
distribution. This VM has CDH (Cloudera's Distribution, including Apache Hadoop)
installed in Pseudo-Distributed mode. Pseudo-Distributed mode is a method of
running Hadoop whereby all Hadoop daemons run on the same machine. It is,
essentially, a cluster consisting of a single machine. It works just like a larger
Hadoop cluster, the only key difference (apart from speed, of course!) being that the
block replication factor is set to 1, since there is only a single DataNode available. […] shell prompt. […] you will begin to get acquainted with the Hadoop tools. You
will manipulate files in HDFS, the Hadoop Distributed File System.
Set Up Your Environment
1. Before starting the exercises, run the course setup script in a terminal window:
$ ~/scripts/developer/training_setup_dev.sh
Hadoop
Hadoop is already installed, configured, and running on your virtual machine. […] exercises
7/24/2019 Cloudera Developer Exercise Instructions
9/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
9
$ hadoop fs -mkdir testlog
$ gunzip -c access_log.gz | head -n 5000 \
| hadoop fs -put - testlog/test_access_log
Step 3: Viewing and Manipulating Files
Now let's view some of the data you just copied into HDFS. […] you will compile Java files, create a JAR, and run MapReduce
jobs.
In addition to manipulating files in HDFS, the wrapper program hadoop is used to
launch MapReduce jobs. The code for a job is contained in a compiled JAR file. […] executed. […] example of a MapReduce job is to count the number of occurrences of
each word in a file or set of files. In this lab you will compile and submit a
MapReduce job to count the number of occurrences of every word in the works of
Shakespeare.
7/24/2019 Cloudera Developer Exercise Instructions
12/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
12
Compiling and Submitting a MapReduce Job
1. In a terminal window, change to the exercise source directory, and list the
contents:
$ cd ~/workspace/wordcount/src
$ ls
This directory contains three “package” subdirectories: solution, stubs and
hints. In this example we will be using the solution code, so list the files in the
solution package directory:
$ ls solution
The package contains the following Java files:
WordCount.java: A simple MapReduce driver class. […] existing directory. This is by design;
since the result of a MapReduce job may be expensive to reproduce, Hadoop
F-,S,310 M)* K-)J .552+,31.((M )S,-L-21234 F-,S2)*0(M ,>201234 K2(,0,-520, C2F)-1#5V>" UF %//$"++,
Your task is to count the number of hits made from each IP address in the sample (anonymized) web server log file that you uploaded to the
/user/training/weblog directory in HDFS when you completed the “Using
HDFS” exercise. […]nt program you ran earlier. You can reuse that code or you can
write your own if you prefer.
2. Build your application jar file following the steps in the previous exercise. […] you will repeat the same task as in the previous exercise:
writing a program to calculate average word lengths for letters. However, you
will write this as a streaming program using a scripting language of your
choice rather than using Java.
Your virtual machine has Perl, Python, PHP, and Ruby installed, so you can choose
any of these—or even shell scripting—to develop a Streaming solution. […] text editor
to write your Mapper script and your Reducer script. Here are some notes about
solving the problem in Hadoop Streaming:
1. The Mapper Script
The Mapper will receive lines of text on stdin. Find the words in the lines to
produce the intermediate output, and emit intermediate (key, value) pairs by
writing strings of the form:
key value
These strings should be written to stdout. […] example, the reduce script may receive the following:
t 3
t 4
w 4
w 6
For this input, emit the following to stdout:
t 3.5
w 5.0
Observe that the reducer receives a key with each input line, and must “notice”
when the key changes on a subsequent line (or when the input is finished) to
know when the values for a given key have been exhausted. This is different
than the Java version you worked on in the previous exercise. […] exercise written in Python in the directory
~/workspace/averagewordlength/python_sample_solution.
To run the solution, change directory to ~/workspace/averagewordlength
and run this command:
$ hadoop jar /usr/lib/hadoop-0.20-mapreduce\
/contrib/streaming/hadoop-streaming*.jar \
-input shakespeare -output avgwordstreaming \
-file python_sample_solution/mapper.py \
-file python_sample_solution/reducer.py \
-mapper mapper.py -reducer reducer.py
This is the end of the Exercise
7/24/2019 Cloudera Developer Exercise Instructions
29/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
29
Hands-On Exercise: Writing Unit
Tests With the MRUnit Framework
Projects Used in this Exercise
Eclipse project: mrunit
Java files:
SumReducer.java(Reducer from WordCount)
WordMapper.java(Mapper from WordCount)
TestWordCount.java(Test Driver)
In this Exercise, you will write Unit Tests for the WordCount code.
1. Launch Eclipse (if necessary) and expand the mrunit folder. […] Examine the TestWordCount.java file in the mrunit project stubs
package. Notice that three tests have been created, one each for the Mapper,
Reducer, and the entire MapReduce flow. Currently, all three tests simply fail. […] Explorer panel and choosing Run As → JUnit Test. […] next to the Package Explorer tab)
should indicate that three tests ran with three failures. […] you will implement a driver using ToolRunner.
Follow the steps below to start with the Average Word Length program you wrote
in an earlier exercise, and modify the driver to use ToolRunner. Then modify the
Mapper to reference a Boolean parameter called caseSensitive; if true, the
mapper should treat upper and lower case letters as different; if false or unset, all
letters should be converted to lower case.
7/24/2019 Cloudera Developer Exercise Instructions
31/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
31
Modify the Average Word Length Driver to use Toolrunner
1. Copy the Reducer, Mapper and driver code you completed in the “Writing Java
MapReduce Programs” exercise earlier, in the averagewordlength project. […] exercise, use the code from the solution
package.
7/24/2019 Cloudera Developer Exercise Instructions
32/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
32
?
7/24/2019 Cloudera Developer Exercise Instructions
33/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
33
Optional Hands-On Exercise: Using a
Combiner
Files and Directories Used in this Exercise
Eclipse project: combiner
Java files:
WordCountDriver.java(Driver from WordCount)
WordMapper.java (Mapper from WordCount)
SumReducer.java (Reducer from WordCount)
Exercise directory: ~/workspace/combiner
In this exercise, you will add a Combiner to the WordCount program to reduce
the amount of intermediate data sent from the Mapper to the Reducer.
Because summing is associative and commutative, the same class can be used for
both the Reducer and the Combiner. […] you will practice running a job locally for debugging and testing purposes.
In the “Using ToolRunner and Passing Parameters” exercise, you modified the
Average Word Length program to use ToolRunner. This makes it simple to set job
configuration properties on the command line. […] Run
Configurations
&
7/24/2019 Cloudera Developer Exercise Instructions
36/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
36
f
7/24/2019 Cloudera Developer Exercise Instructions
37/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
37
This is the end of the Exercise
7/24/2019 Cloudera Developer Exercise Instructions
38/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
38
Optional Hands-On Exercise: Logging
Files and Directories Used in this Exercise
Eclipse project: logging
Java files:
AverageReducer.java(Reducer from ToolRunner)
LetterMapper.java(Mapper from ToolRunner)
AvgWordLength.java(driver from ToolRunner)
Test data (HDFS):
shakespeare
Exercise directory: ~/workspace/logging
In this Hands-On Exercise, you will practice using log4j with MapReduce.
Modify the Average Word Length program you built in the Using ToolRunner and
Passing Parameters exercise so that the Mapper logs a debug message indicating
whether it is comparing with or without case sensitivity.
7/24/2019 Cloudera Developer Exercise Instructions
39/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
39
Enable Mapper Logging for the Job
1. […] exercise solution with Mapper debug logging enabled by adding
-Dmapred.map.child.log.level=DEBUG to the command line. E[…]
7/24/2019 Cloudera Developer Exercise Instructions
40/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
40
;
7/24/2019 Cloudera Developer Exercise Instructions
41/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
41
Hands-On Exercise: Using Counters
and a Map-Only Job
Files and Directories Used in this Exercise
Eclipse project: counters
Java files:
ImageCounter.java(driver)
ImageCounterMapper.java (Mapper)
Test data (HDFS):
weblog(full web server access log)
testlog (partial data set for testing)
Exercise directory: ~/workspace/counters
In this exercise you will create a Map-only MapReduce job.
Your application will process a web server's access log to count the number of times
gifs, jpegs, and other resources have been retrieved. Your job will report three
figures: number of gif requests, number of jpeg requests, and number of other requests. […] output
is sent to.
The Problem
In the “More Practice with Writing MapReduce Java Programs” exercise you did
previously, you built the code in log_file_analysis project. That program
counted the number of hits for each different IP address in a web log file. The final
output was a file containing a list of IP addresses, and the number of hits from that
.++-,00*@>* E%&>"UApr
Hint: in the Mapper, you may use a regular expression to parse the log file data if
you are familiar with regex processing. Otherwise we suggest following the tips
in the hints code, or just copy the code from the solution package. […] unexpected data – that is, lines that do
not conform to the expected format. Be sure that your code copes with such
lines. […] you will create a custom WritableComparable type that holds
two strings.
Test the new type by creating a simple program that reads a list of names (first and last) and counts the number of occurrences of each name. […] example, for input:
Smith Joe 1963-08-12 Poughkeepsie, NY
Smith Joe 1832-01-20 Sacramento, CA
Murphy Alice 2004-06-02 Berlin, MA
We want to output:
7/24/2019 Cloudera Developer Exercise Instructions
47/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
47
(Smith,Joe) 2
(Murphy,Alice) 1
Note: You will use your custom WritableComparable type in a future exercise, so
make sure it is working with the test job now.
StringPairWritable
You need to implement a WritableComparable object that holds the two strings. The
stub provides an empty constructor for serialization, a standard constructor that
will be given two strings, a toString method, and the generated hashCode and equals methods. You will need to implement the readFields, write, and
compareTo methods required by WritableComparable. […] text file should have the same
text data as the log file, plus keys. The keys can contain any values you like. […] produces an inverted
index.
For this lab you will use an alternate input, provided in the file
invertedIndexInput.tgz. When decompressed, this archive contains a directory of files; each is a Shakespeare play formatted as follows:
0 HAMLET
1
2
3 DRAMATIS PERSONAE
4
5
6 CLAUDIUS king of Denmark. (KING CLAUDIUS:)
7
8 HAMLET son to the late, and nephew to the present
king.
9
7/24/2019 Cloudera Developer Exercise Instructions
55/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
55
10 POLONIUS lord chamberlain. (LORD POLONIUS:)
...
Each line contains:
Line number
separator: a tab character
1
This format can be read directly using the KeyValueTextInputFormat class
provided in the Hadoop API. This input format presents each line as one record to
your Mapper, with the part before the tab character as the key, and the part after the
tab as the value. […] text in this form, your indexer should produce an index of all the
words in the text. For each word, the index should have a list of all the locations
where the word appears. For example, for the word ‘honeysuckle’ your output
should look like this:
honeysuckle 2kinghenryiv@1038,midsummernightsdream@2175,...
The index should contain such an entry for every word in the text. […] Extract the invertedIndexInput directory and upload to HDFS:
$ cd ~/training_materials/developer/data
$ tar zxvf invertedIndexInput.tgz
$ hadoop fs -put invertedIndexInput invertedIndexInput
Define the MapReduce Solution
Remember that for this program you use a special input format to suit the form of
your data, so your driver class will include a line like:
7/24/2019 Cloudera Developer Exercise Instructions
56/75
7/24/2019 Cloudera Developer Exercise Instructions
57/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
57
Have hamlet@282
heaven hamlet@282
and hamlet@282
earth hamlet@282
together hamlet@282
The Reducer
Your Reducer simply aggregates the values presented to it for the same key, into one
value. Use a separator like ‘,’ between the values listed. […] exercise. If you completed the exercise (in the writables project) copy that code to the current project. Otherwise copy the
class from the writables solution package. […] you load here will be used in subsequent exercises.
Consider the MySQL database movielens, derived from the MovieLens project
from University of Minnesota. (See note at the end of this exercise.) […] mysql> DESCRIBE movie;
. . .
mysql> SELECT * FROM movie LIMIT 5;
3. Note the column names for the table:
pppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppp
7/24/2019 Cloudera Developer Exercise Instructions
61/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
61
4. Review the structure and contents of the movierating table:
mysql> DESCRIBE movierating;
mysql> SELECT * FROM movierating LIMIT 5;
5. Exit mysql:
mysql> quit
Import with Sqoop
You invoke Sqoop on the command line to perform several commands. With it you
can connect to your database server to list the databases (schemas) to which you
have access, and list the tables available for loading. For database access, you
provide a connect string to identify the server, and - if required - your username and
password. […] exercise are the movie and movierating data imported
from MySQL into Hadoop in the “Importing Data with Sqoop” exercise. […] exercise. Review
the data you already loaded into HDFS in that exercise:
$ hadoop fs -cat movie/part-m-00000 | head
$ hadoop fs -cat movierating/part-m-00000 | head
Prepare The Data For Hive
For Hive data sets, you create tables, which attach field names and data types to
your Hadoop data for subsequent queries. You can create external tables on the
movie and movierating data sets, without having to move the data at all. […] exercise by performing the following steps:
7/24/2019 Cloudera Developer Exercise Instructions
64/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
64
2. Invoke the Hive shell:
$ hive
3. Create the movie table:
hive> CREATE EXTERNAL TABLE movie
(id INT, name STRING, year INT)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/user/training/movie';
4. Create the movierating table:
hive> CREATE EXTERNAL TABLE movierating
(userid INT, movieid INT, rating INT)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/user/training/movierating';
QUIT;
Practicing HiveQL
If you are familiar with SQL, most of what you already know is applicable to HiveQL. […] exercise, and see if you can
solve the problems based on your knowledge of SQL. […] hive> DESCRIBE movieratings;
Hint: You can use the up and down arrow keys to see and edit your command
history in the hive shell, just as you can in the Linux command shell. […] hive> SELECT * FROM movie LIMIT 10;
7/24/2019 Cloudera Developer Exercise Instructions
66/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
66
[…] example, select movies released before 1930:
hive> SELECT * FROM movie WHERE year < 1930;
6. The results include movies whose year field is 0, meaning that the year is
unknown or unavailable. Exclude those movies from the results:
hive> SELECT * FROM movie WHERE year < 1930
AND year != 0;
7. The results now correctly include movies before 1930, but the list is unordered. […] hive> SELECT * FROM movie WHERE year < 1930
AND year != 0 ORDER BY name;
8. Now let's move on to the movierating table. List all the ratings by a particular
user, e.g. […] hive> SELECT * FROM movierating WHERE userid=149;
9. SELECT * shows all the columns, but as we've already selected by userid,
display the other columns but not that one:
hive> SELECT movieid,rating FROM movierating WHERE
userid=149;
10. Use the JOIN function to display data from both tables. For example, include the
name of the movie (from the movie table) in the list of a user's ratings:
hive> select movieid,rating,name from movierating join
movie on movierating.movieid=movie.id where userid=149;
7/24/2019 Cloudera Developer Exercise Instructions
67/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
67
11. How tough a rater is user 149? Find out by calculating the average rating she
gave to all movies using the AVG function:
hive> SELECT AVG(rating) FROM movierating WHERE
userid=149;
12. List each user who rated movies, the number of movies they've rated, and their
average rating. […] hive> SELECT userid, COUNT(userid),AVG(rating) FROM
movierating GROUP BY userid;
13. Take that same data, and copy it into a new table called userrating. […] hive> CREATE TABLE USERRATING (userid INT,
numratings INT, avgrating FLOAT);
hive> insert overwrite table userrating
SELECT userid,COUNT(userid),AVG(rating)
FROM movierating GROUP BY userid;
Now that you've explored HiveQL, you should be able to answer the questions below. […] Enter interactive commands here
Or you can execute text files containing Hive commands with:
$ hive -f file_to_execute
1. What is the oldest known movie in the database? Note that movies with
unknown years have a value of 0 in the year field; these do not belong in your
answer. […] you will run a MapReduce job in different ways to see the effects of various components in a secondary sort program.
The program accepts lines in the form
lastname firstname birthdate
The goal is to identify the youngest person with each last name. For example, for
input:
Murphy Joanne 1963-08-12
Murphy Douglas 1832-01-20
Murphy Alice 2004-06-02
We want to write out:
Murphy Alice 2004-06-02
All the code is provided to do this. Following the steps below you are going to
progressively add each component to the job to accomplish the final goal.
7/24/2019 Cloudera Developer Exercise Instructions
73/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
73
Build the Program
$
7/24/2019 Cloudera Developer Exercise Instructions
74/75
Copyright 2010-2014 Cloudera, Inc. All rights reserved.
Not to be reproduced without prior written consent.
74
m
7/24/2019 Cloudera Developer Exercise Instructions
75/75
Run with the NameYearReducer
$%