Nutch: Crawling and Searching (A step-wise guide)
Objective:
• Perform a crawl of the given domain (e.g. http://*.ist.psu.edu, where * means any sequence of
characters). A crawl usually starts with one (or many) seed URL, which will be fetched first, and the
outlinks from this page will be fetched next.
• Set up a web server with a servlet container (tomcat in this case) and perform the search
operation upon the crawled pages.
Assumptions:
• A familiarity with the basic linux commands and a linux text editor (required commands are
covered as we go along with this document; however, the more you know the better that would
be.)
• Java, Nutch, tomcat installed on the system. (We have all in our case.)
Let’s Begin:
//gsdas added
Get SSH client or putty from http://downloads.its.psu.edu
Specify installation folder as desktop if you don’t have the required permissions
//end gsdas
Step1: Login and Knowing your whereabouts on the system
Login to the system using ssh. On windows, the shell window should appear like:
'
'
'
Click on “quick connect”; the following window should appear. Type the details of your login in the
appropriate box (I am assuming team number 20 here.)
'
Once you are logged in, the pwd command (stands for “present working directory”) should show you where
you are. E.g., upon logging in with the team20 username, it looks like this:
'
'
'
'
'
The “ls” command should show you all the files and directories in the working directory. In the team20
directory it looks like:
'
So, we have nutch and tomcat in team20’s home directory. In our case, both nutch and
tomcat are already installed. So we just need to configure these two tools and we are ready to go.
//gsdas added
If nutch is not already available in your home folders, copy it from /usr/local on ist441

//end gsdas
'
Step2: Configuring nutch for crawl
First, we need to restrict the nutch tool to crawl only some particular domains that we desire to crawl.
For example, if we want to crawl the ist.psu.edu domain (or web-site), we need to configure
./nutch-0.9/conf/crawl-urlfilter.txt. The following screenshot captures how to access this particular file:
'
Here, first I changed my working directory to the nutch home directory (where nutch is installed) and
then I am accessing the file using the vi text editor. Any other editor can also be used, or you can drag and drop
the file to your windows machine and then edit it. This can be achieved as shown below:
'
The “New File Transfer” tab should open a new window where you can navigate to the directories and drag
and drop the required files. However, I am assuming the shell prompt for the subsequent details.
'
'
'
'
'
'
'
'
'
'
'
1) crawl-urlfilter.txt: This is how the crawl-urlfilter.txt file for team20 looks. If you look at the
second last line in the screenshot, this is where the configuration for the domain is supplied. Here are
some particulars:
• It is a regular expression which implies fetching any page whose base url has “ist.psu.edu”.
• ([a-z0-9]*\.) implies that any sequence made of lowercase letters and digits is allowed and it must end with “.”, and
“*” means that any number of such sequences are allowed.
• “+” means accept the url matched by the regular expression that follows “+”, and “-” implies not to
accept. Also, “.” can be replaced with any sequence.
Therefore, a page that has its base url as http://231sdgsd.ist.psu.edu will be accepted but
-,,5677,*",b2$94/9,459:4"2:'*/++'1%,'B"'())"5,"24''R-"'9(&"')(1'B"'2%1"'$%#'(1;'$/+"'"D,"19/%1'(9'*"++4'
The rest of the file is pretty self-explanatory.
'
2) nutch-default.xml: This file is responsible for providing your crawler a name that will be registered in
the logs of the site that is being crawled. Here is how you can access the file:
'
'
Once you open the file, you need to change the name of your http agent. This is how the http properties
section of this file for team20 looks.
<!-- HTTP properties -->
<property>
  <name>http.agent.name</name>
  <value>team20</value>
  <description>HTTP 'User-Agent' request header. MUST NOT be empty -
  please set this to a single word uniquely related to your organization.

  NOTE: You should also check other related properties:

        http.robots.agents
        http.agent.description
        http.agent.url
        http.agent.email
        http.agent.version

  and set their values appropriately.

  </description>
</property>
'
Step3: Crawling: Now, we are ready to crawl. Staying in the nutch home directory, first we need to
create a file that carries the seed url (the page that will be fetched very first). Here is how it can be done:
'
This command will create a file named url in the present working directory and “echo” will write
http://ist.psu.edu in the file. (Note: the seed url can be supplied at the command line while crawling, but
it is a good practice to supply the seed url/urls in a file.)
Next, we perform the crawl by issuing the crawl command. The following screenshot shows how to do
this:
'
Here,
• “bin/nutch” is the executable and “crawl” is the java class that implements the crawling
functionality.
• “url” is the file that contains the seed urls.
• “-dir tinycrawl” is the argument that supplies the name of the directory where all the
segments will be stored. Make sure that “tinycrawl” does not exist in the working directory.
• “-depth 2” specifies the depth, i.e. the pages that are at a distance up to 1 will be fetched.
This will complete the crawl operation. When the crawling operation is finished, you will see a directory
named tinycrawl in your nutch directory.
If you are interested in analyzing what has been crawled before you start operations for searching, you
should read http://wiki.apache.org/nutch/08CommandLineOptions. This page contains the command
line options available for nutch; readdb and readlinkdb are the classes that can be used for analysis
purposes.
'
'
'
'
'
'
'
'
'
'
'
'
'
'
'
'
'
'
Step4: Searching
For the search requests (i.e. queries) to be served, we need a servlet container that can receive the
request, read the index and send back the results. We will be using tomcat for this purpose. First, let us
see where tomcat is installed.
'
'
The screenshot above shows where tomcat is installed. Here, first I navigated to the parent directory (cd
..) and issued the ls command to see the content of team20’s home directory. “apache-tomcat-6.0.13”
is the directory where tomcat is installed. The directory listing (ls) of this directory shows the
content of this directory. “bin” is the directory where all the executables are located and “webapps” is
the directory where all the web applications that run inside tomcat are deployed.
Next, we need to deploy our nutch application to this tomcat server. Here is how this is done:
1) First, we need to remove the root application that is running in tomcat and then copy the
nutch web application file (nutch.war) into this directory. The following two commands take care of
this operation.
• rm -rf webapps/ROOT*
• cp ../nutch-0.9/nutch-0.9.war webapps/ROOT.war
Make sure you are in the appropriate directory before running these commands; that
directory is the home directory of tomcat. The following screenshot should clear any doubt:
'
Next, we need to start the tomcat server, which is done by the following command:
• bin/catalina.sh start
It should display information as shown in the screenshot:
'
Now, the tomcat server is running but it is not configured yet and therefore, it cannot find the
index that nutch created in the last step. So, we will configure the tomcat server before showing
off with the searching.
There are two files that need to be configured, which are
• webapps/ROOT/WEB-INF/classes/nutch-site.xml
• conf/server.xml
1) nutch-site.xml

The content of the first file is shown below.
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
</configuration>
'
And it should look like what is shown below:
'
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<nutch-conf>
<property>
<name>searcher.dir</name>
<value>/home/team20/nutch-0.9/tinycrawl</value>
</property>
</nutch-conf>
'
'
Note the two main changes:
• from href="configuration.xsl" to href="nutch-conf.xsl"
• content inside <nutch-conf>..</nutch-conf>
The first change is indicating the new configuration file where the display oriented information
for your nutch will be put. And the second change is the path of the directory where your crawl
is. (Here team20 is assumed. You should put the path according to your team number.)
'
2) server.xml:
Since every team will be working with the same tomcat server, we will need to run different
instances of the tomcat server. To identify which index to work with (i.e. which instance of
tomcat to work with), the tomcat server uses the combination of the IP address of the server machine
and the port on which tomcat is listening. Since you cannot change the IP address of the
machine, you will change the port.
'
The server.xml file contains the above mentioned configuration detail, and the relevant xml tag’s
content for a freshly created instance is shown below.
<!-- A "Connector" represents an endpoint by which requests are
received
and responses are returned. Documentation at :
Java HTTP Connector: /docs/config/http.html (blocking & non-
blocking)
Java AJP Connector: /docs/config/ajp.html
APR (HTTP/AJP) Connector: /docs/apr.html
Define a non-SSL HTTP/1.1 Connector on port 8080
-->
<Connector port="8080" protocol="HTTP/1.1"
connectionTimeout="20000"
redirectPort="8443" />
The last three lines contain the configuration. Here the port number is 8080. Since we will use
a different port for different teams, it is recommended that you modify the port number to
8080+your team number, i.e. in this case, 8080+20=8100. (since I am working with team 20)
'
Now we need to restart the tomcat server. So, stop it first and then start it. These two
commands should do the work:
• bin/catalina.sh stop
• bin/catalina.sh start
3N"#"'S'-(0"'(99:&"2',-(,';%:#'P5*2Q'/9'P7-%&"7,"(&l7(5()-"Y,%&)(,Yj4U4[aQ4',"(&l'&"(19'
,"(&'1:&B"#4='
'
Assuming that my team number is 20 and hence the port number is 8080+20=8100, for
accessing the nutch search interface, I will type http://ist331.ist.psu.edu:8100/en/ in the
browser.
Here is the screenshot for the interface:
'
And if I search for “information”, here is what I get:
Please stop your tomcat instance once you are done, for proper availability of the system.
'
'
'
'
'
'
'
'
'
'
'
'
Analyzing the Analyzers:

The code below uses the analyzers specified and implemented in Lucene to analyze and
tokenize the string provided by the user. Since Lucene will be indexing the tokens generated by
the analyzers, it is instructive to look at the tokenization done by the different
analyzers.
//AnalysisDemo.java
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import java.io.*;
import java.io.IOException;
/**
 * Console demo that reads one line of text from standard input and prints
 * the tokens each of several Lucene analyzers produces for it.
 */
public class AnalysisDemo {
    /** Analyzers to compare; each is applied to the same input, in this order. */
    private static final Analyzer[] analyzers = new Analyzer[]{
        new WhitespaceAnalyzer(),
        new SimpleAnalyzer(),
        new StopAnalyzer(),
        new StandardAnalyzer(),
    };

    /**
     * Prompts for a line on standard input and prints its tokenization.
     *
     * @param args ignored
     * @throws IOException if reading the input or tokenizing it fails
     */
    public static void main(String[] args) throws IOException {
        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
        System.out.println("\nEnter the string that you want to analyze:");
        String line = br.readLine();
        if (line == null) {
            // readLine() returns null at end of input (e.g. empty pipe);
            // passing null on to StringReader would throw NullPointerException.
            System.err.println("No input to analyze.");
            return;
        }
        analyze(line);
    }

    /**
     * Prints, for every analyzer in {@link #analyzers}, the analyzer's class
     * name followed by the tokens it emits for {@code text}, each in brackets.
     *
     * @param text the text to tokenize; must not be null
     * @throws IOException if a token stream fails while reading
     */
    private static void analyze(String text) throws IOException {
        System.out.println("Analyzing \"" + text + "\"");
        for (int i = 0; i < analyzers.length; i++) {
            Analyzer analyzer = analyzers[i];
            System.out.println("\t" + analyzer.getClass().getName() + ":");
            System.out.print("\t\t");
            // "contents" is the field name handed to the analyzer; the text is
            // only tokenized here, nothing is indexed.
            TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
            try {
                // next() returns null once the stream is exhausted
                for (Token token = stream.next(); token != null; token = stream.next()) {
                    System.out.print("[" + token.termText() + "] ");
                }
            } finally {
                // release the stream (and its reader) even if tokenizing fails
                stream.close();
            }
            System.out.println("\n");
        }
    }
}
Creating a custom in-memory Index and searching on it:
The code below takes specified documents and creates a Lucene index which is kept in
memory. Lucene provides classes for each of the following operations that are
necessary for search engine operation:
• index creation
• index searching
• query parsing
• content analysis
The code below shows all the operations in action. It is advised to play with this code
and see it in action for learning purposes. For other classes and functionalities, go to the
lucene website and browse the class hierarchy.
'
'
//LuceneExample.java
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.Query;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
/**
 * Minimal end-to-end Lucene example: indexes four quotes in memory, then
 * runs a few queries against the index and prints the authors that match.
 */
public class LuceneExample
{
    /**
     * Builds the in-memory index, runs the sample queries and prints the hits.
     * I/O and query-parse failures are reported with a stack trace.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        // Construct a RAMDirectory to hold the in-memory representation
        // of the index.
        RAMDirectory idx = new RAMDirectory();
        try
        {
            // Make a writer to create the index (true = create a new index)
            IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), true);
            try
            {
                // Add some Document objects containing quotes
                writer.addDocument(createDocument("Theodore Roosevelt",
                    "It behooves every man to remember that the work of the "
                    + "critic, is of altogether secondary importance, and that, "
                    + "in the end, progress is accomplished by the man who does "
                    + "things."));
                writer.addDocument(createDocument("Friedrich Hayek",
                    "The case for individual freedom rests largely on the "
                    + "recognition of the inevitable and universal ignorance "
                    + "of all of us concerning a great many of the factors on "
                    + "which the achievements of our ends and welfare depend."));
                writer.addDocument(createDocument("Ayn Rand",
                    "There is nothing to take a man's freedom away from "
                    + "him, save other men. To be free, a man must be free "
                    + "of his brothers."));
                writer.addDocument(createDocument("Mohandas Gandhi",
                    "Freedom is not worth having if it does not connote "
                    + "freedom to err."));
                // Optimize to finish building the index
                writer.optimize();
            }
            finally
            {
                // Always close the writer, even if adding a document failed
                writer.close();
            }

            // Build an IndexSearcher using the in-memory index
            Searcher searcher = new IndexSearcher(idx);
            try
            {
                // Run some queries
                search(searcher, "freedom");
                search(searcher, "free");
                search(searcher, "progress or achievements");
            }
            finally
            {
                // Always close the searcher, even if a query failed
                searcher.close();
            }
        }
        catch (IOException ioe)
        {
            // In this example we aren't really doing any I/O, so this
            // exception should never actually be thrown.
            ioe.printStackTrace();
        }
        catch (ParseException pe)
        {
            pe.printStackTrace();
        }
    }

    /**
     * Make a Document object with an un-indexed (but stored) title field and
     * an indexed content field.
     *
     * @param title   text stored verbatim in the "title" field
     * @param content text indexed in the "content" field
     * @return the new document
     */
    private static Document createDocument(String title, String content)
    {
        Document doc = new Document();
        // Add the title as a stored, unindexed field...
        doc.add(new Field("title", title, Field.Store.YES, Field.Index.NO));
        // ...and the content as an indexed field. Note that indexed
        // Text fields are constructed using a Reader. Lucene can read
        // and index very large chunks of text, without storing the
        // entire content verbatim in the index. In this example we
        // can just wrap the content string in a StringReader.
        doc.add(new Field("content", new StringReader(content)));
        return doc;
    }

    /**
     * Searches for the given string in the "content" field and prints the
     * title of every matching document, or a message when nothing matches.
     *
     * @param searcher    searcher opened on the index to query
     * @param queryString query text in QueryParser syntax
     * @throws ParseException if queryString cannot be parsed
     * @throws IOException    if the index cannot be read
     */
    private static void search(Searcher searcher, String queryString)
        throws ParseException, IOException
    {
        // Build a Query object
        QueryParser parser = new QueryParser("content", new StandardAnalyzer());
        Query query = parser.parse(queryString);
        // Search for the query
        Hits hits = searcher.search(query);
        // Examine the Hits object to see if there were any matches
        int hitCount = hits.length();
        if (hitCount == 0)
        {
            System.out.println("No matches were found for \"" + queryString +
                "\"");
        }
        else
        {
            System.out.println("Hits for \"" + queryString
                + "\" were found in quotes by:");
            // Iterate over the Documents in the Hits object
            for (int i = 0; i < hitCount; i++)
            {
                Document doc = hits.doc(i);
                // Print the value that we stored in the "title" field. Note
                // that this Field was not indexed, but (unlike the
                // "content" field) was stored verbatim and can be
                // retrieved.
                System.out.println(" " + (i + 1) + ". " + doc.get("title"));
            }
        }
        System.out.println();
    }
}
Storing an Index on the hard drive:
The code below shows how to store an index on the hard-drive. It takes as input the
directory that needs to be indexed and the directory where you want to write the index.
//Indexer.java
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import java.io.File;
import java.io.IOException;
import java.io.FileReader;
public class Indexer {
public static void index(File indexDir, File dataDir) throws IOException
{
if (!dataDir.exists() || !dataDir.isDirectory()) {
throw new IOException(dataDir + " does not exist or is not a
directory");
}
IndexWriter writer = new IndexWriter(indexDir, new
StandardAnalyzer(), true);
indexDirectory(writer, dataDir);
writer.close();
}
private static void indexDirectory(IndexWriter writer, File dir) throws
IOException {
File[] files = dir.listFiles();
for (int i=0; i < files.length; i++) {
File f = files[i];
if (f.isDirectory()) {
indexDirectory(writer, f); // recurse
} else if (f.getName().endsWith(".txt")) {
indexFile(writer, f);
}
}
}
private static void indexFile(IndexWriter writer, File f) throws
IOException {
System.out.println("Indexing " + f.getName());
Document doc = new Document();
doc.add(Field.Text("contents", new FileReader(f)));
doc.add(Field.Keyword("filename", f.getCanonicalPath()));
writer.addDocument(doc);
}
public static void main(String[] args) throws Exception {
if (args.length != 2) {
throw new Exception("Usage: " + Indexer.class.getName() + "
<index dir> <data dir>");
}
File indexDir = new File(args[0]);
File dataDir = new File(args[1]);
index(indexDir, dataDir);
}
}
'
'