View
223
Download
0
Category
Preview:
Citation preview
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
1/19
( h t t p s : / / w w w
. q u o l e
. c o m / )
P R O D U C T ( / F A T U R / ) C O M M U N I T Y ( / P A R T N R
/ )
R O U R C (
H T T P : / / W W W .
Q U O L .
C O M /
R O U R C / )
L O G (
H T T P : / / W W W .
Q U O L .
C O M /
L O G / )
C O M P A N Y ( / A O U T -
U )
H L P ( / U P P O R T )
L O G I N (
H T T P : / / A P I .
Q U O L .
C O M / U R /
I G N _
I N )
I G N U P F O R F R
( H T T P : / / W W W
. Q U O L
. C O M
/ T R I A L
-
P A G )
H o m e (
h t t p s : / / w w w .
q u o l e .
c o m / ) > H i v e F u n c t i o n C h e a t h e e t
Hive Function Cheat Sheet
Date Functions
Mathematical Functions
String Functions
Collection Functions
H i v e F u n c t i o n C h e a t h e e t
( h t t p : / / q u o l e 2
. w p e n g i n e
. c o m
/ w p
- c o n t e n t
/ u p l o a d s
/ 2 0 1 4
/ 0 1
/ h i v e
- f u n c t i o n
- c h e a t -
s h e e t . p d f
)
Hive Function Meta commands
SHOW FUNCTIONS – lists Hive functions and operators
DESCRIBE FUNCTION [function name] – displays short description of the function
DESCRIBE FUNCTION EXTENDED [function name] – access extended description of the function
Types of Hive Functions
http://qubole2.wpengine.com/wp-content/uploads/2014/01/hive-function-cheat-sheet.pdfhttps://www.qubole.com/https://www.qubole.com/trial-pagehttps://api.qubole.com/users/sign_inhttps://www.qubole.com/supporthttps://www.qubole.com/about-ushttps://www.qubole.com/blog/https://www.qubole.com/resources/https://www.qubole.com/partners/https://www.qubole.com/features/https://www.qubole.com/
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
2/19
U D A F
U D T F
C o n d i t i o n a l F u n c t i o n s
F u n c t i o n s f o r T e x t A n a l t i c s
G o t o
P i g F u n c t i o n C h e a t h e e t
( h t t p : / / w w w
. q u o l e
. c o m
/ r e s o u r c e s
/ c h e a t s h e e t
/ p i g
-
f u n c t i o n - c h e a t - s h e e t
/ )
UDF – is a function that takes one or more columns from a row as argument and returns a single value or object. Eg: concat(col1, col2)
UDTF – takes zero or more inputs and produces multiple columns or rows of output. Eg: explode()
Macros – a function that uses other Hive functions.
H o w T o D e v e l o p U D F s
p a c k a g e o r g . a p a c h e . h a d o o p .
h i v e .
c o n t r i .
u d f .
e x a m p l e ;
import java.util.Date;
import java.text.SimpleDateFormat;
import org.apache.hadoop.hive.ql.exec.UDF;
@Description(name = "YourUDFName",
value = "_FUNC_(InputDataType) - using the input datat
ype X argument, "+
"returns YYY.",
extended = "Example:\n"
+ " > SELECT _FUNC_(InputDataType) FROM tabl
ename;")
public class YourUDFName extends UDF{
..
public YourUDFName( InputDataType InputValue ){
..;
}
public String evaluate( InputDataType InputValue ){
..;
}
}
H o w T o D e v e l o p U D F s ,
G e n e r i c U D F s ,
U D A F s ,
a n d U D T F s
p u l i c c l a s s Y o u r U D F N a m e e x t e n d s U D F {
http://www.qubole.com/resources/cheatsheet/pig-function-cheat-sheet/
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
3/19
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
4/19
bigint unix_timestamp(string date, string pattern)
Convert time string with given pattern to Unix time stamp, return 0 if fail: unix_timestamp('2009-03-20', 'yyyy-MM-dd') = 1237532400
s t r i n g t o _
d a t e (
s t r i n g t i m e s t a m p )
R e t u r n s t h e d a t e p a r t o f a t i m e s t a m p s t r i n g :
t o _
d a t e ( 1 9 7 0 -
0 1 -
0 1 0 0 :
0 0 :
0 0 ) =
1 9 7 0 -
0 1 -
0 1
int year(string date)
Returns the year part of a date or a timestamp string: year("1970-01-01 00:00:00") = 1970, year("1970-01-01") = 1970
i n t m o n t h (
s t r i n g d a t e )
R e t u r n s t h e m o n t h p a r t o f a d a t e o r a t i m e s t a m p
s t r i n g :
m o n t h ( 1 9 7 0 -
1 1 -
0 1 0 0 :
0 0 :
0 0 ) =
1 1 ,
m o n t h ( 1 9 7 0 -
1 1 -
0 1 ) =
1 1
int day(string date) dayofmonth(date)
Return the day part of a date or a timestamp string: day("1970-11-01 00:00:00") = 1, day("1970-11-01") = 1
i n t h o u r (
s t r i n g d a t e )
R e t u r n s t h e h o u r o f t h e t i m e s t a m p :
h o u r ( ‘ 2 0 0 9 -
0 7 -
3 0 1 2 :
5 8 :
5 9 ′) = 1 2 , h o u r ( ’ 1 2 : 5 8 : 5 9 ’ ) = 1 2
i n t m i n u t e (
s t r i n g d a t e )
R e t u r n s t h e m i n u t e o f t h e t i m e s t a m p
i n t s e c o n d (
s t r i n g d a t e )
R e t u r n s t h e s e c o n d o f t h e t i m e s t a m p
int weekofyear(string date)
Return the week number of a timestamp string: weekofyear("1970-11-01 00:00:00") = 44, weekofyear("1970-11-01") = 44
int datediff(string enddate, string startdate)
Return the number of days from startdate to enddate: datediff('2009-03-01', '2009-02-27') = 2
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
5/19
string date_add(string startdate, int days)
Add a number of days to startdate: date_add('2008-12-31', 1) = '2009-01-01'
string date_sub(string startdate, int days)
Subtract a number of days from startdate: date_sub('2008-12-31', 1) = '2008-12-30'
timestamp from_utc_timestamp(timestamp, string timezone)
Assumes given timestamp is UTC and converts to given timezone (as of Hive 0.8.0)
t i m e s t a m p t o _
u t c _
t i m e s t a m p (
t i m e s t a m p ,
s t r i n g t i m e z o n e )
A s s u m e s g i v e n t i m e s t a m p i s i n g i v e n t i m e z o n e a n d
c o n v e r t s t o U T C (
a s o f H i v e 0 . 8 . 0 )
Mathematical Functions
The following built-in mathematical functions are supported in hive; most return NULL when the argument(s) are NULL:
Return Type    Name (Signature)    Example
BIGINT round(double a)
Returns the rounded BIGINT value of the double
DOUBLE round(double a, int d)
Returns the double rounded to d decimal places
BIGINT floor(double a)
Returns the maximum BIGINT value that is equal or less than the double
BIGINT ceil(double a), ceiling(double a)
Returns the minimum BIGINT value that is equal or greater than the double
d o u l e r a n d ( ) , r a n d (
i n t s e e d )
R e t u r n s a r a n d o m n u m e r (
t h a t c h a n g e s f r o m r o w t o r o w )
t h a t i s d i s t r i u t e d u n i f o r m l f r o m 0 t o 1 .
p e c i f i i n g t h e
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
6/19
s e e d w i l l m a k e s u r e t h e g e n e r a t e d r a n d o m n u m e r
s e q u e n c e i s d e t e r m i n i s t i c .
double exp(double a)
Returns e^a where e is the base of the natural logarithm
double ln(double a)
Returns the natural logarithm of the argument
double log10(double a)
Returns the base-10 logarithm of the argument
double log2(double a)
Returns the base-2 logarithm of the argument
double log(double base, double a)
Return the base "base" logarithm of the argument
double pow(double a, double p), power(double a, double p)
Return a^p
double sqrt(double a)
Returns the square root of a
string bin(BIGINT a)
Returns the number in binary format
string hex(BIGINT a) hex(string a)
If the argument is an int, hex returns the number as a string in hex format. Otherwise if the number is a string, it converts each character into its hex representation and returns the resulting string.
string unhex(string a)
Inverse of hex. Interprets each pair of characters as a hexadecimal number and converts to the character represented by the number.
string conv(BIGINT num, int from_base, int to_base), conv(STRING num, int from_base, int to_base)
Converts a number from a given base to another
a
p
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
7/19
double abs(double a)
Returns the absolute value
int double pmod(int a, int b) pmod(double a, double b)
Returns the positive value of a mod b
d o u l e s i n (
d o u l e a )
R e t u r n s t h e s i n e o f a (
a i s i n r a d i a n s )
d o u l e a s i n (
d o u l e a )
R e t u r n s t h e a r c s i n o f x i f - 1
< = a
< = 1 o r n u l l o t h e r w i s e
d o u l e c o s (
d o u l e a )
R e t u r n s t h e c o s i n e o f a (
a i s i n r a d i a n s )
d o u l e a c o s (
d o u l e a )
R e t u r n s t h e a r c c o s i n e o f x i f - 1
< = a
< = 1 o r n u l l o t h e r w i s e
t a n (
d o u l e
a )
t a n (
d o u l e a )
R e t u r n s t h e t a n g e n t o f a (
a i s i n r a d i a n s )
d o u l e a t a n (
d o u l e a )
R e t u r n s t h e a r c t a n g e n t o f a
d o u l e d e g r e e s (
d o u l e a )
C o n v e r t s v a l u e o f a f r o m r a d i a n s t o d e g r e e s
d o u l e r a d i a n s (
d o u l e a )
C o n v e r t s v a l u e o f a f r o m d e g r e e s t o r a d i a n s
i n t d o u l e p o s i t i v e (
i n t a ) ,
p o s i t i v e (
d o u l e a )
R e t u r n s a
i n t d o u l e n e g a t i v e (
i n t a ) ,
n e g a t i v e (
d o u l e a )
R e t u r n s - a
f l o a t s i g n (
d o u l e a )
R e t u r n s t h e s i g n o f a a s ‘ 1 . 0 ’
o r ‘ - 1 . 0 ’
d o u l e e ( ) R e t u r n s t h e v a l u e o f e
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
8/19
d o u l e p i ( ) R e t u r n s t h e v a l u e o f p i
t r i n g F u n c t i o n s
T h e f o l l o w i n g a r e u i l t -
i n t r i n g f u n c t i o n s a r e s u p p o r t e d i n h i v e :
R e t u r n T p e N a m e (
i g n a t u r e )
x a m p l e
i n t a s c i i (
s t r i n g s t r )
R e t u r n s t h e n u m e r i c v a l u e o f t h e f i r s t
c h a r a c t e r o f s t r
string concat(string|binary A, string|binary B...)
Returns the string or bytes resulting from concatenating the strings or bytes passed in as parameters in order. e.g. concat('foo', 'bar') results in 'foobar'. Note that this function can take any number of input strings.
a r r a c o n t e x t _
n g r a m s (
a r r a ,
a r r a ,
i n t K ,
i n t p f )
R e t u r n s t h e t o p -
k c o n t e x t u a l N -
g r a m s f r o m
a s e t o f t o k e n i z e d s e n t e n c e s ,
g i v e n a s t r i n g
o f
c o n t e x t .
e e t a t i s t i c s A n d D a t a M i n i n g f o r m o r e
i n f o r m a t i o n .
s t r i n g c o n c a t _
w s (
s t r i n g P ,
s t r i n g
A , s t r i n g )
L i k e c o n c a t ( ) a o v e ,
u t w i t h c u s t o m
s e p a r a t o r P .
s t r i n g c o n c a t _
w s (
s t r i n g P ,
a r r a )
L i k e c o n c a t _
w s ( ) a o v e ,
u t t a k i n g a n a r r a
o f s t r i n g s . (
a s o f H i v e 0 . 9 . 0 )
i n t f i n d _
i n _
s e t (
s t r i n g s t r ,
s t r i n g R e t u r n s t h e f i r s t o c c u r a n c e o f s t r i n s t r L i s t
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
9/19
strList)
where strList is a comma-delimited string. Returns null if either argument is null. Returns 0 if the first argument contains any commas. e.g. find_in_set('ab', 'abc,b,ab,c,def') returns 3
s t r i n g f o r m a t _
n u m e r (
n u m e r x ,
i n t
d )
F o r m a t s t h e n u m e r X t o a f o r m a t l i k e
‘ # , # # # , # # # . # # ’ , r o u n d e d t o D d e c i m a l
p l a c e s ,
a n d r e t u r n s t h e r e s u l t a s a s t r i n g .
I f
D i s 0 ,
t h e r e s u l t h a s n o d e c i m a l p o i n t o r
f r a c t i o n a l p a r t . (
a s o f H i v e 0 .
1 0 . 0 )
string get_json_object(string json_string, string path)
Extract json object from a json string based on json path specified, and return json string of the extracted json object. It will return null if the input json string is invalid. NOTE: The json path can only have the characters [0-9a-z_], i.e., no upper-case or special characters. Also, the keys *cannot start with numbers.* This is due to restrictions on Hive column names.
o o l e a n i n _ f i l e (
s t r i n g s t r ,
s t r i n g
f i l e n a m e )
R e t u r n s t r u e i f t h e s t r i n g s t r a p p e a r s a s a n
e n t i r e l i n e i n f i l e n a m e .
i n t i n s t r (
s t r i n g s t r ,
s t r i n g s u s t r )
R e t u r n s t h e p o s i t i o n o f t h e f i r s t o c c u r e n c e
o f s u s t r i n s t r
i n t l e n g t h (
s t r i n g A )
R e t u r n s t h e l e n g t h o f t h e s t r i n g
i n t l o c a t e (
s t r i n g s u s t r ,
s t r i n g R e t u r n s t h e p o s i t i o n o f t h e f i r s t o c c u r r e n c e
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
10/19
s t r [ , i n t p o s ] ) o f s u s t r i n s t r a f t e r p o s i t i o n p o s
s t r i n g l o w e r (
s t r i n g A )
l c a s e (
s t r i n g A )
s t r i n g l p a d (
s t r i n g s t r ,
i n t l e n ,
s t r i n g
p a d )
R e t u r n s s t r ,
l e f t -
p a d d e d w i t h p a d t o a l e n g t h
o f l e n
s t r i n g l t r i m (
s t r i n g A )
R e t u r n s t h e s t r i n g r e s u l t i n g f r o m t r i m m i n g
s p a c e s f r o m t h e e g i n n i n g (
l e f t h a n d s i d e )
o f
A e . g . l t r i m
( ‘ f o o a r ‘ ) r e s u l t s i n ‘
f o o a r ‘
a r r a n g r a m s (
a r r a , i n t N ,
i n t K ,
i n t p f )
R e t u r n s t h e t o p -
k N -
g r a m s f r o m a s e t o f
t o k e n i z e d s e n t e n c e s ,
s u c h a s t h o s e
r e t u r n e d t h e s e n t e n c e s ( ) U D A F .
e e t a t i s t i c s A n d D a t a M i n i n g f o r m o r e
i n f o r m a t i o n .
string parse_url(string urlString, string partToExtract [, string keyToExtract])
Returns the specified part from the URL. Valid values for partToExtract include HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, and USERINFO. e.g. parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'HOST') returns 'facebook.com'. Also a value of a particular key in QUERY can be extracted by providing the key as the third argument, e.g. parse_url('http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1', 'QUERY', 'k1') returns 'v1'.
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
11/19
s t r i n g p r i n t f (
t r i n g f o r m a t ,
O j
a r g s )
R e t u r n s t h e i n p u t f o r m a t t e d a c c o r d i n g d o
p r i n t f -
s t l e f o r m a t s t r i n g s (
a s o f H i v e 0 . 9 . 0 )
string regexp_extract(string subject, string pattern, int index)
Returns the string extracted using the pattern. e.g. regexp_extract('foothebar', 'foo(.*?)(bar)', 2) returns 'bar'. Note that some care is necessary in using predefined character classes: using '\s' as the second argument will match the letter s; '\\s' is necessary to match whitespace, etc. The 'index' parameter is the Java regex Matcher group() method index. See docs/api/java/util/regex/Matcher.html for more information on the 'index' or Java regex group() method.
string regexp_replace(string INITIAL_STRING, string PATTERN, string REPLACEMENT)
Returns the string resulting from replacing all substrings in INITIAL_STRING that match the java regular expression syntax defined in PATTERN with instances of REPLACEMENT, e.g. regexp_replace("foobar", "oo|ar", "") returns 'fb'. Note that some care is necessary in using predefined character classes: using '\s' as the second argument will match the letter s; '\\s' is necessary to match whitespace, etc.
s t r i n g r e p e a t (
s t r i n g s t r ,
i n t n )
R e p e a t s t r n t i m e s
s t r i n g r e v e r s e (
s t r i n g A )
R e t u r n s t h e r e v e r s e d s t r i n g
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
12/19
s t r i n g r p a d (
s t r i n g s t r ,
i n t l e n ,
s t r i n g
p a d )
R e t u r n s s t r ,
r i g h t -
p a d d e d w i t h p a d t o a
l e n g t h o f l e n
s t r i n g r t r i m (
s t r i n g A )
R e t u r n s t h e s t r i n g r e s u l t i n g f r o m t r i m m i n g
s p a c e s f r o m t h e e n d (
r i g h t h a n d s i d e )
o f A
e . g . r t r i m ( ‘ f o o a r ‘ ) r e s u l t s i n
‘ f o o a r
’
a r r a
s e n t e n c e s (
s t r i n g s t r ,
s t r i n g
l a n g ,
s t r i n g l o c a l e )
T o k e n i z e s a s t r i n g o f n a t u r a l l a n g u a g e t e x t
i n t o w o r d s a n d s e n t e n c e s ,
w h e r e e a c h
s e n t e n c e i s r o k e n a t t h e a p p r o p r i a t e
s e n t e n c e o u n d a r a n d r e t u r n e d a s a n
a r r a o f w o r d s .
T h e ‘
l a n g ’
a n d ‘
l o c a l e ’
a r e
o p t i o n a l a r g u m e n t s .
e . g .
s e n t e n c e s ( ‘ H e l l o
t h e r e !
H o w a r e o u ? ’ ) r e t u r n s (
( H e l l o ,
t h e r e ) , ( H o w ,
a r e ,
o u )
)
s t r i n g s p a c e (
i n t n )
R e t u r n a s t r i n g o f n s p a c e s
a r r a s p l i t (
s t r i n g s t r ,
s t r i n g p a t )
p l i t s t r a r o u n d p a t (
p a t i s a r e g u l a r
e x p r e s s i o n )
m a p
s t r _
t o _
m a p (
t e x t [ , d e l i m i t e r 1 ,
d e l i m i t e r 2 ] )
p l i t s t e x t i n t o k e -
v a l u e p a i r s u s i n g t w o
d e l i m i t e r s .
D e l i m i t e r 1 s e p a r a t e s t e x t i n t o K - V
p a i r s ,
a n d D e l i m i t e r 2 s p l i t s e a c h K -
V p a i r .
D e f a u l t d e l i m i t e r s a r e ‘ , ’ f o r d e l i m i t e r 1 a n d
‘ = ’ f o r d e l i m i t e r 2 .
s t r i n g s u s t r (
s t r i n g |
i n a r A ,
i n t
s t a r t ) s u s t r i n g
( s t r i n g
| i n a r
A , i n t s t a r t
)
R e t u r n s t h e s u s t r i n g o r s l i c e o f t h e t e
a r r a o f A s t a r t i n g f r o m s t a r t p o s i t i o n t i l l t h e
e n d o f s t r i n g A e . g .
s u s t r ( ‘ f o o a r ’ , 4 )
r e s u l t s
i n‘
a r ’
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
13/19
s t r i n g s u s t r (
s t r i n g |
i n a r A ,
i n t
s t a r t , i n t l e n
)
s u s t r i n g (
s t r i n g |
i n a r A ,
i n t
s t a r t , i n t l e n
)
R e t u r n s t h e s u s t r i n g o r s l i c e o f t h e t e
a r r a o f A s t a r t i n g f r o m s t a r t p o s i t i o n w i t h
l e n g t h l e n e . g .
s u s t r ( ‘ f o o a r ’ , 4 ,
1 )
r e s u l t s i n
‘ ’
s t r i n g t r a n s l a t e (
s t r i n g i n p u t ,
s t r i n g
f r o m ,
s t r i n g t o )
T r a n s l a t e s t h e i n p u t s t r i n g r e p l a c i n g t h e
c h a r a c t e r s p r e s e n t i n t h e f r o m s t r i n g w i t h
t h e c o r r e s p o n d i n g c h a r a c t e r s i n
t h e t o s t r i n g .
T h i s i s s i m i l a r t o
t h e t r a n s l a t e f u n c t i o n i n P o s t g r e Q L .
I f a n
o f t h e p a r a m e t e r s t o t h i s U D F a r e N U L L ,
t h e
r e s u l t i s N U L L a s w e l l (
a v a i l a l e a s o f
H i v e 0 .
1 0 . 0 )
s t r i n g t r i m (
s t r i n g A )
R e t u r n s t h e s t r i n g r e s u l t i n g f r o m t r i m m i n g
s p a c e s f r o m o t h e n d s o f A e . g . t r i m
( ‘
f o o a r ‘ ) r e s u l t s i n ‘
f o o a r ’
s t r i n g u p p e r (
s t r i n g A )
u c a s e (
s t r i n g
A )
R e t u r n s t h e s t r i n g r e s u l t i n g f r o m c o n v e r t i n g
a l l c h a r a c t e r s o f A t o u p p e r c a s e e . g .
u p p e r ( ‘ f O o a R ’ ) r e s u l t s i n
‘ F O O A R
’
C o l l e c t i o n F u n c t i o n s
T h e f o l l o w i n g u i l t -
i n c o l l e c t i o n f u n c t i o n s a r e s u p p o r t e d i n h i v e :
R e t u r n
T p e N a m e (
i g n a t u r e )
x a m p l e
i n t s i z e (
M a p )
R e t u r n s t h e n u m e r o f e l e m e n t s i n t h e m a p t p e
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
14/19
i n t s i z e (
A r r a )
R e t u r n s t h e n u m e r o f e l e m e n t s i n t h e a r r a t p e
a r r a m a p _ k e s (
M a p )
R e t u r n s a n u n o r d e r e d a r r a c o n t a i n i n g t h e k e s o f t h e i n p u t m a p
a r r a m a p _
v a l u e s (
M a p )
R e t u r n s a n u n o r d e r e d a r r a c o n t a i n i n g t h e v a l u e s o f t h e i n p u t
m a p
o o l e a n a r r a _
c o n t a i n s (
A r r a ,
v a l u e )
R e t u r n s T R U i f t h e a r r a c o n t a i n s v a l u e
a r r a s o r t _
a r r a (
A r r a )
o r t s t h e i n p u t a r r a i n a s c e n d i n g o r d e r a c c o r d i n g t o t h e n a t u r a l
o r d e r i n g o f t h e a r r a e l e m e n t s a n d r e t u r n s i t (
a s o f v e r s i o n 0 . 9 . 0 )
u i l t - i n A g g r e g a t e F u n c t i o n s ( U D A F )
T h e f o l l o w i n g a r e u i l t -
i n a g g r e g a t e f u n c t i o n s a r e s u p p o r t e d i n H i v e :
R e t u r n
T p e N a m e (
i g n a t u r e )
x a m p l e
bigint count(*), count(expr), count(DISTINCT expr[, expr_.])
count(*) – Returns the total number of retrieved rows, including rows containing NULL values;
count(expr) – Returns the number of rows for which the supplied expression is non-NULL;
count(DISTINCT expr[, expr]) – Returns the number of rows for which the supplied expression(s) are unique and non-NULL.
d o u l e s u m (
c o l ) , s u m (
D I T I N C T
c o l )
R e t u r n s t h e s u m o f t h e e l e m e n t s i n t h e g r o u p o r t h e s u m o f
t h e d i s t i n c t v a l u e s o f t h e c o l u m n i n t h e g r o u p
d o u l e a v g (
c o l ) , a v g (
D I T I N C T c o l )
R e t u r n s t h e a v e r a g e o f t h e e l e m e n t s i n t h e g r o u p o r t h e
a v e r a g e o f t h e d i s t i n c t v a l u e s o f t h e c o l u m n i n t h e g r o u p
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
15/19
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
16/19
between 0 and 1. NOTE: A true percentile can only be computed for integer values. Use PERCENTILE_APPROX if your input is non-integral.
double percentile_approx(DOUBLE col, p [, B])
Returns an approximate pth percentile of a numeric column (including floating point types) in the group. The B parameter controls approximation accuracy at the cost of memory. Higher values yield better approximations, and the default is 10,000. When the number of distinct values in col is smaller than B, this gives an exact percentile value.
a r r a p e r c e n t i l e _
a p p r o x (
D O U L
c o l ,
a r r a (
p 1 [ , p 2 ] ) [ , ] )
a m e a s a o v e ,
u t a c c e p t s a n d r e t u r n s a n a r r a o f
p e r c e n t i l e v a l u e s i n s t e a d o f a s i n g l e o n e .
array histogram_numeric(col, b)
Computes a histogram of a numeric column in the group using b non-uniformly spaced bins. The output is an array of size b of double-valued (x,y) coordinates that represent the bin centers and heights
array collect_set(col)
Returns a set of objects with duplicate elements eliminated
u i l t - i n T a l e - G e n e r a t i n g F u n c t i o n s ( U D T F )
N o r m a l u s e r -
d e f i n e d f u n c t i o n s ,
s u c h a s c o n c a t ( ) , t a k e i n a s i n g l e i n p u t r o w a n d o u t p u t a s i n g l e o u t p u t r o w .
I n
c o n t r a s t ,
t a l e -
g e n e r a t i n g f u n c t i o n s t r a n s f o r m a s i n g l e i n p u t r o w t o m u l t i p l e o u t p u t r o w s .
R e t u r n T p e N a m e (
i g n a t u r e )
x a m p l e
i n l i n e ( A R R A Y ) x p l o d e s a n a r r a o f s t r u c t s i n t o a t a l e (
a s o f H i v e
0 . 1 0
)
t h
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
17/19
x p l o d e e x p l o d e ( ) t a k e s i n a n a r r a a s a n i n p u t a n d o u t p u t s
t h e e l e m e n t s o f t h e a r r a a s s e p a r a t e r o w s .
U D T F ’ s
c a n e u s e d i n t h e L C T e x p r e s s i o n l i s t a n d a s a
p a r t o f L A T R A L V I W .
C o n d i t i o n a l F u n c t i o n s
R e t u r n
T p e N a m e (
i g n a t u r e )
x a m p l e
T if(boolean testCondition, T valueTrue, T valueFalseOrNull)
Return valueTrue when testCondition is true, returns valueFalseOrNull otherwise
T COALESCE(T v1, T v2, ...)
Return the first v that is not NULL, or NULL if all v's are NULL
T CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END
When a = b, returns c; when a = d, return e; else return f
T CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END
When a = true, returns b; when c = true, return d; else return e
F u n c t i o n s f o r T e x t A n a l t i c s
R e t u r n T p e N a m e (
i g n a t u r e )
x a m p l e
a r r a c o n t e x t _
n g r a m s (
a r r a ,
a r r a ,
i n t K ,
i n t p f )
R e t u r n s t h e t o p -
k c o n t e x t u a l N -
g r a m s
f r o m a s e t o f t o k e n i z e d s e n t e n c e s ,
g i v e n a s t r i n g o f
c o n t e x t .
e e t a t i s t i c s A n d D a t a M i n i n g f o r
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
18/19
m o r e i n f o r m a t i o n . N -
g r a m s a r e
s u s e q u e n c e s o f l e n g t h N d r a w n
f r o m a l o n g e r s e q u e n c e .
T h e
p u r p o s e o f t h e n g r a m s ( ) U D A F i s t o
f i n d t h e k m o s t f r e q u e n t n -
g r a m s
f r o m o n e o r m o r e s e q u e n c e s .
I t c a n
e u s e d i n c o n j u n c t i o n w i t h t h e
s e n t e n c e s ( ) U D F t o a n a l z e
u n s t r u c t u r e d n a t u r a l l a n g u a g e t e x t ,
o r t h e c o l l e c t ( ) f u n c t i o n t o a n a l z e
m o r e g e n e r a l s t r i n g d a t a .
a r r a n g r a m s (
a r r a , i n t N ,
i n t
K , i n t p f
)
R e t u r n s t h e t o p -
k N -
g r a m s f r o m a s e t
o f t o k e n i z e d s e n t e n c e s ,
s u c h a s
t h o s e r e t u r n e d t h e s e n t e n c e s ( )
U D A F .
e e t a t i s t i c s A n d D a t a M i n i n g f o r
m o r e i n f o r m a t i o n .
Contextual n-grams are similar to n-grams, but allow you to specify a 'context' string around which n-grams are to be estimated. For example, you can specify that you're only interested in finding the most common two-word phrases in text that follow the context "I love". You could achieve the same result by manually stripping sentences of non-contextual content and then passing them to ngrams(), but context_ngrams() makes it much
8/18/2019 Hadoop Hive Cheat Sheet - Developer Guide for SQL to HiveQL _ Qubole
19/19
e a s i e r .
systems via a Flume (http://www.quora.com/Flume) that writes data out to Amazon S3 (http://www.quora.com/Amazon-S3). From there, we use a data processing pipeline hosted by Qubole (http://www.quora.com/Qubole) to process and aggregate statistics to Hive (computing) (http://www.quora.com/Hive-computing) tables and to an AWS Redshift (http://www.quora.com/AWS-Redshift) based data warehouse
Prakash Janakiraman, Co-Founder and VP Engineering
Nextdoor
C o n t a c t U s
( h t t p s : / / w w w
. q u o l e
. c o m
/ c o n t a c t
-
u s / )
u p p o r t
( h t t p s : / / q u o l e
. z e n d e s k . c o m
/ h c
/ e n
-
u s )
F r e e T r i a l
( h t t p s : / / w w w
. q u o l e
. c o m
/ t r i a l
-
p a g e )
A o u t U s
( h t t p s : / / w w w
. q u o l e
. c o m
/ a o u t -
u s / )
P r e s s R e l e a s e s
( h t t p s : / / w w w
. q u o l e
. c o m
/ p r e s s -
r e l e a s e s / )
i g D a t a U s e C a s e s
( / r e s o u r c e s /
s o l u t i o n / e s t -
u s e - c a s e s -
f o r -
i g -
d a t a -
a n a l t i c s / )
W e i n a r s
( h t t p s : / / w w w
. q u o l e
. c o m
/ w e i n a r s / )
© 2 0 1 6 Q u o l e
, I n c
. A l l r i g h t s r e s e r v e d e c u r i t
R e p o r t i n g ( / s e c u r i t -
r e p o r t i n g / ) P r i v a c ( / p r i v a c -
p o l i c / )
( h t t p s : / / w w w
. f a c e o o k
. c o m
/ q u o l e
) ( h t t p : / / w w w
. l i n k e d i n
. c o m
/ c o m p a n
/ q u o l e
) ( h t t p s : / / t w i t t e r
. c o m / @ q u o l e
)
https://www.qubole.com/privacy-policy/https://www.qubole.com/security-reporting/https://www.qubole.com/webinars/https://www.qubole.com/resources/solution/best-use-cases-for-big-data-analytics/https://www.qubole.com/press-releases/https://www.qubole.com/about-us/https://www.qubole.com/trial-pagehttps://qubole.zendesk.com/hc/en-ushttps://www.qubole.com/contact-us/http://www.quora.com/AWS-Redshifthttp://www.quora.com/Hive-computinghttp://www.quora.com/Qubolehttp://www.quora.com/Amazon-S3http://www.quora.com/FlumeRecommended