Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
Real-World Performance Training: SQL Reference
Real-World Performance Team
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
Sub-query ( correlated )
SQL
-- Employees whose salary is above the average salary of their own department.
-- The inner query is correlated: it refers to the deptno of the current
-- outer row (e1).
select
    e1.empno,
    e1.ename,
    e1.sal
from emp e1
where e1.sal > (
    select avg(e2.sal)
    from emp e2
    where e2.deptno = e1.deptno
    group by e2.deptno
)
Sub-query
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
Inline View
SQL
-- Every employee together with the average salary of their department.
-- The per-department averages are computed once in the inline view dept_avg
-- and joined back to emp on deptno.
select
    e.empno,
    e.ename,
    e.sal,
    dept_avg.avg_sal
from emp e
inner join (
    select
        deptno,
        avg(sal) as avg_sal
    from emp
    group by deptno
) dept_avg
    on dept_avg.deptno = e.deptno
Inline View
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
Common Table Expression (CTE)
SQL
-- Same result as the inline-view form, with the per-department average
-- salary factored out into a named common table expression.
with dept_avg as (
    select
        deptno,
        avg(sal) as avg_sal
    from emp
    group by deptno
)
select
    e.empno,
    e.ename,
    e.sal,
    d.avg_sal
from emp e
inner join dept_avg d
    on d.deptno = e.deptno
Common Table Expression
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
Window Function
SQL
-- Rank employees by salary inside each department, highest salary first.
-- partition by restarts the ranking for every deptno; the order by inside
-- over() decides the ranking order, the final order by only sorts the output.
select
    dname,
    ename,
    sal,
    rank() over (partition by e.deptno order by sal desc) as sal_rank
from emp e
inner join dept d
    on d.deptno = e.deptno
order by
    dname,
    sal_rank
analytic function
partition by clause
order by clause for function
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
Anatomy of an External Table
Data Loading
-- External table over four gzip-compressed flat files.
--   * default directory : directory object for the mount point holding the files
--   * preprocessor      : zcat.sh (in directory object exec_file_dir) uncompresses
--                         each file as it is read
--   * characterset      : must match the character set of the data files
--   * badfile / logfile : written to directory object ERROR_DUMP
--   * parallel 4        : at most one parallel slave per file is useful, so keep
--                         the degree <= the number of files in location
-- The loader options belong inside "access parameters ( ... )" after a
-- "records" clause, string literals need straight single quotes, location
-- file names must be quoted, and "organization external (" must be closed
-- before "reject limit".
-- NOTE(review): "records delimited by newline" is assumed as the record
-- format - confirm against the actual files.
-- "column definition list ..." and "file column mapping list ..." are
-- placeholders to be replaced with the real column and field definitions.
create table FAST_LOAD
(
  column definition list ...
)
organization external
(
  type oracle_loader
  default directory SPEEDY_FILESYSTEM
  access parameters
  (
    records delimited by newline
    preprocessor exec_file_dir:'zcat.sh'
    characterset 'ZHS16GBK'
    badfile ERROR_DUMP:'FAST_LOAD.bad'
    logfile ERROR_DUMP:'FAST_LOAD.log'
    (
      file column mapping list ...
    )
  )
  location
  ('file_1.gz', 'file_2.gz', 'file_3.gz', 'file_4.gz')
)
reject limit 1000
parallel 4
/
External Table Definition
Reference the Mount Point
Uncompress the data using a secure wrapper
The Character set must match the Character set of the Files
Note Compressed Files
Parallel should match or be less than the number of Files
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
Gathering Statistics
-- Gather optimizer statistics for one table of the current user, leaving
-- every other DBMS_STATS parameter at its default.
-- 'TABLE_NAME' is a placeholder for the real table name.
begin
  dbms_stats.gather_table_stats(
    ownname => user,
    tabname => 'TABLE_NAME'
  );
end;
/
Gathering Table Statistics (Default)
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
Gathering Statistics
• Use the dbms_stats.create_extended_stats function to create extended statistics
-- Extended statistics on a column group: MAKE and MODEL of CARS.
select
  dbms_stats.create_extended_stats(
    ownname   => user,
    tabname   => 'CARS',
    extension => '(MAKE,MODEL)'
  )
from dual;

-- Extended statistics on an expression: UPPER(EMP_LAST_NAME) of EMP.
select
  dbms_stats.create_extended_stats(
    ownname   => user,
    tabname   => 'EMP',
    extension => '(UPPER(EMP_LAST_NAME))'
  )
from dual;
Extended Statistics
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
Set based processing
Data Processing Techniques
-- Set-based split of emp into two targets using two direct-path inserts:
-- department 20 goes to west, everything else goes to east.
-- Each direct-path (append) insert is committed before the next statement
-- touches a table in the same session.
insert /*+ append */ into west
select *
from emp
where deptno = 20;
commit;

-- "deptno != 20" alone evaluates to UNKNOWN for a NULL deptno, so such rows
-- would land in neither table. The explicit IS NULL test makes east the true
-- complement of west.
insert /*+ append */ into east
select *
from emp
where deptno != 20
   or deptno is null;
commit;
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
Multiple ways to get the same result
Data Processing Techniques
-- Conditional multitable insert: a single pass over emp routes each row to
-- west when deptno = 20 and to east otherwise.
-- "values …" is a placeholder for the real column list of each target.
insert /*+ append */ first
  when deptno = 20
    then into west values …
  else
    into east values …
select * from emp;
commit;
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
Duplicate Rows
Data Validation SQL
Simply Check the Data
Obtain one of the ROWIDs of duplicates to investigate
Query the rows you wish to keep, eliminating duplicates based on the load time
-- Simple duplicate check: every pk value that occurs more than once,
-- with the number of occurrences.
select pk, count(*)
from DIRTY_DATA
group by pk
having count(*) > 1;
-- Duplicate check that also returns one ROWID (the highest) per duplicated
-- pk, so a sample row can be fetched and inspected.
select pk, count(*), max(rowid)
from DIRTY_DATA
group by pk
having count(*) > 1;
-- Rows to keep: exactly one row per pk, the one with the latest load_time.
-- row_number() numbers the rows of each pk from newest to oldest and only
-- number 1 survives. column_list is a placeholder for the wanted columns.
with numbered as (
  select
    a.*,
    row_number() over (partition by pk order by load_time desc) rowno
  from DIRTY_DATA a
)
select column_list
from numbered
where rowno=1
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
Orphaned Row Check
Data Validation SQL
Look for Orphans
Look for Parents with no Children
-- Orphans: CHILD rows whose fk matches no PARENT.pk.
-- Every CHILD row is preserved by the outer join; rows without a matching
-- parent come back with P.pk null.
select C.rowid
from CHILD C
left join PARENT P
  on P.pk = C.fk
where P.pk is null;
-- Parents with no children: PARENT rows for which no CHILD.fk matches pk.
-- Every PARENT row is preserved by the outer join; rows without a matching
-- child come back with C.fk null.
select P.rowid
from PARENT P
left join CHILD C
  on C.fk = P.pk
where C.fk is null;
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
Delete
Rewriting DML
-- Conventional approach: parallel DELETE of all 'JAVA' rows from tx_log.
-- The string literal uses straight single quotes; typographic quotes are not
-- valid SQL string delimiters.
alter session enable parallel dml;
delete from tx_log
where symbol = 'JAVA';
commit;
-- Rewrite of "delete from tx_log where symbol = 'JAVA'" as a direct-path copy
-- of the rows to KEEP into tx_log_new, followed by a swap.
alter session enable parallel dml;
-- The predicate is the complement of the DELETE predicate. A DELETE on
-- symbol = 'JAVA' leaves rows with a NULL symbol in place, so they must be
-- kept here too; "symbol != 'JAVA'" alone would silently drop them.
insert /*+ append */ into tx_log_new
select * from tx_log
where symbol != 'JAVA'
   or symbol is null;

-- Swap option 1: exchange the tables by name.
alter table tx_log
rename to tx_log_old;
alter table tx_log_new
rename to tx_log;

-- or
-- Swap option 2: exchange the rebuilt segment into the partition.
-- NOTE(review): for this variant tx_log_new presumably holds only the rows
-- of partition part_201409 - confirm before use.
alter table tx_log exchange partition
part_201409 with table tx_log_new;
The predicate is the complement of the DELETE predicate: it selects the rows to keep
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
Update
Rewriting DML
-- Conventional approach: parallel UPDATE raising the tax rate from 9.3 to
-- 9.9 for sales made after 1 January 2009.
-- The date is an ANSI date literal, so the comparison does not depend on the
-- session NLS_DATE_FORMAT (the original compared the date column to a
-- string in typographic quotes).
alter session enable parallel dml;
update sales_ledger
set tax_rate = 9.9
where tax_rate = 9.3
and sales_date > date '2009-01-01';
commit;
-- Rewrite of the tax-rate UPDATE as a direct-path insert-as-select: the
-- UPDATE predicates move into a CASE expression in the select list, so every
-- row is copied and only the matching rows get the new rate.
-- "<column list>" marks the unchanged columns before and after tax_rate.
-- NOTE(review): the target is named tx_log_new although the source is
-- sales_ledger - looks like a leftover from the DELETE example; confirm.
alter session enable parallel dml;
insert /*+ append */ into tx_log_new
select
<column list>,
case
  -- a searched CASE needs the WHEN keyword before its condition
  when sales_date > date '2009-01-01'
   and tax_rate = 9.3
  then 9.9
  else tax_rate
end,
<column list>
from sales_ledger;
The UPDATE predicates are moved to the SELECT list in a CASE expression to transform the rows
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
• For each row in lineorder, how many rows are returned from customer?
• Without constraints, what if lo_custkey is NULL?
• Even if lo_custkey is NOT NULL, how many rows will join with customer? 0? 1? More than 1?
• NOT NULL constraints are essentially free, no sense not to implement
• Several optimizations depend on this information!
Rules and Framework: NOT NULL Constraints
-- Join fragment under discussion (not a complete statement): lineorder rows
-- are matched to customer rows on the customer key.
FROM lineorder
JOIN customer ON
lo_custkey = c_custkey
SQL> desc lineorder
Name Null? Type
----------- ------- --------
...
LO_CUSTKEY NOT NULL NUMBER
...
SQL> desc customer
Name Null? Type
--------- --------- --------
C_CUSTKEY NOT NULL NUMBER
....
-- Template for declaring a column NOT NULL; tname and cname are placeholders
-- for the table and column names.
ALTER TABLE tname
  MODIFY (cname NOT NULL)
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
• There must be a primary key on the dimension table
• There must be a foreign key on the fact table
• The state of the constraint depends on trust in the ETL process and volume of data
• Constraints must be in RELY state
• It is not necessary to enforce constraints on the fact table
• You need to tell the optimizer you can trust constraints in the RELY state
With PK/FK constraints, exactly 1 row is returned from dimension table for a fact row
Rules and Framework: Primary Key and Foreign Key Constraints
-- Primary key on the dimension table, declared RELY.
alter table customer
  add constraint customer_pk primary key (c_custkey) rely;

-- Foreign key on the fact table, declared RELY but neither enforced nor
-- validated (DISABLE NOVALIDATE).
-- NOTE(review): the constraint is a foreign key although its name ends in
-- _pk - consider whether the name is intentional.
alter table lineorder
  add constraint lo_customer_pk
  foreign key (lo_custkey) references customer (c_custkey)
  rely disable novalidate;

-- Tell the optimizer that constraints in RELY state can be trusted.
alter system set query_rewrite_integrity = TRUSTED;
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
• Example: Interval partitioning
Rules and Framework: Partition the Fact Table on the Time Dimension
-- Fact table range-partitioned on LO_ORDERDATE with a one-month interval.
-- Only the first partition (values before 1 February 1992) is declared in
-- the DDL. "... other columns" is a placeholder for the remaining column
-- definitions.
create table lineorder
(
  "LO_ORDERKEY"   number not null enable
, "LO_LINENUMBER" number
  ... other columns
)
partition by range (lo_orderdate)
interval (numtoyminterval(1, 'MONTH'))
(
  partition r199201 values less than (to_date('19920201', 'YYYYMMDD'))
);
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
Configure Table for In-Memory Column Store
• Use MEMCOMPRESS FOR QUERY for performance
• Use DUPLICATE ALL to ensure extents are loaded in all RAC instances on Exadata
-- SQL*Plus transcript: enable lineorder for the In-Memory Column Store with
-- MEMCOMPRESS FOR QUERY compression and DUPLICATE ALL, so that the data is
-- loaded in all RAC instances.
SQL> alter table lineorder
2 inmemory memcompress for query
3 duplicate all;
Table altered.
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
Populate the In-Memory Column Store
• Query the table for on-demand population
• Ensure the optimizer is choosing a full scan
-- SQL*Plus transcript: query the table to trigger on-demand population of
-- the In-Memory Column Store.
-- The FULL hint must name the table (or its alias); a bare /*+ full */ is
-- not a valid hint and is ignored, leaving the access path to the optimizer.
SQL> select /*+ full(lineorder) */ count(*)
2 from lineorder;
Copyright © 2014, Oracle and/or its affiliates. All rights reserved. |
Validate Population Status
• Query GV$IM_SEGMENTS
• Look for populate_status='COMPLETED' and bytes_not_populated=0
-- SQL*Plus report of In-Memory population status per instance and segment.
-- Population is finished for a segment when status is COMPLETED and
-- bytes_not_populated is 0.
set lines 150
-- COLUMN formats apply to the displayed heading, i.e. the alias when a
-- column is aliased in the query below.
column name format a30
col pname format a30
column owner format a20
column segment_name format a30
column populate_status format a20
-- populate_status is selected under the alias "status", so that alias needs
-- its own format.
column status format a20
column bytes_not_populated format 999,999,999,999.99
set echo on
SELECT v.inst_id,v.owner,
v.segment_name name,
v.partition_name pname,
v.populate_status status,
v.bytes_not_populated
FROM gv$im_segments v
-- same sort as the positional "order by 3,1,2", spelled out by column
order by v.segment_name, v.inst_id, v.owner
/