前几天,看到一篇公众号文章谈到一个SQL优化的问题,原SQL逻辑大致如下,看到这个SQL,发现其中一个小问题感觉需要提示出来,否则很容易忽视。
select min(created)
FROM t1
WHERE '1606270446000' = '1606270446000'
AND EXISTS
(SELECT 1
FROM t2 m
WHERE m.object_type = t1.object_type
AND m.owner = t1.owner
AND m.object_id = '33'
AND t1.namespace = '4')
OR EXISTS
(SELECT 1
FROM t3 n
WHERE n.object_type = t1.object_type
AND n.owner = t1.owner
AND n.object_id = '33'
AND t1.namespace = '4');
Plan hash value: 1283715866
-----------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
-----------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 395 (100)| | 1 |00:00:00.15 | 14591 |
| 1 | SORT AGGREGATE | | 1 | 1 | | | 1 |00:00:00.15 | 14591 |
|* 2 | FILTER | | 1 | | | | 2628 |00:00:00.15 | 14591 |
| 3 | TABLE ACCESS FULL | T1 | 1 | 101K| 391 (1)| 00:00:05 | 203K|00:00:00.04 | 2867 |
| 4 | UNION-ALL | | 7925 | | | | 1 |00:00:00.03 | 11724 |
|* 5 | FILTER | | 7925 | | | | 1 |00:00:00.01 | 5863 |
|* 6 | TABLE ACCESS BY INDEX ROWID| T3 | 2929 | 1 | 2 (0)| 00:00:01 | 1 |00:00:00.01 | 5863 |
|* 7 | INDEX RANGE SCAN | N_IDX01 | 2929 | 1 | 1 (0)| 00:00:01 | 2929 |00:00:00.01 | 2934 |
|* 8 | FILTER | | 7924 | | | | 0 |00:00:00.01 | 5861 |
|* 9 | TABLE ACCESS BY INDEX ROWID| T2 | 2928 | 1 | 2 (0)| 00:00:01 | 0 |00:00:00.01 | 5861 |
|* 10 | INDEX RANGE SCAN | M_IDX01 | 2928 | 1 | 1 (0)| 00:00:01 | 2928 |00:00:00.01 | 2933 |
-----------------------------------------------------------------------------------------------------------------------------
COLUMN_NAME NDV NUL NUM_NULLS DATA_TYPE LOW_VAL_25 HIGH_VAL_25 LAST_ANALYZED HISTOGRAM
------------------------------ ---------- --- ---------- ------------ ------------------------- ------------------------- -------------------- ---------
CREATED 28480 Y 0 DATE 78760202122A38 78790318010404 2021-03-07 10:26:30 NONE
DATA_OBJECT_ID 29582 Y 144370 NUMBER 80 C31A3E5F 2021-03-07 10:26:30 NONE
EDITION_NAME 0 Y 203482 VARCHAR2 2021-03-07 10:26:30 NONE
GENERATED 2 Y 0 VARCHAR2 4E 59 2021-03-07 10:26:30 NONE
LAST_DDL_TIME 3050 Y 10 DATE 78760201122A38 787903040E3124 2021-03-07 10:26:30 NONE
NAMESPACE 19 Y 10 NUMBER C102 C141 2021-03-07 10:26:30 FREQUENCY
OBJECT_ID 203482 Y 0 NUMBER C102 C3152353 2021-03-07 10:26:30 NONE
OBJECT_NAME 48784 Y 0 VARCHAR2 2F31303030333233645F44656 73756E2F7574696C2F7265736 2021-03-07 10:26:30 NONE
OBJECT_TYPE 44 Y 0 VARCHAR2 434C5553544552 57494E444F57 2021-03-07 10:26:30 FREQUENCY
OWNER 251 Y 0 VARCHAR2 414752 5A4142424958 2021-03-07 10:26:30 FREQUENCY
SECONDARY 1 Y 0 VARCHAR2 4E 4E 2021-03-07 10:26:30 NONE
STATUS 2 Y 0 VARCHAR2 494E56414C4944 56414C4944 2021-03-07 10:26:30 NONE
SUBOBJECT_NAME 171 Y 202738 VARCHAR2 5030 575248245F5741495453545F3 2021-03-07 10:26:30 NONE
TEMPORARY 2 Y 0 VARCHAR2 4E 59 2021-03-07 10:26:30 NONE
TIMESTAMP 3887 Y 10 VARCHAR2 313939372D30342D31323A313 323032312D30332D30343A313 2021-03-07 10:26:30 NONE
从执行计划中看到,采用了FILTER的方式,原文中该SQL执行了“2小时39分钟40秒,逻辑读2874多万,物理读270多万”,T表数据量很大,Filter 主查询结果集大,子查询结果小,也不能影响驱动关系,驱动表只能是主查询。驱动表驱动次数越多,效率越差。主查询结果集大,只能利用改写的方式消除FILTER。原文作者利用了如下方式进行改写:
SELECT min(t.created)
FROM t1 t,
t2 m ,
t3 n
WHERE ('1606270446000' = '1606270446000')
AND m.object_type = t.object_type
AND m.owner = t.owner
AND m.object_id = '33'
AND t.namespace = '1'
OR (n.object_type = t.object_type
AND n.owner = t.owner
AND n.object_id = '33'
AND t.namespace = '4');
因为最终的结果集取MIN,则将半连接直接改为了inner join,无需考虑重复数据的问题,但是需要注意的是,半连接(in或exists)改为inner join时,如果最终结果集没有去重逻辑,需要将中间涉及关联的结果集去重后,再改写为内连接,从而体现“半”连接,这是在我们日常SQL改写中,常见却又容易忽略的问题:
create index t1_idx01 on t1(object_type,owner,namespace,created);
create index m_idx01 on t2(object_id);
create index n_idx01 on t3(object_id);
SELECT created --普通结果集
FROM t1,
(SELECT DISTINCT m.object_type, --distinct去重
m.owner
FROM t2 m
WHERE m.object_id = '33') x1,
(SELECT DISTINCT n.object_type, --distinct去重
n.owner
FROM t3 n
WHERE n.object_id = '33') x2
WHERE 1=1
AND (x1.object_type = t1.object_type
AND x1.owner = t1.owner)
OR (x2.object_type = t1.object_type
AND x2.owner = t1.owner)
AND t1.namespace = '4';
Plan hash value: 3647106578
--------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
--------------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 16 (100)| | 2628 |00:00:00.01 | 215 |
| 1 | CONCATENATION | | 1 | | | | 2628 |00:00:00.01 | 215 |
| 2 | NESTED LOOPS | | 1 | 3 | 8 (25)| 00:00:01 | 2628 |00:00:00.01 | 195 |
| 3 | MERGE JOIN CARTESIAN | | 1 | 1 | 6 (34)| 00:00:01 | 1 |00:00:00.01 | 6 |
| 4 | VIEW | | 1 | 1 | 3 (34)| 00:00:01 | 1 |00:00:00.01 | 3 |
| 5 | HASH UNIQUE | | 1 | 1 | 3 (34)| 00:00:01 | 1 |00:00:00.01 | 3 |
| 6 | TABLE ACCESS BY INDEX ROWID | T2 | 1 | 1 | 2 (0)| 00:00:01 | 1 |00:00:00.01 | 3 |
|* 7 | INDEX RANGE SCAN | M_IDX01 | 1 | 1 | 1 (0)| 00:00:01 | 1 |00:00:00.01 | 2 |
| 8 | BUFFER SORT | | 1 | 1 | 6 (34)| 00:00:01 | 1 |00:00:00.01 | 3 |
| 9 | VIEW | | 1 | 1 | 3 (34)| 00:00:01 | 1 |00:00:00.01 | 3 |
| 10 | HASH UNIQUE | | 1 | 1 | 3 (34)| 00:00:01 | 1 |00:00:00.01 | 3 |
| 11 | TABLE ACCESS BY INDEX ROWID| T3 | 1 | 1 | 2 (0)| 00:00:01 | 1 |00:00:00.01 | 3 |
|* 12 | INDEX RANGE SCAN | N_IDX01 | 1 | 1 | 1 (0)| 00:00:01 | 1 |00:00:00.01 | 2 |
|* 13 | INDEX RANGE SCAN | T1_IDX01 | 1 | 3 | 2 (0)| 00:00:01 | 2628 |00:00:00.01 | 189 |
| 14 | NESTED LOOPS | | 1 | 1 | 8 (25)| 00:00:01 | 0 |00:00:00.01 | 20 |
| 15 | NESTED LOOPS | | 1 | 1 | 5 (20)| 00:00:01 | 2628 |00:00:00.01 | 17 |
| 16 | VIEW | | 1 | 1 | 3 (34)| 00:00:01 | 1 |00:00:00.01 | 3 |
| 17 | HASH UNIQUE | | 1 | 1 | 3 (34)| 00:00:01 | 1 |00:00:00.01 | 3 |
| 18 | TABLE ACCESS BY INDEX ROWID | T2 | 1 | 1 | 2 (0)| 00:00:01 | 1 |00:00:00.01 | 3 |
|* 19 | INDEX RANGE SCAN | M_IDX01 | 1 | 1 | 1 (0)| 00:00:01 | 1 |00:00:00.01 | 2 |
|* 20 | INDEX RANGE SCAN | T1_IDX01 | 1 | 1 | 2 (0)| 00:00:01 | 2628 |00:00:00.01 | 14 |
|* 21 | VIEW | | 2628 | 1 | 3 (34)| 00:00:01 | 0 |00:00:00.01 | 3 |
| 22 | SORT UNIQUE | | 2628 | 1 | 3 (34)| 00:00:01 | 2628 |00:00:00.01 | 3 |
| 23 | TABLE ACCESS BY INDEX ROWID | T3 | 1 | 1 | 2 (0)| 00:00:01 | 1 |00:00:00.01 | 3 |
|* 24 | INDEX RANGE SCAN | N_IDX01 | 1 | 1 | 1 (0)| 00:00:01 | 1 |00:00:00.01 | 2 |
--------------------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$1
4 - SEL$2 / X1@SEL$1
5 - SEL$2
6 - SEL$2 / M@SEL$2
7 - SEL$2 / M@SEL$2
9 - SEL$3 / X2@SEL$1
10 - SEL$3
11 - SEL$3 / N@SEL$3
12 - SEL$3 / N@SEL$3
13 - SEL$1_1 / T1@SEL$1
16 - SEL$2 / X1@SEL$1_2
18 - SEL$2 / M@SEL$2
19 - SEL$2 / M@SEL$2
20 - SEL$1_2 / T1@SEL$1_2
21 - SEL$3 / X2@SEL$1_2
23 - SEL$3 / N@SEL$3
24 - SEL$3 / N@SEL$3
Outline Data
-------------
/*+
BEGIN_OUTLINE_DATA
IGNORE_OPTIM_EMBEDDED_HINTS
OPTIMIZER_FEATURES_ENABLE('11.2.0.4')
DB_VERSION('11.2.0.4')
OPT_PARAM('_optim_peek_user_binds' 'false')
OPT_PARAM('_optimizer_use_feedback' 'false')
ALL_ROWS
OUTLINE_LEAF(@"SEL$2")
OUTLINE_LEAF(@"SEL$3")
OUTLINE_LEAF(@"SEL$1")
OUTLINE_LEAF(@"SEL$1_1")
USE_CONCAT(@"SEL$1" 8 OR_PREDICATES(1))
OUTLINE_LEAF(@"SEL$1_2")
OUTLINE(@"SEL$1")
NO_ACCESS(@"SEL$1_1" "X1"@"SEL$1")
NO_ACCESS(@"SEL$1_1" "X2"@"SEL$1")
INDEX(@"SEL$1_1" "T1"@"SEL$1" ("T1"."OBJECT_TYPE" "T1"."OWNER" "T1"."NAMESPACE" "T1"."CREATED"))
NO_ACCESS(@"SEL$1_2" "X1"@"SEL$1_2")
INDEX(@"SEL$1_2" "T1"@"SEL$1_2" ("T1"."OBJECT_TYPE" "T1"."OWNER" "T1"."NAMESPACE" "T1"."CREATED"))
NO_ACCESS(@"SEL$1_2" "X2"@"SEL$1_2")
LEADING(@"SEL$1_1" "X1"@"SEL$1" "X2"@"SEL$1" "T1"@"SEL$1")
LEADING(@"SEL$1_2" "X1"@"SEL$1_2" "T1"@"SEL$1_2" "X2"@"SEL$1_2")
USE_MERGE_CARTESIAN(@"SEL$1_1" "X2"@"SEL$1")
USE_NL(@"SEL$1_1" "T1"@"SEL$1")
USE_NL(@"SEL$1_2" "T1"@"SEL$1_2")
USE_NL(@"SEL$1_2" "X2"@"SEL$1_2")
INDEX_RS_ASC(@"SEL$2" "M"@"SEL$2" ("T2"."OBJECT_ID"))
USE_HASH_AGGREGATION(@"SEL$2")
INDEX_RS_ASC(@"SEL$3" "N"@"SEL$3" ("T3"."OBJECT_ID"))
END_OUTLINE_DATA
*/
Predicate Information (identified by operation id):
---------------------------------------------------
7 - access("M"."OBJECT_ID"=33)
12 - access("N"."OBJECT_ID"=33)
13 - access("X2"."OBJECT_TYPE"="T1"."OBJECT_TYPE" AND "X2"."OWNER"="T1"."OWNER" AND "T1"."NAMESPACE"=4)
19 - access("M"."OBJECT_ID"=33)
20 - access("X1"."OBJECT_TYPE"="T1"."OBJECT_TYPE" AND "X1"."OWNER"="T1"."OWNER")
21 - filter((LNNVL("X2"."OBJECT_TYPE"="T1"."OBJECT_TYPE") OR LNNVL("X2"."OWNER"="T1"."OWNER") OR
LNNVL("T1"."NAMESPACE"=4)))
24 - access("N"."OBJECT_ID"=33)