SQL EXISTS写法遭遇FILTER

前几天,看到一篇公众号文章谈到一个SQL优化的问题,原SQL逻辑大致如下,看到这个SQL,发现其中一个小问题感觉需要提示出来,否则很容易忽视。

select min(created)
FROM t1
WHERE '1606270446000' = '1606270446000'
  AND EXISTS
    (SELECT 1
     FROM t2 m
     WHERE m.object_type = t1.object_type
       AND m.owner = t1.owner
       AND m.object_id = '33'
       AND t1.namespace = '4')
  OR EXISTS
    (SELECT 1
     FROM t3 n
     WHERE n.object_type = t1.object_type
       AND n.owner = t1.owner
       AND n.object_id = '33'
       AND t1.namespace = '4');

Plan hash value: 1283715866

-----------------------------------------------------------------------------------------------------------------------------
| Id  | Operation			| Name	  | Starts | E-Rows | Cost (%CPU)| E-Time   | A-Rows |	 A-Time   | Buffers |
-----------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT		|	  |	 1 |	    |	395 (100)|	    |	   1 |00:00:00.15 |   14591 |
|   1 |  SORT AGGREGATE 		|	  |	 1 |	  1 |		 |	    |	   1 |00:00:00.15 |   14591 |
|*  2 |   FILTER			|	  |	 1 |	    |		 |	    |	2628 |00:00:00.15 |   14591 |
|   3 |    TABLE ACCESS FULL		| T1	  |	 1 |	101K|	391   (1)| 00:00:05 |	 203K|00:00:00.04 |    2867 |
|   4 |    UNION-ALL			|	  |   7925 |	    |		 |	    |	   1 |00:00:00.03 |   11724 |
|*  5 |     FILTER			|	  |   7925 |	    |		 |	    |	   1 |00:00:00.01 |    5863 |
|*  6 |      TABLE ACCESS BY INDEX ROWID| T3	  |   2929 |	  1 |	  2   (0)| 00:00:01 |	   1 |00:00:00.01 |    5863 |
|*  7 |       INDEX RANGE SCAN		| N_IDX01 |   2929 |	  1 |	  1   (0)| 00:00:01 |	2929 |00:00:00.01 |    2934 |
|*  8 |     FILTER			|	  |   7924 |	    |		 |	    |	   0 |00:00:00.01 |    5861 |
|*  9 |      TABLE ACCESS BY INDEX ROWID| T2	  |   2928 |	  1 |	  2   (0)| 00:00:01 |	   0 |00:00:00.01 |    5861 |
|* 10 |       INDEX RANGE SCAN		| M_IDX01 |   2928 |	  1 |	  1   (0)| 00:00:01 |	2928 |00:00:00.01 |    2933 |
-----------------------------------------------------------------------------------------------------------------------------

COLUMN_NAME			      NDV NUL  NUM_NULLS DATA_TYPE    LOW_VAL_25		HIGH_VAL_25		  LAST_ANALYZED        HISTOGRAM
------------------------------ ---------- --- ---------- ------------ ------------------------- ------------------------- -------------------- ---------
CREATED 			    28480 Y	       0 DATE	      78760202122A38		78790318010404		  2021-03-07 10:26:30  NONE
DATA_OBJECT_ID			    29582 Y	  144370 NUMBER       80			C31A3E5F		  2021-03-07 10:26:30  NONE
EDITION_NAME				0 Y	  203482 VARCHAR2							  2021-03-07 10:26:30  NONE
GENERATED				2 Y	       0 VARCHAR2     4E			59			  2021-03-07 10:26:30  NONE
LAST_DDL_TIME			     3050 Y	      10 DATE	      78760201122A38		787903040E3124		  2021-03-07 10:26:30  NONE
NAMESPACE			       19 Y	      10 NUMBER       C102			C141			  2021-03-07 10:26:30  FREQUENCY
OBJECT_ID			   203482 Y	       0 NUMBER       C102			C3152353		  2021-03-07 10:26:30  NONE
OBJECT_NAME			    48784 Y	       0 VARCHAR2     2F31303030333233645F44656 73756E2F7574696C2F7265736 2021-03-07 10:26:30  NONE
OBJECT_TYPE			       44 Y	       0 VARCHAR2     434C5553544552		57494E444F57		  2021-03-07 10:26:30  FREQUENCY
OWNER				      251 Y	       0 VARCHAR2     414752			5A4142424958		  2021-03-07 10:26:30  FREQUENCY
SECONDARY				1 Y	       0 VARCHAR2     4E			4E			  2021-03-07 10:26:30  NONE
STATUS					2 Y	       0 VARCHAR2     494E56414C4944		56414C4944		  2021-03-07 10:26:30  NONE
SUBOBJECT_NAME			      171 Y	  202738 VARCHAR2     5030			575248245F5741495453545F3 2021-03-07 10:26:30  NONE
TEMPORARY				2 Y	       0 VARCHAR2     4E			59			  2021-03-07 10:26:30  NONE
TIMESTAMP			     3887 Y	      10 VARCHAR2     313939372D30342D31323A313 323032312D30332D30343A313 2021-03-07 10:26:30  NONE

从执行计划中看到,采用了FILTER的方式,原文中该SQL执行了“2小时39分钟40秒,逻辑读2874多万,物理读270多万”,T表数据量很大Filter 主查询结果集大,子查询结果小,也不能影响驱动关系,驱动表只能是主查询。驱动表驱动次数越多,效率越差。主查询结果集大,只能利用改写的方式消除FILTER。原文作者利用了如下方式进行改写:

SELECT min(t.created)
FROM t1 t,
     t2 m ,
     t3 n
WHERE ('1606270446000' = '1606270446000')
  AND m.object_type = t.object_type
  AND m.owner = t.owner
  AND m.object_id = '33'
  AND t.namespace = '1'
  OR (n.object_type = t.object_type
      AND n.owner = t.owner
      AND n.object_id = '33'
      AND t.namespace = '4');

因为最终的结果集取MIN,则将半连接直接改为了inner join,无需考虑重复数据的问题,但是需要注意的是,半连接(in或exists)改为inner join时,如果最终结果集没有去重逻辑,需要将中间涉及关联的结果集去重后,再改写为内连接,从而体现“半”连接,这是在我们日常SQL改写中,常见却又容易忽略的问题:

create index t1_idx01 on t1(object_type,owner,namespace,created);
create index m_idx01 on t2(object_id);
create index n_idx01 on t3(object_id);

SELECT created  --普通结果集
FROM t1,
  (SELECT DISTINCT m.object_type,  --distinct去重
                   m.owner
   FROM t2 m
   WHERE m.object_id = '33') x1,
  (SELECT DISTINCT n.object_type,  --distinct去重
                   n.owner
   FROM t3 n
   WHERE n.object_id = '33') x2
WHERE 1=1
  AND (x1.object_type = t1.object_type
       AND x1.owner = t1.owner)
  OR (x2.object_type = t1.object_type
      AND x2.owner = t1.owner)
  AND t1.namespace = '4';

Plan hash value: 3647106578

--------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation			  | Name     | Starts | E-Rows | Cost (%CPU)| E-Time   | A-Rows |   A-Time   | Buffers |
--------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT		  |	     |	    1 |        |    16 (100)|	       |   2628 |00:00:00.01 |	   215 |
|   1 |  CONCATENATION			  |	     |	    1 |        |	    |	       |   2628 |00:00:00.01 |	   215 |
|   2 |   NESTED LOOPS			  |	     |	    1 |      3 |     8	(25)| 00:00:01 |   2628 |00:00:00.01 |	   195 |
|   3 |    MERGE JOIN CARTESIAN 	  |	     |	    1 |      1 |     6	(34)| 00:00:01 |      1 |00:00:00.01 |	     6 |
|   4 |     VIEW			  |	     |	    1 |      1 |     3	(34)| 00:00:01 |      1 |00:00:00.01 |	     3 |
|   5 |      HASH UNIQUE		  |	     |	    1 |      1 |     3	(34)| 00:00:01 |      1 |00:00:00.01 |	     3 |
|   6 |       TABLE ACCESS BY INDEX ROWID | T2	     |	    1 |      1 |     2	 (0)| 00:00:01 |      1 |00:00:00.01 |	     3 |
|*  7 |        INDEX RANGE SCAN 	  | M_IDX01  |	    1 |      1 |     1	 (0)| 00:00:01 |      1 |00:00:00.01 |	     2 |
|   8 |     BUFFER SORT 		  |	     |	    1 |      1 |     6	(34)| 00:00:01 |      1 |00:00:00.01 |	     3 |
|   9 |      VIEW			  |	     |	    1 |      1 |     3	(34)| 00:00:01 |      1 |00:00:00.01 |	     3 |
|  10 |       HASH UNIQUE		  |	     |	    1 |      1 |     3	(34)| 00:00:01 |      1 |00:00:00.01 |	     3 |
|  11 |        TABLE ACCESS BY INDEX ROWID| T3	     |	    1 |      1 |     2	 (0)| 00:00:01 |      1 |00:00:00.01 |	     3 |
|* 12 | 	INDEX RANGE SCAN	  | N_IDX01  |	    1 |      1 |     1	 (0)| 00:00:01 |      1 |00:00:00.01 |	     2 |
|* 13 |    INDEX RANGE SCAN		  | T1_IDX01 |	    1 |      3 |     2	 (0)| 00:00:01 |   2628 |00:00:00.01 |	   189 |
|  14 |   NESTED LOOPS			  |	     |	    1 |      1 |     8	(25)| 00:00:01 |      0 |00:00:00.01 |	    20 |
|  15 |    NESTED LOOPS 		  |	     |	    1 |      1 |     5	(20)| 00:00:01 |   2628 |00:00:00.01 |	    17 |
|  16 |     VIEW			  |	     |	    1 |      1 |     3	(34)| 00:00:01 |      1 |00:00:00.01 |	     3 |
|  17 |      HASH UNIQUE		  |	     |	    1 |      1 |     3	(34)| 00:00:01 |      1 |00:00:00.01 |	     3 |
|  18 |       TABLE ACCESS BY INDEX ROWID | T2	     |	    1 |      1 |     2	 (0)| 00:00:01 |      1 |00:00:00.01 |	     3 |
|* 19 |        INDEX RANGE SCAN 	  | M_IDX01  |	    1 |      1 |     1	 (0)| 00:00:01 |      1 |00:00:00.01 |	     2 |
|* 20 |     INDEX RANGE SCAN		  | T1_IDX01 |	    1 |      1 |     2	 (0)| 00:00:01 |   2628 |00:00:00.01 |	    14 |
|* 21 |    VIEW 			  |	     |	 2628 |      1 |     3	(34)| 00:00:01 |      0 |00:00:00.01 |	     3 |
|  22 |     SORT UNIQUE 		  |	     |	 2628 |      1 |     3	(34)| 00:00:01 |   2628 |00:00:00.01 |	     3 |
|  23 |      TABLE ACCESS BY INDEX ROWID  | T3	     |	    1 |      1 |     2	 (0)| 00:00:01 |      1 |00:00:00.01 |	     3 |
|* 24 |       INDEX RANGE SCAN		  | N_IDX01  |	    1 |      1 |     1	 (0)| 00:00:01 |      1 |00:00:00.01 |	     2 |
--------------------------------------------------------------------------------------------------------------------------------

Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------

   1 - SEL$1
   4 - SEL$2   / X1@SEL$1
   5 - SEL$2
   6 - SEL$2   / M@SEL$2
   7 - SEL$2   / M@SEL$2
   9 - SEL$3   / X2@SEL$1
  10 - SEL$3
  11 - SEL$3   / N@SEL$3
  12 - SEL$3   / N@SEL$3
  13 - SEL$1_1 / T1@SEL$1
  16 - SEL$2   / X1@SEL$1_2
  18 - SEL$2   / M@SEL$2
  19 - SEL$2   / M@SEL$2
  20 - SEL$1_2 / T1@SEL$1_2
  21 - SEL$3   / X2@SEL$1_2
  23 - SEL$3   / N@SEL$3
  24 - SEL$3   / N@SEL$3

Outline Data
-------------

  /*+
      BEGIN_OUTLINE_DATA
      IGNORE_OPTIM_EMBEDDED_HINTS
      OPTIMIZER_FEATURES_ENABLE('11.2.0.4')
      DB_VERSION('11.2.0.4')
      OPT_PARAM('_optim_peek_user_binds' 'false')
      OPT_PARAM('_optimizer_use_feedback' 'false')
      ALL_ROWS
      OUTLINE_LEAF(@"SEL$2")
      OUTLINE_LEAF(@"SEL$3")
      OUTLINE_LEAF(@"SEL$1")
      OUTLINE_LEAF(@"SEL$1_1")
      USE_CONCAT(@"SEL$1" 8 OR_PREDICATES(1))
      OUTLINE_LEAF(@"SEL$1_2")
      OUTLINE(@"SEL$1")
      NO_ACCESS(@"SEL$1_1" "X1"@"SEL$1")
      NO_ACCESS(@"SEL$1_1" "X2"@"SEL$1")
      INDEX(@"SEL$1_1" "T1"@"SEL$1" ("T1"."OBJECT_TYPE" "T1"."OWNER" "T1"."NAMESPACE" "T1"."CREATED"))
      NO_ACCESS(@"SEL$1_2" "X1"@"SEL$1_2")
      INDEX(@"SEL$1_2" "T1"@"SEL$1_2" ("T1"."OBJECT_TYPE" "T1"."OWNER" "T1"."NAMESPACE" "T1"."CREATED"))
      NO_ACCESS(@"SEL$1_2" "X2"@"SEL$1_2")
      LEADING(@"SEL$1_1" "X1"@"SEL$1" "X2"@"SEL$1" "T1"@"SEL$1")
      LEADING(@"SEL$1_2" "X1"@"SEL$1_2" "T1"@"SEL$1_2" "X2"@"SEL$1_2")
      USE_MERGE_CARTESIAN(@"SEL$1_1" "X2"@"SEL$1")
      USE_NL(@"SEL$1_1" "T1"@"SEL$1")
      USE_NL(@"SEL$1_2" "T1"@"SEL$1_2")
      USE_NL(@"SEL$1_2" "X2"@"SEL$1_2")
      INDEX_RS_ASC(@"SEL$2" "M"@"SEL$2" ("T2"."OBJECT_ID"))
      USE_HASH_AGGREGATION(@"SEL$2")
      INDEX_RS_ASC(@"SEL$3" "N"@"SEL$3" ("T3"."OBJECT_ID"))
      END_OUTLINE_DATA
  */

Predicate Information (identified by operation id):
---------------------------------------------------

   7 - access("M"."OBJECT_ID"=33)
  12 - access("N"."OBJECT_ID"=33)
  13 - access("X2"."OBJECT_TYPE"="T1"."OBJECT_TYPE" AND "X2"."OWNER"="T1"."OWNER" AND "T1"."NAMESPACE"=4)
  19 - access("M"."OBJECT_ID"=33)
  20 - access("X1"."OBJECT_TYPE"="T1"."OBJECT_TYPE" AND "X1"."OWNER"="T1"."OWNER")
  21 - filter((LNNVL("X2"."OBJECT_TYPE"="T1"."OBJECT_TYPE") OR LNNVL("X2"."OWNER"="T1"."OWNER") OR
	      LNNVL("T1"."NAMESPACE"=4)))
  24 - access("N"."OBJECT_ID"=33)