Re: Select max within max

From: Charles Hooper <hooperc2000_at_yahoo.com>
Date: Fri, 11 Jul 2008 15:07:03 -0700 (PDT)
Message-ID: <5a32c4eb-1176-4eee-a5e6-821b8fe64598@56g2000hsm.googlegroups.com>


On Jul 11, 1:07 pm, Mladen Gogala <mgog..._at_yahoo.com> wrote:
> On Fri, 11 Jul 2008 04:42:20 -0700, Charles Hooper wrote:
> > SELECT DISTINCT
> >   COLX,
> >   COLY,
> >   COLZ
> > FROM
> >   (SELECT
> >     COLX,
> >     COLY OLD_COLY,
> >     MAX(COLY) OVER (PARTITION BY COLX) COLY, MAX(COLZ) OVER (PARTITION
> >     BY COLX, COLY) COLZ
> >   FROM
> >     T1)
> > WHERE
> >   OLD_COLY=COLY;
>
> And, if T1 has few thousand rows, it's going to be full table scan,
> window sort and sort distinct, in that order. In other words, the
> resource consumption will just explode. As this is obviously a report,
> materialized view looks like the only reasonable solution. This is a
> sort of "mad max" query.
>
> --http://mgogala.freehostia.com

In the interest of curiosity, a test set up with 260,000 rows on a table with 26 columns:
CREATE TABLE T1(
  COLX VARCHAR2(5),

  COLY NUMBER(5),
  COLZ NUMBER(5),
  COLA NUMBER(5),
  COLB NUMBER(5),
  COLC NUMBER(5),
  COLD NUMBER(5),
  COLE NUMBER(5),
  COLF NUMBER(5),
  COLG NUMBER(5),
  COLH NUMBER(5),
  COLI NUMBER(5),
  COLJ NUMBER(5),
  COLK NUMBER(5),
  COLL NUMBER(5),
  COLM NUMBER(5),
  COLN NUMBER(5),
  COLO NUMBER(5),
  COLP NUMBER(5),
  COLQ NUMBER(5),
  COLR NUMBER(5),
  COLS NUMBER(5),
  COLT NUMBER(5),
  COLU NUMBER(5),
  COLV NUMBER(5),
  COLW NUMBER(5));

INSERT INTO
  T1
SELECT
  COL.COLX,
  MOD(ROWNUM,1000)+1 COLY,

  ABS(ROUND(SIN(CNT.RN/180)*100,0)) COLZ,
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100)),
  ROUND(DBMS_RANDOM.VALUE(1,100))

FROM
  (SELECT
    ROWNUM RN
  FROM
    DUAL
  CONNECT BY
    LEVEL<=10000) CNT,
  (SELECT
    CHR(ROWNUM+64) COLX,
    ROWNUM C
  FROM
    DUAL
  CONNECT BY
    LEVEL<=26) COL;

260000 rows created.

COMMIT; SELECT

  COLX,
  COLY,
  COLZ,

  COLA
FROM
  T1
WHERE
  ROWNUM<=200;

COLX COLY COLZ COLA
----- ---------- ---------- ----------

K             90          2         14
L             91          2         85
M             92          2         79
N             93          2         82
O             94          2          3
P             95          2         83
...
Z            287          6         64
A            288          7         79
B            289          7         61

EXEC DBMS_STATS.GATHER_TABLE_STATS(OWNNAME=>USER,TABNAME=>'T1'); Now that the table has been created, and statistics gathered, we start the experiments. SQL statement, DBMS_XPLAN with all stats last, and the row source execution plan from a 10046 trace (Maxim's SQL statement is fast!):
SELECT DISTINCT
  COLX,
  COLY,
  COLZ
FROM
  (SELECT
    COLX,
    COLY OLD_COLY,
    MAX(COLY) OVER (PARTITION BY COLX) COLY,     MAX(COLZ) OVER (PARTITION BY COLX, COLY) COLZ   FROM
    T1)
WHERE
  OLD_COLY=COLY; COLX COLY COLZ
----- ---------- ----------

I           1000        100
J            999        100
T            999         99
U           1000         99
B            999        100
F            999        100
P            999        100
R            999        100
H            999        100
N            999        100
Q           1000        100
E           1000        100
G           1000        100
M           1000        100
O           1000        100
X            999        100
K           1000        100
W           1000        100
L            999        100
Y           1000        100
D            999        100
V            999        100
Z            999        100
A           1000        100
C           1000        100
S           1000        100

---------------------------------------------------------------------------------------------------------------------------
| Id  | Operation            | Name | Starts | E-Rows | A-Rows |   A-
Time | Buffers | Reads | OMem | 1Mem | Used-Mem |
|   1 |  HASH UNIQUE         |      |      1 |     26 |     26 |
00:00:01.51 |    3022 |   2899 |       |       |          |
|*  2 |   VIEW               |      |      1 |    264K|    520 |
00:00:01.50 |    3022 |   2899 |       |       |          |
|   3 |    WINDOW SORT       |      |      1 |    264K|    260K|
00:00:01.33 |    3022 |   2899 |  8628K|  1150K| 7669K (0)|
|   4 |     TABLE ACCESS FULL| T1   |      1 |    264K|    260K|
00:00:00.26 |    3022 |   2899 |       |       |          |
---------------------------------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):


   2 - filter("OLD_COLY"="COLY")

1.421 Seconds of CPU time
FETCH
#8:c=1421875,e=1505456,p=2899,cr=3022,cu=0,mis=0,r=26,dep=0,og=1,tim=4406046677 STAT #8 id=1 cnt=26 pid=0 pos=1 obj=0 op='HASH UNIQUE (cr=3022 pr=2899 pw=0 time=1505477 us)'
STAT #8 id=2 cnt=520 pid=1 pos=1 obj=0 op='VIEW (cr=3022 pr=2899 pw=0 time=1502946 us)'

STAT #8 id=3 cnt=260000 pid=2 pos=1 obj=0 op='WINDOW SORT (cr=3022
pr=2899 pw=0 time=1334835 us)'
STAT #8 id=4 cnt=260000 pid=3 pos=1 obj=54782 op='TABLE ACCESS FULL T1
(cr=3022 pr=2899 pw=0 time=260081 us)'

SELECT
  COLX,
  COLY,
  COLZ
FROM
  (SELECT
    COLX,
    COLY OLD_COLY,
    MAX(COLY) OVER (PARTITION BY COLX) COLY,     MAX(COLZ) OVER (PARTITION BY COLX, COLY) COLZ   FROM
    T1)
WHERE
  OLD_COLY=COLY
GROUP BY
  COLX,
  COLY,
  COLZ;
| Id  | Operation            | Name | Starts | E-Rows | A-Rows |   A-
Time   | Buffers | Reads  |  OMem |  1Mem | Used-Mem |
---------------------------------------------------------------------------------------------------------------------------
|   1 |  HASH GROUP BY       |      |      1 |     26 |     26 |
00:00:01.56 |    3022 |   2899 |       |       |          |
|*  2 |   VIEW               |      |      1 |    264K|    520 |
00:00:01.56 |    3022 |   2899 |       |       |          |
|   3 |    WINDOW SORT       |      |      1 |    264K|    260K|
00:00:01.35 |    3022 |   2899 |  8628K|  1150K| 7669K (0)|
|   4 |     TABLE ACCESS FULL| T1   |      1 |    264K|    260K|
00:00:00.26 |    3022 |   2899 |       |       |          |
---------------------------------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):


   2 - filter("OLD_COLY"="COLY")

1.515 Seconds of CPU Time
FETCH
#13:c=1515625,e=1558810,p=2899,cr=3022,cu=0,mis=0,r=26,dep=0,og=1,tim=4603512997

STAT #13 id=1 cnt=26 pid=0 pos=1 obj=0 op='HASH GROUP BY (cr=3022
pr=2899 pw=0 time=1558837 us)'
STAT #13 id=2 cnt=520 pid=1 pos=1 obj=0 op='VIEW  (cr=3022 pr=2899
pw=0 time=1556393 us)'
STAT #13 id=3 cnt=260000 pid=2 pos=1 obj=0 op='WINDOW SORT (cr=3022
pr=2899 pw=0 time=1351194 us)'
STAT #13 id=4 cnt=260000 pid=3 pos=1 obj=54782 op='TABLE ACCESS FULL
T1 (cr=3022 pr=2899 pw=0 time=260089 us)'

SELECT ColX, ColY, ColZ
FROM ( select ColX, ColY, ColZ, rank () over (partition by ColX order by colY desc, colZ desc) my_rank from T1 ) a where a.my_rank=1;
| Id  | Operation                | Name | Starts | E-Rows | A-Rows |
A-Time | Buffers | Reads | OMem | 1Mem | Used-Mem |
|*  1 |  VIEW                    |      |      1 |    264K|     38 |
00:00:02.09 |    3022 |   2899 |       |       |          |
|*  2 |   WINDOW SORT PUSHED RANK|      |      1 |    264K|    260K|
00:00:01.84 |    3022 |   2899 |    12M|  1367K|   11M (0)|
|   3 |    TABLE ACCESS FULL     | T1   |      1 |    264K|    260K|
00:00:00.26 |    3022 |   2899 |       |       |          |
-------------------------------------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):


   1 - filter("A"."MY_RANK"=1)
   2 - filter(RANK() OVER ( PARTITION BY "COLX" ORDER BY INTERNAL_FUNCTION("COLY") DESC ,INTERNAL_FUNCTION("COLZ")               DESC )<=1)

2.078 Seconds of CPU Time
FETCH
#13:c=2078125,e=2088059,p=2899,cr=3022,cu=0,mis=0,r=38,dep=0,og=1,tim=4783262353 WAIT #13: nam='SQL*Net message from client' ela= 30556 driver id=1413697536 #bytes=1 p3=0 obj#=54782 tim=4783295432 STAT #13 id=1 cnt=38 pid=0 pos=1 obj=0 op='VIEW (cr=3022 pr=2899 pw=0 time=2087814 us)'

STAT #13 id=2 cnt=260000 pid=1 pos=1 obj=0 op='WINDOW SORT PUSHED RANK
(cr=3022 pr=2899 pw=0 time=1837749 us)'
STAT #13 id=3 cnt=260000 pid=2 pos=1 obj=54782 op='TABLE ACCESS FULL
T1 (cr=3022 pr=2899 pw=0 time=260091 us)'

select colx,
max(coly) keep(dense_rank last order by coly,colz) , max(colz) keep(dense_rank last order by coly,colz) from t1
group by colx;

| Id | Operation | Name | Starts | E-Rows | A-Rows | A- Time | Buffers | Reads | OMem | 1Mem | Used-Mem |

| 1 | SORT GROUP BY | | 1 | 26 | 26 | 00:00:00.65 | 3022 | 2899 | 9216 | 9216 | 8192 (0)|
|   2 |   TABLE ACCESS FULL| T1   |      1 |    264K|    260K|
00:00:00.26 |    3022 |   2899 |       |       |          |
-------------------------------------------------------------------------------------------------------------------------

0.609 Seconds of CPU Time
FETCH
#13:c=609375,e=651450,p=2899,cr=3022,cu=0,mis=0,r=26,dep=0,og=1,tim=4954930446 WAIT #13: nam='SQL*Net message from client' ela= 5631 driver id=1413697536 #bytes=1 p3=0 obj#=54782 tim=4954936214

STAT #13 id=1 cnt=26 pid=0 pos=1 obj=0 op='SORT GROUP BY (cr=3022
pr=2899 pw=0 time=651409 us)'
STAT #13 id=2 cnt=260000 pid=1 pos=1 obj=54782 op='TABLE ACCESS FULL
T1 (cr=3022 pr=2899 pw=0 time=260118 us)'

SELECT ColX, ColY, ColZ
FROM T1
WHERE (ColX, ColY, ColZ) IN
  (SELECT A.ColX, A.ColY, max(B.ColZ)
   FROM
     (SELECT ColX, max(ColY) ColY
      FROM T1
      GROUP BY ColX) A,
     T1 B

   WHERE A.ColX = B.ColX
   AND A.ColY = B.ColY
   GROUP BY A.ColX, A.ColY);

| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | Reads | OMem | 1Mem | Used-Mem |

|* 1 | HASH JOIN RIGHT SEMI | | 1 | 1 | 38 | 00:00:01.75 | 9066 | 8697 | 1206K| 1206K| 1212K (0)|
|   2 |   VIEW                  | VW_NSO_1 |      1 |     26 |     26 |
00:00:01.13 |    6044 |   5798 |       |       |          |
|   3 |    HASH GROUP BY        |          |      1 |     26 |     26 |
00:00:01.13 |    6044 |   5798 |       |       |          |
|*  4 |     HASH JOIN           |          |      1 |    264 |    520 |
00:00:01.13 |    6044 |   5798 |  1348K|  1348K| 1177K (0)|
|   5 |      VIEW               |          |      1 |     26 |     26 |
00:00:00.52 |    3022 |   2899 |       |       |          |
|   6 |       HASH GROUP BY     |          |      1 |     26 |     26 |
00:00:00.52 |    3022 |   2899 |       |       |          |
|   7 |        TABLE ACCESS FULL| T1       |      1 |    264K|    260K|
00:00:00.26 |    3022 |   2899 |       |       |          |
|   8 |      TABLE ACCESS FULL  | T1       |      1 |    264K|    260K|
00:00:00.26 |    3022 |   2899 |       |       |          |
|   9 |   TABLE ACCESS FULL     | T1       |      1 |    264K|    260K|
00:00:00.26 |    3022 |   2899 |       |       |          |
----------------------------------------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):


   1 - access("COLX"="$nso_col_1" AND "COLY"="$nso_col_2" AND "COLZ"="$nso_col_3")

   4 - access("A"."COLX"="B"."COLX" AND "A"."COLY"="B"."COLY")

1.609 Seconds of CPU Time
FETCH
#13:c=1609375,e=1751476,p=8697,cr=9066,cu=0,mis=0,r=38,dep=0,og=1,tim=40897698

STAT #13 id=1 cnt=38 pid=0 pos=1 obj=0 op='HASH JOIN RIGHT SEMI
(cr=9066 pr=8697 pw=0 time=1751397 us)'
STAT #13 id=2 cnt=26 pid=1 pos=1 obj=0 op='VIEW  VW_NSO_1 (cr=6044
pr=5798 pw=0 time=1132471 us)'
STAT #13 id=3 cnt=26 pid=2 pos=1 obj=0 op='HASH GROUP BY (cr=6044
pr=5798 pw=0 time=1132438 us)'
STAT #13 id=4 cnt=520 pid=3 pos=1 obj=0 op='HASH JOIN  (cr=6044
pr=5798 pw=0 time=1128901 us)'
STAT #13 id=5 cnt=26 pid=4 pos=1 obj=0 op='VIEW (cr=3022 pr=2899 pw=0 time=519125 us)'
STAT #13 id=6 cnt=26 pid=5 pos=1 obj=0 op='HASH GROUP BY (cr=3022
pr=2899 pw=0 time=519095 us)'
STAT #13 id=7 cnt=260000 pid=6 pos=1 obj=54782 op='TABLE ACCESS FULL
T1 (cr=3022 pr=2899 pw=0 time=260092 us)' STAT #13 id=8 cnt=260000 pid=4 pos=2 obj=54782 op='TABLE ACCESS FULL T1 (cr=3022 pr=2899 pw=0 time=260095 us)' STAT #13 id=9 cnt=260000 pid=1 pos=2 obj=54782 op='TABLE ACCESS FULL T1 (cr=3022 pr=2899 pw=0 time=260092 us)'

It might still be a "mad max" query, but it was a fun exercise.

Charles Hooper
IT Manager/Oracle DBA
K&M Machine-Fabricating, Inc. Received on Fri Jul 11 2008 - 17:07:03 CDT

Original text of this message