Home » RDBMS Server » Performance Tuning » Need to improve query performance (Oracle 10g)
Need to improve query performance [message #349657] |
Mon, 22 September 2008 09:47 |
RAJANKK4
Messages: 4 Registered: September 2008
|
Junior Member |
|
|
Hi,
I need to improve the performance of a query. Currently the query is failing due to huge amount of data in tables. Oracle gurus, please help.
SELECT TL_PAYABLE_TIME.EMPLID
, TL_PAYABLE_TIME.EMPL_RCD
, TL_PAYABLE_TIME.DUR
, TL_PAYABLE_TIME.SEQ_NBR
, PERSONAL_DATA.NAME
, JOB.BARG_UNIT
, TL_PAYABLE_TIME.TRC
, TL_TRC.DESCR AS OH_TRC_DESCR
, TL_ERNCD.TL_ERNCD
, TL_PAYABLE_TIME.TL_QUANTITY
, SCH_ASSIGN.SCHEDULE_ID
, SCH_CLND.SCHED_HRS
, TL_PAYABLE_TIME.PAYABLE_STATUS
, TL_PAYABLE_TIME.PAYROLL_REQ_NUM
, TL_PAYABLE_TIME.FROZEN_SW
, TL_PAYABLE_TIME.FROZEN_DATE
, TL_PAYABLE_TIME.PUBLISH_SW
, TL_PAYABLE_TIME.ACTUAL_PUB_DATE
, TL_PAYABLE_TIME.APPRV_PRCS_DTTM
, TL_PAYABLE_TIME.OPRID
, TL_PAYABLE_TIME.ORIG_SEQ_NBR
, TL_PAYABLE_TIME.OVERRIDE_RATE
, TL_PAYABLE_TIME.RECORD_ONLY_ADJ
, TL_PAYABLE_TIME.LBR_DIST_AMT
, TL_PAYABLE_TIME.EST_GROSS
, TL_PAYABLE_TIME.DILUTED_GROSS
, TL_PAYABLE_TIME.SOURCE_IND
, TL_PAYABLE_TIME.TASKGROUP
, TL_PAYABLE_TIME.BILLABLE_IND
, TL_PAYABLE_TIME.ACCT_CD
, TL_PAYABLE_TIME.ACCOUNT
, TL_PAYABLE_TIME.DEPTID_CF AS OH_CF_DEPTID
, TL_PAYABLE_TIME.PROJECT_ID
, TL_PAYABLE_TIME.FUND_CODE
, TL_PAYABLE_TIME.PROGRAM_CODE
, TL_PAYABLE_TIME.CLASS_FLD
, TL_PAYABLE_TIME.CHARTFIELD1
, TL_PAYABLE_TIME.CHARTFIELD2
, TL_PAYABLE_TIME.CHARTFIELD3
, TL_PAYABLE_TIME.BUDGET_REF
, JOB.COMPANY
, TL_PAYABLE_TIME.BUSINESS_UNIT
, TL_PAYABLE_TIME.PRODUCT
, TL_PAYABLE_TIME.CUSTOMER
, TL_PAYABLE_TIME.BUSINESS_UNIT_PC
, TL_PAYABLE_TIME.BUSINESS_UNIT_PF
, TL_PAYABLE_TIME.SETID_ACTIVITY
, TL_PAYABLE_TIME.ACTIVITY_ID
, TL_PAYABLE_TIME.RESOURCE_TYPE
, TL_PAYABLE_TIME.SETID_RESOURCE
, TL_PAYABLE_TIME.RESOURCE_CATEGORY
, TL_PAYABLE_TIME.RESOURCE_SUB_CAT
, TL_PAYABLE_TIME.TASK
, TL_PAYABLE_TIME.USER_FIELD_1
, TL_PAYABLE_TIME.USER_FIELD_2
, TL_PAYABLE_TIME.USER_FIELD_3
, TL_PAYABLE_TIME.USER_FIELD_4
, TL_PAYABLE_TIME.USER_FIELD_5
, PERS_NID.NATIONAL_ID
, JOB.SETID_DEPT AS SETID
, JOB.DEPTID
, DEPT.DESCR AS OH_DEPT_DESCR
, JOB.POSITION_NBR
, POSITION_DATA.DESCR AS OH_POSITION_DESCR
, JOB.JOBCODE
, JOBCODE.DESCR AS OH_JOBCODE_DESCR
, JOB.LOCATION
, LOCATION.DESCR AS OH_LOCATION_DESCR
, JOB.UNION_CD
, UNION_TBL.DESCR AS OH_UNION_DESCR
FROM PS_TL_PAYABLE_TIME TL_PAYABLE_TIME LEFT OUTER JOIN PS_OH_S_TL_ERNCD TL_ERNCD ON TL_ERNCD.TRC = TL_PAYABLE_TIME.TRC
AND TL_ERNCD.PAY_SYSTEM = TL_PAYABLE_TIME.PAY_SYSTEM LEFT OUTER JOIN PS_OH_S_SCH_ASSIGN SCH_ASSIGN ON SCH_ASSIGN.EMPLID = TL_PAYABLE_TIME.EMPLID
AND SCH_ASSIGN.EMPL_RCD = TL_PAYABLE_TIME.EMPL_RCD LEFT OUTER JOIN PS_OH_S_SCH_CLD_DL SCH_CLND ON SCH_CLND.SETID = SCH_ASSIGN.SETID
AND SCH_CLND.OH_SCH_ADHOC_IND = SCH_ASSIGN.OH_SCH_ADHOC_IND
AND SCH_CLND.SCHEDULE_ID = SCH_ASSIGN.SCHEDULE_ID
AND SCH_CLND.OH_ROTATION_ID = SCH_ASSIGN.OH_ROTATION_ID
AND SCH_CLND.DUR = TL_PAYABLE_TIME.DUR
, PS_JOB JOB LEFT OUTER JOIN PS_S_POSITION_DATA POSITION_DATA ON POSITION_DATA.POSITION_NBR = JOB.POSITION_NBR LEFT OUTER JOIN PS_DEPT_TBL DEPT ON DEPT.DEPTID = JOB.DEPTID
AND DEPT.SETID = JOB.SETID_DEPT LEFT OUTER JOIN PS_S_UNION_TBL UNION_TBL ON UNION_TBL.UNION_CD = JOB.UNION_CD
, PS_PERSONAL_DATA PERSONAL_DATA
, PS_PERS_NID PERS_NID
, PS_TL_TRC_TBL TL_TRC
, PS_LOCATION_TBL LOCATION
, PS_JOBCODE_TBL JOBCODE
WHERE TL_PAYABLE_TIME.EMPLID = PERSONAL_DATA.EMPLID
AND TL_PAYABLE_TIME.EMPLID = PERS_NID.EMPLID
AND PERS_NID.COUNTRY = 'USA'
AND PERS_NID.NATIONAL_ID_TYPE = 'PR'
AND TL_PAYABLE_TIME.EMPLID = JOB.EMPLID
AND TL_PAYABLE_TIME.EMPL_RCD = JOB.EMPL_RCD
AND LOCATION.SETID = JOB.SETID_LOCATION
AND LOCATION.LOCATION = JOB.LOCATION
AND JOBCODE.JOBCODE = JOB.JOBCODE
AND JOBCODE.SETID = JOB.SETID_JOBCODE
AND TL_TRC.TRC = TL_PAYABLE_TIME.TRC
AND TL_TRC.EFFDT = (
SELECT MAX(TL_TRC_ED.EFFDT)
FROM PS_TL_TRC_TBL TL_TRC_ED
WHERE TL_TRC.TRC = TL_TRC_ED.TRC
AND TL_TRC_ED.EFFDT <= TL_PAYABLE_TIME.DUR )
AND ( TL_ERNCD.EFFDT = (
SELECT MAX(TL_ERNCD_ED.EFFDT)
FROM PS_OH_S_TL_ERNCD TL_ERNCD_ED
WHERE TL_ERNCD.TRC = TL_ERNCD_ED.TRC
AND TL_ERNCD.PAY_SYSTEM = TL_ERNCD_ED.PAY_SYSTEM
AND TL_ERNCD_ED.EFFDT <= TL_PAYABLE_TIME.DUR )
OR TL_ERNCD.EFFDT IS NULL )
AND ( SCH_ASSIGN.EFFDT = (
SELECT MAX(SCH_ASSIGN_ED.EFFDT)
FROM PS_OH_S_SCH_ASSIGN SCH_ASSIGN_ED
WHERE SCH_ASSIGN.EMPLID = SCH_ASSIGN_ED.EMPLID
AND SCH_ASSIGN.EMPL_RCD = SCH_ASSIGN_ED.EMPL_RCD
AND SCH_ASSIGN_ED.EFFDT <= TL_PAYABLE_TIME.DUR
AND SCH_ASSIGN_ED.END_EFFDT >= TL_PAYABLE_TIME.DUR )
OR SCH_ASSIGN.EFFDT IS NULL )
AND ( SCH_CLND.EFFDT = (
SELECT MAX(SCH_CLND_ED.EFFDT)
FROM PS_OH_S_SCH_CLD_DL SCH_CLND_ED
WHERE SCH_CLND.SETID = SCH_CLND_ED.SETID
AND SCH_CLND.OH_SCH_ADHOC_IND = SCH_CLND_ED.OH_SCH_ADHOC_IND
AND SCH_CLND.SCHEDULE_ID = SCH_CLND_ED.SCHEDULE_ID
AND SCH_CLND.OH_ROTATION_ID = SCH_CLND_ED.OH_ROTATION_ID
AND SCH_CLND.DUR = SCH_CLND_ED.DUR
AND SCH_CLND.OH_SHIFTNUM = SCH_CLND_ED.OH_SHIFTNUM
AND SCH_CLND_ED.EFFDT <= TL_PAYABLE_TIME.DUR )
OR SCH_CLND.EFFDT IS NULL )
AND JOB.EFFDT = (
SELECT MAX(JOB_ED.EFFDT)
FROM PS_JOB JOB_ED
WHERE JOB.EMPLID = JOB_ED.EMPLID
AND JOB.EMPL_RCD = JOB_ED.EMPL_RCD
AND JOB_ED.EFFDT <= TL_PAYABLE_TIME.DUR )
AND JOB.EFFSEQ = (
SELECT MAX(JOB_ES.EFFSEQ)
FROM PS_JOB JOB_ES
WHERE JOB.EMPLID = JOB_ES.EMPLID
AND JOB.EMPL_RCD = JOB_ES.EMPL_RCD
AND JOB.EFFDT = JOB_ES.EFFDT )
AND LOCATION.EFFDT = (
SELECT MAX(LOCATION_ED.EFFDT)
FROM PS_LOCATION_TBL LOCATION_ED
WHERE LOCATION.SETID = LOCATION_ED.SETID
AND LOCATION.LOCATION = LOCATION_ED.LOCATION
AND LOCATION_ED.EFFDT <= JOB.EFFDT )
AND JOBCODE.EFFDT = (
SELECT MAX(JOBCODE_ED.EFFDT)
FROM PS_JOBCODE_TBL JOBCODE_ED
WHERE JOBCODE.SETID = JOBCODE_ED.SETID
AND JOBCODE.JOBCODE = JOBCODE_ED.JOBCODE
AND JOBCODE_ED.EFFDT <= JOB.EFFDT )
AND ( POSITION_DATA.EFFDT = (
SELECT MAX(POSITION_DATA_ED.EFFDT)
FROM PS_S_POSITION_DATA POSITION_DATA_ED
WHERE POSITION_DATA.POSITION_NBR = POSITION_DATA_ED.POSITION_NBR
AND POSITION_DATA_ED.EFFDT <= JOB.EFFDT )
OR POSITION_DATA.EFFDT IS NULL )
AND ( DEPT.EFFDT = (
SELECT MAX(DEPT_ED.EFFDT)
FROM PS_DEPT_TBL DEPT_ED
WHERE DEPT.DEPTID = DEPT_ED.DEPTID
AND DEPT.SETID = DEPT_ED.SETID
AND DEPT_ED.EFFDT <= JOB.EFFDT )
OR DEPT.EFFDT IS NULL )
AND ( UNION_TBL.EFFDT = (
SELECT MAX(UNION_TBL_ED.EFFDT)
FROM PS_S_UNION_TBL UNION_TBL_ED
WHERE UNION_TBL.UNION_CD = UNION_TBL_ED.UNION_CD
AND UNION_TBL_ED.EFFDT <= JOB.EFFDT )
OR UNION_TBL.EFFDT IS NULL )
AND TL_PAYABLE_TIME.DUR >= TO_DATE('2008-01-01' , 'YYYY-MM-DD')
|
|
|
|
Re: Need to improve query performance [message #349669 is a reply to message #349657] |
Mon, 22 September 2008 10:36 |
Kriptas
Messages: 118 Registered: June 2008
|
Senior Member |
|
|
supah
1) post execution plan
2) post tables rowcount
3) do something with you query, that it can be readable, use code tags to save formating in your post.
4) write down execution time
5) what oracle version you using, because in oracle such sub selects in where clause
AND TL_TRC.EFFDT = ( SELECT MAX(TL_TRC_ED.EFFDT)
FROM PS_TL_TRC_TBL TL_TRC_ED
WHERE TL_TRC.TRC = TL_TRC_ED.TRC
AND TL_TRC_ED.EFFDT <= TL_PAYABLE_TIME.DUR )
referring to 2 tables is very slow; MS SQL Server deals with them in no time
maybe you can write smaller size select in which will be that sub select I mentioned, so we can tune small size select, and know what to do next.
At the moment tune that large select without knowing tables and logic is really hard.
I have few ideas how to optimize that query, but you need to do all that things I mentioned.
[Updated on: Mon, 22 September 2008 11:18] Report message to a moderator
|
|
|
Re: Need to improve query performance [message #349690 is a reply to message #349669] |
Mon, 22 September 2008 12:51 |
RAJANKK4
Messages: 4 Registered: September 2008
|
Junior Member |
|
|
Hi Kriptas,
I have put the Execution plan along with execution time in the attached file so that it is readable. The row counts are as follows:
PS_OH_S_SCH_CLD_DL: 277,380
PS_DEPT_TBL: 16,907
PS_LOCATION_TBL: 13,909
PS_JOBCODE_TBL: 11,171
PS_JOB: 650,347
PS_OH_S_TL_ERNCD: 234
PS_OH_S_SCH_ASSIGN: 24,600
PS_TL_PAYABLE_TIME: 40,730,349
PS_PERS_NID: 101,866
PS_PERSONAL_DATA: 101,854
PS_S_UNION_TBL: 22
PS_TL_TRC_TBL: 268
PS_S_POSITION_DATA: 177,736
I use Oracle version 10.2.0.
|
|
|
|
|
|
Re: Need to improve query performance [message #349739 is a reply to message #349657] |
Mon, 22 September 2008 22:49 |
Kriptas
Messages: 118 Registered: June 2008
|
Senior Member |
|
|
run this query
CREATE TABLE TMP_XXX_1 as
SELECT TL_TRC_ED.TRC TRC1, MAX(TL_TRC_ED.EFFDT) MAX1
FROM PS_TL_TRC_TBL TL_TRC_ED
INNER JOIN PS_TL_PAYABLE_TIME TL_PAYABLE_TIME ON TL_TRC_ED.TRC = TL_PAYABLE_TIME.TRC
AND TL_TRC_ED.EFFDT <= TL_PAYABLE_TIME.DUR
GROUP BY TL_TRC_ED.TRC;
TRUNCATE TABLE TMP_XXX_1;
DROP TABLE TMP_XXX_1;
show me the execution plan
and write down the execution time
|
|
|
Re: Need to improve query performance [message #349798 is a reply to message #349669] |
Tue, 23 September 2008 02:55 |
JRowbottom
Messages: 5933 Registered: June 2006 Location: Sunny North Yorkshire, ho...
|
Senior Member |
|
|
You are a big fan of the unsupported assertion, aren't you @Kriptas.
This is a beautiful example:
Quote: | 5) what oracle version you using, because in oracle such sub selects in where clause
AND TL_TRC.EFFDT = ( SELECT MAX(TL_TRC_ED.EFFDT)
FROM PS_TL_TRC_TBL TL_TRC_ED
WHERE TL_TRC.TRC = TL_TRC_ED.TRC
AND TL_TRC_ED.EFFDT <= TL_PAYABLE_TIME.DUR )
refering to 2 tables are wery slow, ms sql server deals with them in no time
|
I have never found this to be the case - sub selects are not very slow unless you've forgotten to index them, in which case no Db on the planet can do anything to help you.
Does it really strike you as believable that with all the effort Oracle have put into the CB Optimizer, they'd ignore something as fundamental as sub-query performance?
I put it to you that it seems slower in Oracle because in Sql Server you would never dream of running a query this complex - Instead you'd start by creating and populating a set of temporary tables for the sub queries, and run the final query against them.
Thus while the final execution of the query seems to take less time, this is only true if you ignore all the time it took to set things up in the first place.
It is certainly the subqueries that are going to be the problem here, but that's because you're running 11 separate subqueries.
Are they all indexed? Ideally, you'd want an index on each of the sub-query tables that contained the join columns used, and the field returned - that way you could get away with an Index Range scan for each one, with no associated table access.
|
|
|
Re: Need to improve query performance [message #349810 is a reply to message #349657] |
Tue, 23 September 2008 03:42 |
Kriptas
Messages: 118 Registered: June 2008
|
Senior Member |
|
|
JRowbottom,
I have tested such query performance, and can say, that oracle have lots of limitation on that query type (just for that type of subselect which I wrote down in my post)
MS SQL SERVER deals with such queries about 100 times faster.
oracle 10g can't have such subselect depending join if it is outer join and etc.
and other thing, i have made more complex queries in ms sql and they ran fine
and if I load some temporary tables to run some queries, i always calculate all time of query with temporary tables load times.. and that time usualy a lot les then oracle execution time with huge subqueries.
so, if you do not have what to compare then the turtle is fasters animal in the world.
[Updated on: Tue, 23 September 2008 04:41] Report message to a moderator
|
|
|
Re: Need to improve query performance [message #349869 is a reply to message #349810] |
Tue, 23 September 2008 06:29 |
rajatratewal
Messages: 507 Registered: March 2008 Location: INDIA
|
Senior Member |
|
|
Quote: |
I have tested such query performance, and can say, that oracle have lots of limitation on that query type (just for that type of subselect which I wrote down in my post)
MS SQL SERVER deals with such queries about 100 times faster.
|
Kriptas No two databases are the same.
The thing applied to one cannot be same for another because they
are internally different.
You can only compare them if you know the internals how they interpret the query.
Regards,
Rajat
|
|
|
Re: Need to improve query performance [message #349872 is a reply to message #349810] |
Tue, 23 September 2008 06:33 |
JRowbottom
Messages: 5933 Registered: June 2006 Location: Sunny North Yorkshire, ho...
|
Senior Member |
|
|
My mileage on this matter seems to vary significantly from yours.
I ran this little test on my out of the box 10g database, running on my laptop (1.8Ghz, 1Gb Ram:drop table test_0105;
drop table test_0106;
create table test_0105 (col_1 number, col_2 number);
create table test_0106 (col_1 number,col_3 number);
insert into test_0105 select level,mod(level,1000) from dual connect by level <=1000000;
insert into test_0106 select level, mod(level,100) from dual connect by level <= 1000000;
create index test_0106_idx on test_0106(col_3,col_1);
select count(*)
from test_0105 t5
where t5.col_1 <= (select max(col_1)
from test_0106 t6
where t6.col_3 = t5.col_2);
It took about 1.4 seconds the first time, and an average of 0.8 seconds thereafter.
I then repeated the test on my out of the box SqlServer Express 2005 database (same laptop)
Table Definitions
CREATE TABLE [dbo].[Test_1](
[col_1] [numeric](18, 0) NULL,
[col_2] [numeric](18, 0) NULL
) ON [PRIMARY];
CREATE TABLE [dbo].[Test_2](
[col_1] [numeric](18, 0) NULL,
[col_3] [numeric](18, 0) NULL
) ON [PRIMARY];
CREATE NONCLUSTERED INDEX [test_2_idx] ON [dbo].[Test_2]
(
[col_3] ASC,
[col_1] ASC
)WITH (PAD_INDEX = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF) ON [PRIMARY];
create table numbers (num numeric(18,0));
insert into numbers(num)
select rank() over (order by object_id,column_id)
from sys.all_columns;
select (n1.num-1)*1000+(n2.num-1) totnum
from numbers n1, numbers n2
where n1.num<= 1000
and n2.num<= 1000
order by totnum;
delete test_1;
delete test_2;
insert into test_1 (col_1,col_2)
select (n1.num-1)*1000+(n2.num-1),n1.num-1
from numbers n1, numbers n2
where n1.num<= 1000
and n2.num<= 1000;
insert into test_2 (col_1,col_3)
select (n1.num-1)*1000+(n2.num-1),n2.num-1
from numbers n1, numbers n2
where n1.num<= 1000
and n2.num<= 1000;
select count(*)
from test_1 t1
where t1.col_1 <= (select max(col_1)
from test_2 t2
where t2.col_3 = t1.col_2);
The Sql Server implementation, that you would have us believe should take in the order of 10 milliseconds.
It took 28 seconds the first time I ran it, and an average of 7-8 seconds for subsequent runs.
I put it to you that you have some work to do if you wish to defend your claim that SqlServer will perform queries like this in 1% of the time that an Oracle database can.
|
|
|
Re: Need to improve query performance [message #349878 is a reply to message #349657] |
Tue, 23 September 2008 06:45 |
Kriptas
Messages: 118 Registered: June 2008
|
Senior Member |
|
|
I do not say, that 2 RDBMS must be the same.
I just say, that MS SQL SERVER with that kind of sub query deals normaly, ORACLE have BIG trouble with that sub select.
for example :
create table tmp_01 as
select accnt_code, crnc_rate
from accounts
inner join crnc_rates on accnt_crnc=crnc_crnc
and crnc_date=(select max(crnc_date) from crnc_rates
where crnc_rates=accnt_crnc and crnc_date<=accnt_open_dt)
table accounts rowcount:4 000 000
Table crnc_rates rowcount 150 000, contains currency rate changes and currency value at that change time.
all statistics are gathered
by that query I need to get account currency rate at account open day.
crnc_rates is ordered by crnc_crnc asc, crnc_date desc and indexed with index on that 2 columns (crnc_crnc asc, crnc_date desc)
that query in ms sql runs about 10-15seconds.
the same query on oracle 10G already runs 15 mins, and still do not finished.
tables on ms sql server, and oracle 10g are the same 100%
indexed with the same indexes
in the provided select from @RAJANKK4 is few sub selects simmilar to that which I posted there.
I think that problem is in that sub selects.
oh, while I wrote this post, oracle finished my select, and it run time was 1566,578 seconds!!!! (compare to 15seconds in ms sql server, and oracle server hardware is 2-5 times powerful then ms sql server)
|
|
|
Re: Need to improve query performance [message #349892 is a reply to message #349657] |
Tue, 23 September 2008 07:11 |
Kriptas
Messages: 118 Registered: June 2008
|
Senior Member |
|
|
dear @JRowbottom,
thank you for posting that example.
Using your script i created tables indexed one of them and runned query on oracle,
it run time first time 2 sec, and other times 1,1-1,4 sec.
after that, i pumped data to ms sql server from oracle tables created by your script (do not indexed tables)
and runned query.
run time was 00:00:00; how many milliseconds it took I do not know....
so it relly faster than oracle...
and i can compare sql server express, because it is whole different product line, and it may have super weak optimiser.
i use oracle enterprise, and sql server standart editions.
and @JRowbottom, then i said, that oracle do have some troubles with some subqueries, i mentioned not your posted subquer type.
look at my posted subquery :
create table tmp_01 as
select accnt_code, crnc_rate
from accounts
inner join crnc_rates on accnt_crnc=crnc_crnc
and crnc_date=(select max(crnc_date) from crnc_rates
where crnc_rates=accnt_crnc and crnc_date<=accnt_open_dt)
if i delete where clause statement "and crnc_date<=accnt_open_dt" in subselect, query will be fast. ir run time will be 20seconds. but if that statement present, query run time will be 1500 seconds.
Try it and you will see.
and by the way... oracle cant join thas sub select with LEFT OUTER JOIN.
[Updated on: Tue, 23 September 2008 08:07] Report message to a moderator
|
|
|
|
Re: Need to improve query performance [message #349935 is a reply to message #349657] |
Tue, 23 September 2008 09:32 |
Kriptas
Messages: 118 Registered: June 2008
|
Senior Member |
|
|
i do not think so, update is slow operation, and that update you post there must be wery slow.
if you want update one table with data from other table, and tables are big, use MERGE statement, it updates tables more quickly.
|
|
|
|
Re: Need to improve query performance [message #349942 is a reply to message #349892] |
Tue, 23 September 2008 09:49 |
JRowbottom
Messages: 5933 Registered: June 2006 Location: Sunny North Yorkshire, ho...
|
Senior Member |
|
|
In general:
Don't post anecdotes - post test cases. As the phrase goes: the plural of anecdote is not data.
Generate a set of test data that reproduces this problem that you are having, and someone here will show you where you have gone wrong.
Re the test case:
You seem to be saying that you've got a machine that will run the SQLServer test case at least an order of magnitude faster than mine, and yet it can't match my performance of the Oracle test.
I remember from some of your earlier posts that your production Oracle database seemed to have some major configuration problems - it was slower than my laptop then, if memory serves.
Would I be right in assuming that you are still having these problems and that this is a symptom of them?
|
|
|
Re: Need to improve query performance [message #349950 is a reply to message #349935] |
Tue, 23 September 2008 10:17 |
JRowbottom
Messages: 5933 Registered: June 2006 Location: Sunny North Yorkshire, ho...
|
Senior Member |
|
|
Quote: | if you want update one table with data from other table, and tables are big, use MERGE statement, it updates tables more quickly.
|
@Kriptas: If you're going to make blanket pronouncements about whether one thing is faster tahn another, you might want to provide a little evidence
As you'll see from the results below, there's nothing in it.:create table test_0109 (col_1 number, col_2 number);
create table test_0110 (col_1 number, col_2 number);
insert into test_0109 select level,null from dual connect by level <=10000;
insert into test_0110 select level, mod(level,100) from dual connect by level <= 10000;
create index test_0110_idx on test_0110(col_1,col_2);
declare
v_iter pls_integer := 10;
v_time pls_integer;
begin
v_time := dbms_utility.get_time;
for i in 1..v_iter loop
update test_0109 t9
set col_2 = (select col_2
from test_0110 t0
where t9.col_1 = t0.col_1);
end loop;
dbms_output.put_line('Update '||to_char(dbms_utility.get_time-v_time));
v_time := dbms_utility.get_time;
for i in 1..v_iter loop
merge into test_0109 t9
using test_0110 t0
on (t0.col_1 = t9.col_1)
when matched then update set t9.col_2 = t0.col_2;
end loop;
dbms_output.put_line('Merge '||to_char(dbms_utility.get_time-v_time));
end;
/
Results
set serveroutput on
Update 753
Merge 727
Update 525
Merge 551
Update 365
Merge 297
Update 461
Merge 652
Even repeating the tests with 10x as many rows shows no conclusive advantage one way or the other:Update 3323
Merge 2750
Update 3002
Merge 3365
I'll grant you that it performs better than any other Update/Insert combination when you are doing conditional Upserts, but when you're just updating, it's just the same as Update.
|
|
|
Re: Need to improve query performance [message #349972 is a reply to message #349657] |
Tue, 23 September 2008 11:25 |
Kriptas
Messages: 118 Registered: June 2008
|
Senior Member |
|
|
@JRowbottom, i can say that you are good oponent and it is pleasure to have this conversation with you. You provide real examples, but examples can be different
so I prepare example from my situation, where oracle performanse is wery poor.
drop table gt_test_0105;
drop table gt_test_0106;
create table gt_test_0105 (col_1 number, col_2 number, date_1 date);
create table gt_test_0106 (col_1 number,col_3 number, date_2 date);
insert into gt_test_0105 select level,mod(level,10), sysdate date_1 from dual connect by level <=1000000;
insert into gt_test_0106 select level, mod(level,10), sysdate-(1+ABS(MOD(dbms_random.random,1000))) date_2 from dual connect by level <= 100000;
create index gt_test_0106_idx on gt_test_0106(col_3 , date_2 );
select count(*)
from gt_test_0105 t5
inner join gt_test_0106 t6 on t6.col_3=t5.col_2
and date_2=(select max(t7.date_2)
from gt_test_0106 t7
where t7.col_3= t5.col_2
and t7.date_2<=t5.date_1)
it returned for me count ~10 000 000
ms sql server standart edition query runs 1 second (tables and gt_test_0105, gt_test_0106 has no indexes)
and about the same amount of time when table gt_test_0106 is indexes with the same index like in oracle. but execution plans are different.
in oracle... it will take a while i'm still wayting for it ends (already about 5 minutes and i think it will take about 20 minutes or more)
you can test it...
regarding servers.... i do not know why, but i think oracle server do not perform @100%, but now he running litle bit faster.
but there is still HUGE problem selecting rows from table where is big index clustering factor.
and about merge statement, i'll test your axamples and provide my test cases.
thanx again for good conversation
|
|
|
Re: Need to improve query performance [message #349995 is a reply to message #349657] |
Tue, 23 September 2008 12:33 |
Kriptas
Messages: 118 Registered: June 2008
|
Senior Member |
|
|
@JRowbottom, and about merge statement
it is wrong use loop for index based query, after 1 run index will be in cached, and other iterations will be fast, so is you using loop, flush buffer please
there is remake of your provided script, but you need to run it whole only one time, and again drop tables, load data to tables and reindex
drop table gt_test_0109
drop table gt_test_0110
create table gt_test_0109 (col_1 number, col_2 number);
create table gt_test_0110 (col_1 number, col_2 number);
insert into gt_test_0109 select level,null from dual connect by level <=100000;
insert into gt_test_0110 select level, mod(level,100) from dual connect by level <= 100000;
create index test_0110_idx on gt_test_0110(col_1,col_2);
declare
v_iter pls_integer := 1;
v_time pls_integer;
begin
v_time := dbms_utility.get_time;
for i in 1..v_iter loop
update gt_test_0109 t9
set col_2 = (select col_2
from gt_test_0110 t0
where t9.col_1 = t0.col_1)
--where col_2 is null
;
end loop;
dbms_output.put_line('Update '||to_char(dbms_utility.get_time-v_time));
v_time := dbms_utility.get_time;
for i in 1..v_iter loop
merge into gt_test_0109 t9
using gt_test_0110 t0
on (t0.col_1 = t9.col_1 /*and t9.col_2 is null*/)
when matched then update set t9.col_2 = t0.col_2;
end loop;
dbms_output.put_line('Merge '||to_char(dbms_utility.get_time-v_time));
end;
in the scrip I use 10 times larger tables, and only 1 loop, so indexes are not cached and etc.
results are
I do not think that there is lots of queries where you need to update the same table with the same values 10 times... always need to do job at 1 time so lets measure only 1 loop.
and results shows that merge is 10 times faster.
Merge uses HASH JOIN of 2 tables and then updates one of them (it is like update view, but it has less limitations)
in simple update there used nested loops to find row which you need to update.. so in merge you do not need index, in simple update you need index.
oh by the way, last query with count( and max in sub select just finished in oracle, it run time 4399.516 seconds (ms sql server standart done in 1 second)
and I droped index test_0110_idx, and run update script...
it runs now for 10 mins, and I think there will be huge difference between simple update and merge update (~100 times or more merge will be faster)
[Updated on: Tue, 23 September 2008 12:36] Report message to a moderator
|
|
|
Re: Need to improve query performance [message #350000 is a reply to message #349657] |
Tue, 23 September 2008 13:26 |
Kriptas
Messages: 118 Registered: June 2008
|
Senior Member |
|
|
@RAJANKK4
did you run query which i asked to run and show execution plan and write down run time?
because with updates you do not get good performance results.
there is problem with sub selects, and we need to find aproach...
please write down short select (select few rows)
with few joins and one join containing subselect
for example one with this join predicate:
AND TL_TRC.EFFDT = ( SELECT MAX(TL_TRC_ED.EFFDT)
FROM PS_TL_TRC_TBL TL_TRC_ED
WHERE TL_TRC.TRC = TL_TRC_ED.TRC
AND TL_TRC_ED.EFFDT <= TL_PAYABLE_TIME.DUR )
and it execution plan, and run time
then we will tune that small select, and will know what to do next.
I do not have your data and can't tune query without your help
|
|
|
Re: Need to improve query performance [message #350203 is a reply to message #350000] |
Wed, 24 September 2008 06:32 |
JRowbottom
Messages: 5933 Registered: June 2006 Location: Sunny North Yorkshire, ho...
|
Senior Member |
|
|
Re Update / Merge
Thank you for the compliment.
You have overlooked two things in your single pass Update/Merge example -
1) The table gets cached, as well as the index
2) Merge uses the index too, and gets to benefit if the index is cached.
Thus whichever proces goes first in the loop gets the handicap of doing the most physical reads the first time it runs.
If you re-run your test and put the Merge and the update tests the other way round, you'll see what a difference it makes.
When I did this test in these two fashions, I got these results:
Merge First
Merge 1239
Update 422
Merge 1244
Update 433
Update First
Update 1793
Merge 319
Update 1399
Merge 325
|
|
|
Re: Need to improve query performance [message #350205 is a reply to message #349972] |
Wed, 24 September 2008 06:42 |
JRowbottom
Messages: 5933 Registered: June 2006 Location: Sunny North Yorkshire, ho...
|
Senior Member |
|
|
Re Subqueries:
Here's a Subquery test case. now, the figures aren't conclusive, as my laptop is groaning under the weight of applications, and (being only a single disk) I'm getting all sorts of I/O wait problems with the Oracle Db:
I modeled this on the problem you described as best as I could.
I cut the number of rows down to 4000, because I've only got a limited amount of patience.
Results
Run 1 Run 2
Oracle 136s 146s
Sql Server 495s 511s
Those performance figures you quite for the SQL Server Db are staggering.
Questions:
1) What sort of box is it running on?
2) Is the Oracle box running on the same box
3) What execution plan is it using?
4) What happens if you replace the count(*) with count(col_1||col_4)?
Oracle Code
create table test_0107 (col_1 number, col_2 number,col_3 date);
create table test_0108 (col_2 number, col_3 date, col_4 number);
insert into test_0107 select level, mod(level,10)+1,sysdate from dual connect by level <=4000;
insert into test_0108 select l1.lvl,trunc(sysdate)+10-l2.lvl, l1.lvl + l2.lvl
from (select level lvl from dual connect by level <= 10) l1
,(select level lvl from dual connect by level <= 75000) l2;
create index test_0108_idx2 on test_0108(col_3 desc,col_2 ,col_4);
create index test_0108_idx on test_0108(col_2,col_3 desc,col_4);
begin
dbms_stats.gather_table_stats(ownname => null
,tabname => 'TEST_0107'
,method_opt => 'FOR ALL COLUMNS'
,cascade => true);
dbms_stats.gather_table_stats(ownname => null
,tabname => 'TEST_0108'
,method_opt => 'FOR ALL COLUMNS'
,cascade => true);
end;
/
select count(col_4)
from test_0107 t7
,test_0108 t8
where t7.col_2 = t8.col_2
and t8.col_3 = (select max(t81.col_3)
from test_0108 t81
where t7.col_2 = t81.col_2
and t8.col_2 = t81.col_2
and t81.col_3 <= t7.col_3
);
[CODE]
SQL Server Code
===============
The Numbers table is the same as the previous test
create table big_numbers (num numeric(10,0));
insert into big_numbers
select (n1.num-1)*1000 + n2.num
from numbers n1,numbers n2
where n1.num <=1000
and n2.num <=1000;
create table test_03 (col_1 numeric(10,0), col_2 numeric(10,0),col_3 datetime);
create table test_04 (col_2 numeric(10,0), col_3 datetime, col_4 numeric(10,0));
create index test_04_idx2 on test_04(col_3 desc,col_2 ,col_4);
create index test_04_idx on test_04(col_2,col_3 desc,col_4);
insert into test_03
select n1.num*10+n2.num, n2.num,getdate()
from numbers n1
,numbers n2
where n1.num <=400
and n2.num <=10;
insert into test_04
select n1.num,getdate()+10-n2.num, n1.num + n2.num
from numbers n1
,big_numbers n2
where n1.num <= 10
and n2.num <= 75000;
select count(col_4)
from test_03 t3
,test_04 t4
where t3.col_2 = t4.col_2
and t4.col_3 = (select max(t41.col_3)
from test_04 t41
where t3.col_2 = t41.col_2
and t4.col_2 = t41.col_2
and t41.col_3 <= t3.col_3
);
[/CODE
|
|
|
|
Re: Need to improve query performance [message #350219 is a reply to message #349657] |
Wed, 24 September 2008 07:47 |
Kriptas
Messages: 118 Registered: June 2008
|
Senior Member |
|
|
yep in that context ms sql server works slower,
but reason for that ir wrongly writen query ....
your query:
select count(col_4)
from test_0107 t7
,test_0108 t8
where t7.col_2 = t8.col_2
and t8.col_3 = (select max(t81.col_3)
from test_0108 t81
where t7.col_2 = t81.col_2
and t8.col_2 = t81.col_2
and t81.col_3 <= t7.col_3
);
why do you need in sub select this part? :
where t7.col_2 = t81.col_2
and t8.col_2 = t81.col_2
this is redundant predicate, t7.col2 will be allways equal to t8.col2 (it is got from first join predicate t7.col_2 = t8.col_2)
so you need only one of them.
use this query, result set will be the same
select count(col_4)
from test_0107 t7
,test_0108 t8
where t7.col_2 = t8.col_2
and t8.col_3 = (select max(t81.col_3)
from test_0108 t81
where t7.col_2 = t81.col_2
and t81.col_3 <= t7.col_3
);
and show results now
|
|
|
Re: Need to improve query performance [message #350246 is a reply to message #349657] |
Wed, 24 September 2008 08:36 |
Kriptas
Messages: 118 Registered: June 2008
|
Senior Member |
|
|
and just try this query on your created tables
select count(1)
from test_0108 t8
where t8.col_3=(select max(t81.col_3)
from test_0108 t81
where t8.col_2 = t81.col_2
and t81.col_3 < t8.col_3
);
in normally written queries MS SQL beats Oracle so badly...
in your last select you used redundant predicates, but you did not write down the run-time results of the example queries I posted...
this query is very simple, I'd say; MS SQL does it in 1 sec, Oracle... oh Oracle... I'm still waiting for it to finish its job.
try this query and post the run-time results on MS SQL and Oracle...
but with no cheating: just this query, with the same tables whose data-loading script you posted in your last post.
[Updated on: Wed, 24 September 2008 08:47] Report message to a moderator
|
|
|
Re: Need to improve query performance [message #350261 is a reply to message #350219] |
Wed, 24 September 2008 09:13 |
JRowbottom
Messages: 5933 Registered: June 2006 Location: Sunny North Yorkshire, ho...
|
Senior Member |
|
|
Well cover me in fish paste and leave me out for the Deep Ones.
I apologise unreservedly. Sql server runs that query nearly 3 orders of magnitude faster than oracle does - I upped the number of rows in the Sql server test to 40,000 and it took 14 seconds.
Can you explain what it's doing behind the scenes? I'd love to know.
I also apologise for the delay in replying - for the sake of my pride, I thought I'd see if there was a way of rewriting the query to improve the performance, and there is:
This query here, which returns the same set of data runs on a 4,000,000 row data set in 8.5 seconds
select count(col_4)
from (
select t7.col_1
,t7.col_2
,(select distinct first_value(t8.col_4) over (order by t8.col_3 desc)
from test_0108 t8
where t8.col_2 = t7.col_2
and t8.col_3 <= t7.col_3) col_4
from test_0107 t7);
However, this does not alter the fact that you are right, and Sql Server does run that query much faster than Oracle does.
[Corrected query - performance figures unchanged]
[Updated on: Wed, 24 September 2008 09:23] Report message to a moderator
|
|
|
Re: Need to improve query performance [message #350275 is a reply to message #349657] |
Wed, 24 September 2008 10:08 |
Kriptas
Messages: 118 Registered: June 2008
|
Senior Member |
|
|
I do not understand your sentence:
This query here, which returns the same set of data runs on a 4,000,000 row data set in 8.5 seconds
select count(col_4)
from (
select t7.col_1
,t7.col_2
,(select distinct first_value(t8.col_4) over (order by t8.col_3 desc)
from test_0108 t8
where t8.col_2 = t7.col_2
and t8.col_3 <= t7.col_3) col_4
from test_0107 t7);
I tried it at home just now, and it runs on the same tables in 2.5 seconds.
your script was used for table creation:
insert into test_0107 select level, mod(level,10)+1,sysdate from dual connect by level <=4000;
the query you provided does not have the same purpose as mine, but if you know what data you need in the output record set, you can use workarounds.
the best workaround we found for the same query is:
select count(col_4)
from (
select t7.col_1
,t7.col_2
,(select t8.col_4 from test_0108 t8
where t8.col_2 = t7.col_2
and t8.col_3= (select max(t81.col_3)
from test_0108 t81
where t81.col_2 = t7.col_2
and t81.col_3<=t7.col_3)) col_4
from test_0107 t7);
if we need, we can put that inline query to function, but use of any function (UDF) hit performance ~10 times for ms sql server and oracle, but in oracle you can use predicated option in function creation header, so performance hit will be minimal.
|
|
|
Re: Need to improve query performance [message #350280 is a reply to message #349657] |
Wed, 24 September 2008 10:19 |
Kriptas
Messages: 118 Registered: June 2008
|
Senior Member |
|
|
and about MS SQL Server:
it has good query-rewrite algorithms, and what it does behind the scenes I do not know..
you can see in the execution plan that it uses eager index spools, lazy index spools, online bitmapping, etc.
but at the moment I am seeking an answer to why MS SQL Server uses indexes 10000 times faster than Oracle in some cases.
if the index clustering factor is huge, Oracle uses indexes very slowly; it is a big problem for me.
like I wrote in my first post on this forum, the same query on MS SQL Server on the same data runs in 4 minutes, while the same query, on the same data, on Oracle runs for 3 hours...
after table reorganisation and clustering-factor minimisation the query on Oracle runs in ~30 seconds; on MS SQL Server it runs in 15 seconds.
so I can't understand why there is such a big performance hit on Oracle when a table has a big clustering factor
(the clustering factor is the same on the MS SQL Server machine).
I tried to generate the test data, but can't get the same clustering factor.
table reorganisation on the Oracle database is not the answer — it takes too much time... we would need to periodically reorganise the whole database... and that is impossible..
and what to do with tables where several columns are used in filtering/join predicates? if I reorganise the table by one column, the result will be negative for the other column...
|
|
|
Re: Need to improve query performance [message #350283 is a reply to message #350210] |
Wed, 24 September 2008 10:28 |
JRowbottom
Messages: 5933 Registered: June 2006 Location: Sunny North Yorkshire, ho...
|
Senior Member |
|
|
True - if there is no index, the Update will perform poorly.
however, if the number of rows to be updated would make the hash join too large for your Hash area size, then the performance of the Merge will go downhill very quickly.
Whether or not the index has a high clustering factor doesn't make much of a difference. I ran an additional set of tests on a table populated with this command:insert into gt_test_0111 select to_number(reverse(to_char(level,'000000'))), mod(level,100) from dual connect by level < 100000; to ensure that the clustering factor would be as high as possible, and the results show no marked performance drop for the clustered index.
Also, if you need to update a column conditionally, then Merge is not the tool of choice:
Populate Gt_Test_0109 like this:insert into gt_test_0109 select level,case when mod(level,100)=1 then null else 1 end from dual connect by level <100000; and run the tests like this:declare
v_iter pls_integer := 1;
v_time pls_integer;
begin
-- Pre cache table
for i in 1..v_iter loop
update gt_test_0109 t9
set col_2 = (select col_2
from gt_test_0110 t0
where t9.col_1 = t0.col_1)
;
end loop;
rollback;
v_time := dbms_utility.get_time;
for i in 1..v_iter loop
merge into gt_test_0109 t9
using (select * from gt_test_0110 where col_2 is null) t0
on (t0.col_1 = t9.col_1)
when matched then update set t9.col_2 = t0.col_2;
end loop;
dbms_output.put_line('Merge '||to_char(dbms_utility.get_time-v_time));
rollback;
v_time := dbms_utility.get_time;
for i in 1..v_iter loop
update gt_test_0109 t9
set col_2 = (select col_2
from gt_test_0110 t0
where t9.col_1 = t0.col_1)
where col_2 is null;
end loop;
dbms_output.put_line('Update '||to_char(dbms_utility.get_time-v_time));
end;
/ and I get these results:Merge 77
Update 9
Merge 53
Update 11
Merge 67
Update 15
Now, if you use 10* as many rows, then Merge starts to pull ahead again. They seems to break even somewhere round the 300,000 mark on mine
I would say (in conclusion) that neither one is consistently better than the other in all cases.
|
|
|
Re: Need to improve query performance [message #350291 is a reply to message #349657] |
Wed, 24 September 2008 10:39 |
Kriptas
Messages: 118 Registered: June 2008
|
Senior Member |
|
|
I think you wrote the merge statement wrongly in your last post.
merge performs better if the conditions are listed after ON, in the join predicate list:
merge into gt_test_0109 t9
using gt_test_0110 t0
on (t0.col_1 = t9.col_1 AND t0.col_2 is null)
when matched then update set t9.col_2 = t0.col_2;
I have not tried that query yet, because my Oracle SQL Developer hung up (damn JAVA)
and I'm waiting until it is operational again.
and one more thing: in your script, in the merge statement you use t0.col_2 is not null, while in the simple update statement you use t9.col_2 is not null.....
and I can't run the query I wrote down at home: SQL Developer shows me an error that columns referenced in the ON clause cannot be updated.
but at work I used that statement (see my previous posts, where I put my version of the script; the predicates t0.col_2 is null are commented out there, and the results there were better for the merge statement... but there were other mistakes with table pre-caching, so I may now see different results)
and one more thing: using a simple update or a merge statement is like choosing a
join.
if you need a few rows from the joined table, then nested loops will be faster. if you need lots of rows, then a hash join will be faster.
simple update = nested loops
merge update = hash join.
I'm working with large tables, so 90% of the time I use only hash joins in my work...
[Updated on: Wed, 24 September 2008 11:07] Report message to a moderator
|
|
|
Re: Need to improve query performance [message #350298 is a reply to message #350291] |
Wed, 24 September 2008 11:04 |
JRowbottom
Messages: 5933 Registered: June 2006 Location: Sunny North Yorkshire, ho...
|
Senior Member |
|
|
Well spotted with the Merge
The "correct" merge statement would be:merge into gt_test_0109 t9
using gt_test_0110 t0
on (t0.col_1 = t9.col_1 AND t9.col_2 is null)
when matched then update set t9.col_2 = t0.col_2;
as the test is to update gt_test_0109 only where gt_test_0109.col_2 is null.
If you try that merge, you'll find it errors - you're not allowed to update any of the columns in the ON clause.
I'm not certain you actually can use MERGE to do that sort of update without a view and an Instead Of trigger.
I'll try and look at that tomorrow.
[rewrite to add more details]
[Updated on: Wed, 24 September 2008 11:15] Report message to a moderator
|
|
|
|
Re: Need to improve query performance [message #350327 is a reply to message #349657] |
Wed, 24 September 2008 14:34 |
Kriptas
Messages: 118 Registered: June 2008
|
Senior Member |
|
|
@RAJANKK4
i made new query, i do not know will it be faster than yours, but please try it, and inform about results
one column i do not select at the moment, i need to know if this query performs better, if so then i finish it.
are there indexes on the tables which are accessed by the 'MAX' subqueries?
and if so, what indexes?
SELECT TL_PAYABLE_TIME.EMPLID
, TL_PAYABLE_TIME.EMPL_RCD
, TL_PAYABLE_TIME.DUR
, TL_PAYABLE_TIME.SEQ_NBR
, PERSONAL_DATA.NAME
, JOB.BARG_UNIT
, TL_PAYABLE_TIME.TRC
-----------------------------------------------------------------------------------------------------------
-- , TL_TRC.DESCR AS OH_TRC_DESCR
,( SELECT TL_TRC.DESCR
FROM PS_TL_TRC_TBL TL_TRC
WHERE TL_TRC.TRC = TL_PAYABLE_TIME.TRC
AND TL_TRC.EFFDT = ( SELECT MAX(TL_TRC_ED.EFFDT)
FROM PS_TL_TRC_TBL TL_TRC_ED
WHERE TL_TRC_ED.TRC = TL_PAYABLE_TIME.TRC
AND TL_TRC_ED.EFFDT <= TL_PAYABLE_TIME.DUR )) AS OH_TRC_DESCR
-----------------------------------------------------------------------------------------------------------
-- , TL_ERNCD.TL_ERNCD
,( SELECT TL_ERNCD.TL_ERNCD
FROM PS_OH_S_TL_ERNCD TL_ERNCD
WHERE TL_ERNCD.TRC = TL_PAYABLE_TIME.TRC
AND TL_ERNCD.PAY_SYSTEM = TL_PAYABLE_TIME.PAY_SYSTEM
AND TL_ERNCD.EFFDT = ( SELECT MAX(TL_ERNCD_ED.EFFDT)
FROM PS_OH_S_TL_ERNCD TL_ERNCD_ED
WHERE TL_ERNCD_ED.TRC = TL_PAYABLE_TIME.TRC
AND TL_ERNCD_ED.PAY_SYSTEM = TL_PAYABLE_TIME.PAY_SYSTEM
AND TL_ERNCD_ED.EFFDT <= TL_PAYABLE_TIME.DUR )) AS TL_ERNCD
, TL_PAYABLE_TIME.TL_QUANTITY
-----------------------------------------------------------------------------------------------------------
-- , SCH_ASSIGN.SCHEDULE_ID
,( SELECT SCH_ASSIGN.SCHEDULE_ID
FROM PS_OH_S_SCH_ASSIGN SCH_ASSIGN
WHERE SCH_ASSIGN.EMPLID = TL_PAYABLE_TIME.EMPLID
AND SCH_ASSIGN.EMPL_RCD = TL_PAYABLE_TIME.EMPL_RCD
AND SCH_ASSIGN.EFFDT = ( SELECT MAX(SCH_ASSIGN_ED.EFFDT)
FROM PS_OH_S_SCH_ASSIGN SCH_ASSIGN_ED
WHERE SCH_ASSIGN_ED.EMPLID = TL_PAYABLE_TIME.EMPLID
AND SCH_ASSIGN_ED.EMPL_RCD = TL_PAYABLE_TIME.EMPL_RCD
AND SCH_ASSIGN_ED.EFFDT <= TL_PAYABLE_TIME.DUR
AND SCH_ASSIGN_ED.END_EFFDT >= TL_PAYABLE_TIME.DUR )) AS SCHEDULE_ID
-----------------------------------------------------------------------------------------------------------
--!!!!! , SCH_CLND.SCHED_HRS !!!! --------------------
, NULL AS SCHED_HRS
, TL_PAYABLE_TIME.PAYABLE_STATUS
, TL_PAYABLE_TIME.PAYROLL_REQ_NUM
, TL_PAYABLE_TIME.FROZEN_SW
, TL_PAYABLE_TIME.FROZEN_DATE
, TL_PAYABLE_TIME.PUBLISH_SW
, TL_PAYABLE_TIME.ACTUAL_PUB_DATE
, TL_PAYABLE_TIME.APPRV_PRCS_DTTM
, TL_PAYABLE_TIME.OPRID
, TL_PAYABLE_TIME.ORIG_SEQ_NBR
, TL_PAYABLE_TIME.OVERRIDE_RATE
, TL_PAYABLE_TIME.RECORD_ONLY_ADJ
, TL_PAYABLE_TIME.LBR_DIST_AMT
, TL_PAYABLE_TIME.EST_GROSS
, TL_PAYABLE_TIME.DILUTED_GROSS
, TL_PAYABLE_TIME.SOURCE_IND
, TL_PAYABLE_TIME.TASKGROUP
, TL_PAYABLE_TIME.BILLABLE_IND
, TL_PAYABLE_TIME.ACCT_CD
, TL_PAYABLE_TIME.ACCOUNT
, TL_PAYABLE_TIME.DEPTID_CF AS OH_CF_DEPTID
, TL_PAYABLE_TIME.PROJECT_ID
, TL_PAYABLE_TIME.FUND_CODE
, TL_PAYABLE_TIME.PROGRAM_CODE
, TL_PAYABLE_TIME.CLASS_FLD
, TL_PAYABLE_TIME.CHARTFIELD1
, TL_PAYABLE_TIME.CHARTFIELD2
, TL_PAYABLE_TIME.CHARTFIELD3
, TL_PAYABLE_TIME.BUDGET_REF
, JOB.COMPANY
, TL_PAYABLE_TIME.BUSINESS_UNIT
, TL_PAYABLE_TIME.PRODUCT
, TL_PAYABLE_TIME.CUSTOMER
, TL_PAYABLE_TIME.BUSINESS_UNIT_PC
, TL_PAYABLE_TIME.BUSINESS_UNIT_PF
, TL_PAYABLE_TIME.SETID_ACTIVITY
, TL_PAYABLE_TIME.ACTIVITY_ID
, TL_PAYABLE_TIME.RESOURCE_TYPE
, TL_PAYABLE_TIME.SETID_RESOURCE
, TL_PAYABLE_TIME.RESOURCE_CATEGORY
, TL_PAYABLE_TIME.RESOURCE_SUB_CAT
, TL_PAYABLE_TIME.TASK
, TL_PAYABLE_TIME.USER_FIELD_1
, TL_PAYABLE_TIME.USER_FIELD_2
, TL_PAYABLE_TIME.USER_FIELD_3
, TL_PAYABLE_TIME.USER_FIELD_4
, TL_PAYABLE_TIME.USER_FIELD_5
, PERS_NID.NATIONAL_ID
, JOB.SETID_DEPT AS SETID
, JOB.DEPTID
-----------------------------------------------------------------------------------------------------------
-- , DEPT.DESCR AS OH_DEPT_DESCR
,( SELECT DEPT.DESCR
FROM PS_DEPT_TBL DEPT
WHERE DEPT.DEPTID = JOB.DEPTID
AND DEPT.SETID = JOB.SETID_DEPT
AND DEPT.EFFDT = ( SELECT MAX(DEPT_ED.EFFDT)
FROM PS_DEPT_TBL DEPT_ED
WHERE DEPT_ED.DEPTID = JOB.DEPTID
AND DEPT_ED.SETID = JOB.SETID_DEPT
AND DEPT_ED.EFFDT <= JOB.EFFDT )) AS OH_DEPT_DESCR
, JOB.POSITION_NBR
-----------------------------------------------------------------------------------------------------------
-- , POSITION_DATA.DESCR AS OH_POSITION_DESCR
,( SELECT POSITION_DATA.DESCR
FROM PS_S_POSITION_DATA POSITION_DATA
WHERE POSITION_DATA.POSITION_NBR = JOB.POSITION_NBR
AND POSITION_DATA.EFFDT = (SELECT MAX(POSITION_DATA_ED.EFFDT)
FROM PS_S_POSITION_DATA POSITION_DATA_ED
WHERE POSITION_DATA_ED.POSITION_NBR = JOB.POSITION_NBR
AND POSITION_DATA_ED.EFFDT <= JOB.EFFDT )) AS OH_POSITION_DESCR
, JOB.JOBCODE
-----------------------------------------------------------------------------------------------------------
-- , JOBCODE.DESCR AS OH_JOBCODE_DESCR
, ( SELECT JOBCODE.DESCR
FROM PS_JOBCODE_TBL JOBCODE
WHERE JOBCODE.JOBCODE = JOB.JOBCODE
AND JOBCODE.SETID = JOB.SETID_JOBCODE
AND JOBCODE.EFFDT = ( SELECT MAX(JOBCODE_ED.EFFDT)
FROM PS_JOBCODE_TBL JOBCODE_ED
WHERE JOBCODE_ED.SETID = JOB.SETID_JOBCODE
AND JOBCODE_ED.JOBCODE = JOB.JOBCODE
AND JOBCODE_ED.EFFDT <= JOB.EFFDT )) AS OH_JOBCODE_DESCR
, JOB.LOCATION
-----------------------------------------------------------------------------------------------------------
-- , LOCATION.DESCR AS OH_LOCATION_DESCR
, ( SELECT LOCATION.DESCR
FROM PS_LOCATION_TBL LOCATION
WHERE LOCATION.SETID = JOB.SETID_LOCATION
AND LOCATION.LOCATION = JOB.LOCATION
AND LOCATION.EFFDT = ( SELECT MAX(LOCATION_ED.EFFDT)
FROM PS_LOCATION_TBL LOCATION_ED
WHERE LOCATION_ED.SETID = JOB.SETID_LOCATION
AND LOCATION_ED.LOCATION = JOB.LOCATION
AND LOCATION_ED.EFFDT <= JOB.EFFDT )) AS OH_LOCATION_DESCR
, JOB.UNION_CD
-----------------------------------------------------------------------------------------------------------
-- , UNION_TBL.DESCR AS OH_UNION_DESCR
,( SELECT UNION_TBL.DESCR
FROM PS_S_UNION_TBL UNION_TBL
WHERE UNION_TBL.UNION_CD = JOB.UNION_CD
AND UNION_TBL.EFFDT = ( SELECT MAX(UNION_TBL_ED.EFFDT)
FROM PS_S_UNION_TBL UNION_TBL_ED
WHERE UNION_TBL_ED.UNION_CD = JOB.UNION_CD
AND UNION_TBL_ED.EFFDT <= JOB.EFFDT )) AS OH_UNION_DESCR
FROM PS_TL_PAYABLE_TIME TL_PAYABLE_TIME
INNER JOIN PS_PERSONAL_DATA PERSONAL_DATA ON TL_PAYABLE_TIME.EMPLID = PERSONAL_DATA.EMPLID
INNER JOIN PS_PERS_NID PERS_NID ON TL_PAYABLE_TIME.EMPLID = PERS_NID.EMPLID
AND PERS_NID.COUNTRY = 'USA'
AND PERS_NID.NATIONAL_ID_TYPE = 'PR'
INNER JOIN PS_JOB JOB ON TL_PAYABLE_TIME.EMPLID = JOB.EMPLID
AND TL_PAYABLE_TIME.EMPL_RCD = JOB.EMPL_RCD
AND JOB.EFFDT = ( SELECT MAX(JOB_ED.EFFDT)
FROM PS_JOB JOB_ED
WHERE JOB.EMPLID = JOB_ED.EMPLID
AND JOB.EMPL_RCD = JOB_ED.EMPL_RCD
AND JOB_ED.EFFDT <= TL_PAYABLE_TIME.DUR )
WHERE TL_PAYABLE_TIME.DUR >= TO_DATE('2008-01-01' , 'YYYY-MM-DD')
|
|
|
Re: Need to improve query performance [message #350392 is a reply to message #349657] |
Wed, 24 September 2008 22:37 |
harrysmall3
Messages: 109 Registered: April 2008 Location: Massachusetts
|
Senior Member |
|
|
Guys, correct me if I am wrong but this is how I am interpreting
the aggregate part of the execution plan for the sub-selects
Step #28 is the parent for the execution path 28 - 58 with each step having one child to return rows up, each child executing once for its parent, and starting with step 58.
The results of step 28 are passed to the filter at the top of the select where records from the hash-joins paths are filtered
for final return to the top (SELECT).
When trying to improve any query i try to think of
what the 'better execution plan' would be.
No doubt the hash join is enormous and there's the time bulk.
I go back to my original statement on approaching this query
and I still believe that improvement lies in creating an execution plan that can filter as many records as possible up front from each join step.
If I have read the query plan correctly then I think this affirms my approach - all of the max/min values collected in
sub-selects are not applied until the end of the execution path
against the enormous data set of all the combined hash steps.
If the query could be modified so that the data retrieved from
the path 58 - 28 could be applied as filter steps for the innermost child steps of the hash paths for the respective tables they address, then bulk join steps will be significantly
improved in time.
So what I need to verify right now is if I am correct in
saying that the data retrieved by the sub-selects is being applied as a filter to the final result of all of the joined data?
In this case, the FILTER operation is like a nested loop,
every record returned by step2 is qualified by the series of lookups in 58 - 28 for inclusion/exclusion.
That is what I would want to change.
I'm trying to decompose the execution plan into blocks and
do this in a divide and conquer approach but I need to know if this premise is correct first.
Thx
Harry
|
|
|
Re: Need to improve query performance [message #350454 is a reply to message #350327] |
Thu, 25 September 2008 02:15 |
JRowbottom
Messages: 5933 Registered: June 2006 Location: Sunny North Yorkshire, ho...
|
Senior Member |
|
|
@RAJANKK4 - it's worth rewriting your query to try inline selects using Analytics as well - they are fast, and work well in cases like this:
SELECT TL_PAYABLE_TIME.EMPLID
, TL_PAYABLE_TIME.EMPL_RCD
, TL_PAYABLE_TIME.DUR
, TL_PAYABLE_TIME.SEQ_NBR
, PERSONAL_DATA.NAME
, JOB.BARG_UNIT
, TL_PAYABLE_TIME.TRC
-----------------------------------------------------------------------------------------------------------
-- , TL_TRC.DESCR AS OH_TRC_DESCR
,( SELECT FIRST_VALUE(TL_TRC.DESCR) OVER (ORDER BY TL_TRC.EFFDT DESC)
FROM PS_TL_TRC_TBL TL_TRC
WHERE TL_TRC.TRC = TL_PAYABLE_TIME.TRC
AND TL_TRC.EFFDT <= TL_PAYABLE_TIME.DUR ) AS OH_TRC_DESCR
-----------------------------------------------------------------------------------------------------------
-- , TL_ERNCD.TL_ERNCD
,( SELECT FIRST_VALUE(TL_ERNCD.TL_ERNCD) OVER (ORDER BY TL_ERNCD.EFFDT DESC)
FROM PS_OH_S_TL_ERNCD TL_ERNCD
WHERE TL_ERNCD.TRC = TL_PAYABLE_TIME.TRC
AND TL_ERNCD.PAY_SYSTEM = TL_PAYABLE_TIME.PAY_SYSTEM
AND TL_ERNCD.EFFDT <= TL_PAYABLE_TIME.DUR ) AS TL_ERNCD
, TL_PAYABLE_TIME.TL_QUANTITY
|
|
|
Re: Need to improve query performance [message #350492 is a reply to message #349657] |
Thu, 25 September 2008 03:47 |
Kriptas
Messages: 118 Registered: June 2008
|
Senior Member |
|
|
@JRowbottom - did you test those selects?
because without analytic functions my select is about 3-5 times faster.
select count(col_4)
from (
select t7.col_1
,t7.col_2
,(select distinct first_value(t8.col_4) over (order by t8.col_3 desc)
from test_0108 t8
where t8.col_2 = t7.col_2
and t8.col_3 <= t7.col_3) col_4
from test_0107 t7);
select count(col_4)
from (
select t7.col_1
,t7.col_2
,(select t8.col_4 from test_0108 t8
where t8.col_2 = t7.col_2
and t8.col_3= (select max(t81.col_3)
from test_0108 t81
where t81.col_2 = t7.col_2
and t81.col_3<=t7.col_3)) col_4
from test_0107 t7);
|
|
|
Re: Need to improve query performance [message #350496 is a reply to message #349657] |
Thu, 25 September 2008 03:50 |
Kriptas
Messages: 118 Registered: June 2008
|
Senior Member |
|
|
and in the main query, I think, the largest problem is with this join sub-select:
INNER JOIN PS_JOB JOB ON TL_PAYABLE_TIME.EMPLID = JOB.EMPLID
AND TL_PAYABLE_TIME.EMPL_RCD = JOB.EMPL_RCD
AND JOB.EFFDT = ( SELECT MAX(JOB_ED.EFFDT)
FROM PS_JOB JOB_ED
WHERE JOB.EMPLID = JOB_ED.EMPLID
AND JOB.EMPL_RCD = JOB_ED.EMPL_RCD
AND JOB_ED.EFFDT <= TL_PAYABLE_TIME.DUR )
regarding the execution plan, it multiplies the row count from 16M up to 41M, and then uses a filter to filter the data...
but lots of dependent columns, and other joins or inline selects, come from that join....
|
|
|
Re: Need to improve query performance [message #350574 is a reply to message #350496] |
Thu, 25 September 2008 08:16 |
JRowbottom
Messages: 5933 Registered: June 2006 Location: Sunny North Yorkshire, ho...
|
Senior Member |
|
|
Are there actually any records in the table PS_JOB with an EFFDT after the TL_PAYABLE_TIME.DUR date?
If there aren't then with the right index, that subquery can be made to fly.
Oddly, I did run timings on the analytic queries.
From the trace file, there is nothing to choose between them - same time to within 3%, same number of consistent reads
select *
from (
select t7.col_1
,t7.col_2
,(select first_value(t8.col_4) over (order by t8.col_3 desc)
from test_0108 t8
where t8.col_2 = t7.col_2
and t8.col_3 <= t7.col_3) col_4
from test_0107 t7)
where rownum > 1
call count cpu elapsed disk query current rows
------- ------ -------- ---------- ---------- ---------- ---------- ----------
Parse 1 0.00 0.00 0 0 0 0
Execute 1 0.00 0.00 0 0 0 0
Fetch 1 1.15 1.12 1167 12148 0 0
------- ------ -------- ---------- ---------- ---------- ---------- ----------
total 3 1.15 1.13 1167 12148 0 0
select *
from (
select t7.col_1
,t7.col_2
,(select t8.col_4
from test_0108 t8
where t8.col_2 = t7.col_2
and t8.col_3 = (select max(t81.col_3)
from test_0108 t81
where t7.col_2 = t81.col_2
and t81.col_3 <= t7.col_3)) col_4
from test_0107 t7)
where rownum > 1
call count cpu elapsed disk query current rows
------- ------ -------- ---------- ---------- ---------- ---------- ----------
Parse 1 0.00 0.00 0 0 0 0
Execute 1 0.00 0.00 0 0 0 0
Fetch 1 1.18 1.15 0 12148 0 0
------- ------ -------- ---------- ---------- ---------- ---------- ----------
total 3 1.18 1.16 0 12148 0 0
|
|
|
Goto Forum:
Current Time: Mon Nov 04 11:49:46 CST 2024
|