Path: news.easynews.com!core-easynews!newsfeed3.easynews.com!easynews.com!easynews!news.glorb.com!postnews1.google.com!not-for-mail
From: neo55592@hotmail.com (Neo)
Newsgroups: comp.databases.object,comp.databases,comp.databases.theory,comp.object
Subject: Re: Nearest Common Ancestor Report (XDb1's $1000 Challenge)
Date: 6 Jun 2004 11:41:56 -0700
Organization: http://groups.google.com
Lines: 245
Message-ID: <4b45d3ad.0406061041.69217398@posting.google.com>
References: <4b45d3ad.0405172323.2c043a28@posting.google.com> <nnlja01mvbfv2f53oq3iag63oc7e9bf9t7@4ax.com> <4b45d3ad.0405201025.7a04d225@posting.google.com> <picqa01mpenqaq3ftiqif7nhiclvpohjm8@4ax.com> <4b45d3ad.0405202106.4d079b08@posting.google.com> <n5vsa0po7v2dbn3s1t1hl3fomslfmfurpq@4ax.com> <4b45d3ad.0405221528.559f5a68@posting.google.com> <m0Src.48689$hH.923163@bgtnsc04-news.ops.worldnet.att.net> <4b45d3ad.0405231249.3a2a28f6@posting.google.com> <mts4b09m5olm07v0b9ppienrdk8au4md6c@4ax.com>
NNTP-Posting-Host: 67.98.160.241
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: 8bit
X-Trace: posting.google.com 1086547316 26427 127.0.0.1 (6 Jun 2004 18:41:56 GMT)
X-Complaints-To: groups-abuse@google.com
NNTP-Posting-Date: Sun, 6 Jun 2004 18:41:56 +0000 (UTC)
Xref: core-easynews comp.databases.object:25940 comp.databases:105449 comp.databases.theory:35746 comp.object:178676
X-Received-Date: Sun, 06 Jun 2004 11:41:17 MST (news.easynews.com)

> I changed my model ... now down to 11.0 ms.

RM Sol#5 is Neo's second iteration to make Hugo's last soution (RM
Sol#2) more comparable with XDb1's db. RM Sol#5 allows each thing to
have 0 to many attributes, and each attribute to have 0 to many
values, (similar to XDb1). The main tables are T_Things, T_Attribs,
T_AttribValues and T_Hierarchies. Other significant differences still
exist such as normalization of symbols in values. RM Sol#5 only
implements values of char data type (other tables would be needed for
other data types). Please advise of errors or improvements to script.
If it looks ok, I would like to run a larger report (1 million result
rows).

Large Report (28,940 rows) Generation Summary (provided by Neo) 
---------------------------------------------------------------------
Solution       Time(sec) Platform          Notes
-------------  --------  ----------------- --------------------------
RM#5 SqlSrvr7   40.5     500 Mhz Server    See details below
XDb1 4.5.7       2.9     500 Mhz Server
XDb1 4.5.9      16.971   233 Mhz PocketPC


Large Report Generation Details for RM#5 SqlServer7:
-----------------------------------------------------------
Create db and insert data using script (see below).

Restart
Run Time
--- ------
1   88 sec

Restart
Run Time
--- ------
1   40 sec
2   37 sec
3   39 sec

Restart
Run Time
--- ------
1   41 sec
2   36 sec
3   38 sec


-----------------------------------------------------------
Below is the abridged SQL Server script for RM Sol#5.
Complete script can be downloaded from www.xdb1.com/Example/Ex076.asp

-- RM Sol#5, Only for char values, Symbols not normalized
-- 200 Goat Hierarchy
-- suppress "(..) row(s) affected" messages
set nocount on
-- Tables to store data
create table things (thing int not null unique,
                     primary key (thing))
--
create table hierTypes (hierType int not null unique,
                        hierTypeName varchar(20) not null unique,
                        primary key (hierType))
--
create table attribTypes (attribType int not null unique,
                          attribTypeName varchar(20) not null unique,
                          primary key (attribType))
--
create table attribs (attrib int not null unique,
                      thing int not null references things,
                      attribType int not null references attribTypes,
                      primary key (attrib))
create unique nonclustered index myIndex60 on attribs(thing,
attribType)
create table attribValues_char (attrib int not null references
attribs,
                                value varchar(100) not null,
                                primary key (attrib, value))
--
create table hierarchies (thing1 int not null references things,
                          thing2 int not null references things,
                          hierType int not null references hierTypes,
                          primary key (hierType, thing1, thing2),
                          check (thing1 <> thing2))
-- Intermediate table is used to generate report
create table ancestors (thing int not null,
                        ancestor int not null,
                        dist int not null,
                        primary key (thing, ancestor))
create unique nonclustered index myIndex90 on ancestors(ancestor,
thing)
-- Table that holds the report
create table NCancestor (ThingX varchar(20) not null,
                         ThingY varchar(20) not null,
                         CmnAnc varchar(20) not null,
                         Dist int not null,
                         primary key (ThingX, ThingY, CmnAnc))
go
-- this procedure creates the report
create proc CommonAncestors
         (@hierTypeName varchar(20))
as
declare @hierType int
select @hierType = hierType
from hierTypes
where hierTypeName = @hierTypeName
-- delete data from previous execution
truncate table ancestors
truncate table NCancestor
-- starting data: thing itself is dist 0, direct leader is dist 1
insert ancestors with (tablockx holdlock) (thing, ancestor, dist)
select distinct thing, thing, 0
from things with (tablockx holdlock)
union all
select thing1, thing2, 1
from hierarchies with (tablockx holdlock)
where hierType = @hierType
-- use iteration to determine indirect leaders
while (@@rowcount<>0)
  insert ancestors with (tablockx holdlock) (thing, ancestor, dist)
  select a1.thing, a2.ancestor, min(a1.dist + a2.dist)
  from ancestors AS a1
  join ancestors AS a2
  on a2.thing = a1.ancestor
  where not exists
    (select *
     from   ancestors AS a3
     where  a3.thing = a1.thing
     and    a3.ancestor = a2.ancestor)
  group by a1.thing, a2.ancestor
-- now find nearest common ancestor for each pair of things
insert NCancestor with (tablockx holdlock) (ThingX, ThingY, CmnAnc,
Dist)
-- select t1.thingName, t2.thingName, ta.thingName, a1.dist + a2.dist
select  attribVal1.value, attribVal2.value, attribValA.value, a1.dist
+ a2.dist
from ancestors AS a1 with (tablockx holdlock)
join ancestors AS a2 on a1.ancestor = a2.ancestor and a1.thing <
a2.thing
join things AS t1 on t1.thing = a1.thing 
join attribs AS attrib1 on attrib1.thing = t1.thing and
attrib1.attribType = 1
join attribValues_char AS attribVal1 on attrib1.attrib =
attribVal1.attrib
join things AS t2 on t2.thing = a2.thing
join attribs AS attrib2 on attrib2.thing = t2.thing and
attrib2.attribType = 1
join attribValues_char AS attribVal2 on attrib2.attrib =
attribVal2.attrib
join things AS ta on ta.thing = a1.ancestor
join attribs AS attribA on attribA.thing = ta.thing and
attribA.attribType = 1
join attribValues_char AS attribValA on attribA.attrib =
attribValA.attrib
where not exists
  (select *
   from ancestors AS a3
   join ancestors AS a4
   on a3.ancestor = a4.ancestor
   and a3.thing < a4.thing
   where a3.thing = a1.thing
   and a4.thing = a2.thing
   and a4.dist+a3.dist < a1.dist+a2.dist)
go
-- dummy execution on empty table forces compilation of stored proc
exec CommonAncestors @hierTypeName = 'whatever'
go
-- Insert goats
insert things (thing) values (-3)   -- 'thing'
insert things (thing) values (-2)   -- 'goat'
insert things (thing) values (-1)   -- 'g'
insert things (thing) values (0)    -- 'g0'
insert things (thing) values (1)    -- 'g1'
insert things (thing) values (2)    -- 'g2'
insert things (thing) values (3)    -- 'g3'
...
go
-- Insert HierarchyTypes
insert hierTypes (hierType, hierTypeName) values (1, 'class')
insert hierTypes (hierType, hierTypeName) values (2, 'parent')
go
-- Insert AttribTypes
insert attribTypes (attribType, attribTypeName) values (1, 'name')
go 
-- Insert goat attributes
insert attribs (attrib, thing, attribType) values (-3, -3, 1)
insert attribs (attrib, thing, attribType) values (-2, -2, 1)
insert attribs (attrib, thing, attribType) values (-1, -1, 1)
insert attribs (attrib, thing, attribType) values (0, 0, 1)
insert attribs (attrib, thing, attribType) values (1, 1, 1)
insert attribs (attrib, thing, attribType) values (2, 2, 1)
insert attribs (attrib, thing, attribType) values (3, 3, 1)
...
go
-- Insert attribute values
insert attribValues_char (attrib, value) values ( -3, 'thing')
insert attribValues_char (attrib, value) values ( -2, 'goat')
insert attribValues_char (attrib, value) values ( -1, 'g')
insert attribValues_char (attrib, value) values (  0, 'g0')
insert attribValues_char (attrib, value) values (  1, 'g1')
insert attribValues_char (attrib, value) values (  2, 'g2')
insert attribValues_char (attrib, value) values (  3, 'g3')
...
go
-- Insert things classes
insert hierarchies (thing1, thing2, hierType) values ( -2, -3, 1)
insert hierarchies (thing1, thing2, hierType) values ( -1, -2, 1)
insert hierarchies (thing1, thing2, hierType) values (  0, -2, 1)
insert hierarchies (thing1, thing2, hierType) values (  1, -2, 1)
insert hierarchies (thing1, thing2, hierType) values (  2, -2, 1)
insert hierarchies (thing1, thing2, hierType) values (  3, -2, 1)
...
go
-- Insert goat parents
insert hierarchies (thing1, thing2, hierType) values (0, -1, 2)
insert hierarchies (thing1, thing2, hierType) values (1, -1, 2)
insert hierarchies (thing1, thing2, hierType) values (2, -1, 2)
insert hierarchies (thing1, thing2, hierType) values (3, -1, 2)
...
go
--
print 'Initialization done - starting actual test'
go
-- generate the report - show elapsed time for 1 run
set nocount on
go
select current_timestamp AS 'Starting time'
exec CommonAncestors @hierTypeName = 'parent'
select current_timestamp AS 'Ending time'
go
-- display the results
select *
from NCancestor
order by ThingX, ThingY
go
--
-- cleanup after execution
drop proc CommonAncestors
drop table NCancestor
drop table ancestors
drop table hierarchies
drop table hierTypes
drop table attribValues_char
drop table attribs
drop table attribTypes
drop table things
go