Adjacency list to Nested set model statement
Date: 27 Feb 2002 14:38:37 -0800
Message-ID: <c0d87ec0.0202271438.12a87592_at_posting.google.com>
Several weeks ago, an Italian sent me a set of four PL/SQL procedures for generating the transitive closure (i.e. all the paths) in a tree using the adjacency model of a tree.
I came back with a modification of the procedure I use for converting the adjacency model of a tree into the nested sets model of a tree.
CREATE TABLE Stack
(stack_top INTEGER NOT NULL,
child VARCHAR(10) NOT NULL,
lft INTEGER,
rgt INTEGER);
/* This holds the transitive closure pairs and the distance in edges
between the two nodes shown. The assumption is that (a,a,0) does not
count and that the direction is from root to leaf nodes */
CREATE TABLE Transclosure
(parent VARCHAR(10) NOT NULL,
child VARCHAR(10) NOT NULL,
distance INTEGER NOT NULL);
BEGIN
DECLARE _at_counter INTEGER; -- loop control DECLARE _at_current_top INTEGER; -- top of stack DECLARE _at_max_counter INTEGER; -- loop limit
- each node gets a pair of (lft,rgt) numbers, SET _at_max_counter = 2 * (SELECT COUNT(*) FROM Tree1);
- push the root of the tree on the stack INSERT INTO Stack SELECT 1, child, 1, NULL -- has to be NULL for trans closure FROM Tree1 WHERE parent IS NULL;
SET _at_counter = 2;
SET _at_current_top = 1;
/* Tree1 holds the tree and destroys the data as it runs */
DELETE FROM Tree1
WHERE parent IS NULL;
/* Start loading the youngest child of the node on top of the stack */
WHILE (_at_counter <= @max_counter)
BEGIN
IF EXISTS (SELECT *
FROM Stack AS S1, Tree1 AS T1
WHERE S1.child = T1.parent
AND S1.stack_top = _at_current_top)
BEGIN -- push when top has subordinates and set lft
INSERT INTO Stack
SELECT (_at_current_top + 1),
MIN(T1.child), -- youngest child
_at_counter, -- sete lft
NULL -- no rgt number yet
FROM Stack AS S1, Tree1 AS T1
WHERE S1.child = T1.parent
AND S1.stack_top = _at_current_top;
-- remove this row from Tree1
DELETE FROM Tree1
WHERE child = (SELECT child
FROM Stack
WHERE stack_top = _at_current_top + 1);
SET _at_current_top = @current_top + 1;
END -- end of push
ELSE
BEGIN -- pop the stack and set rgt value
- transitive closure statement
INSERT INTO Transclosure (parent, child, distance)
SELECT DISTINCT S1.child, S2.child,
(S2.stack_top - S1.stack_top)
FROM Stack AS S1, Stack AS S2
WHERE ABS(S1.stack_top) < ABS(S2.stack_top)
AND S1.rgt IS NULL
AND S2.stack_top > 0 -- not yet popped from stack
AND NOT EXISTS
(SELECT *
FROM Transclosure AS T1
WHERE T1.parent = S1.child
AND T1.child = S2.child
AND T1.distance
- (S2.stack_top - S1.stack_top));
- end transitive code
UPDATE Stack
SET rgt = _at_counter,
stack_top = -stack_top
WHERE stack_top = _at_current_top
SET _at_current_top = @current_top - 1;
END; -- pop
SET _at_counter = @counter + 1;
END; --while
END; -- proc
The transitive clousre code can probably be tighter. The idea is that when you hit a leaf node, you know you have a path from the root to that leaf node on the stack. Do a self-join and pull off all possible distinct node pairs, calculating the distance from their depth in the stack.
Perhaps a UNION would be a better choice for duplicate elimination; should I put the (a,a,0) pairs in as part of the loop, or insert them outside the loop after the Transclosure table is created, or skip them altogether?
Are ABS() calls needed? I'm getting punchy.
Oh, here is some sample data. Remember the nursary rhyme "As I was going to St. Ives"?
DROP TABLE Wives;
DROP TABLE Sacks;
CREATE TABLE Wives (wife VARCHAR(10) NOT NULL); CREATE TABLE Sacks (sack VARCHAR(10) NOT NULL);
INSERT INTO Wives VALUES ('w1');
INSERT INTO Wives VALUES ('w2');
INSERT INTO Wives VALUES ('w3');
INSERT INTO Wives VALUES ('w4');
INSERT INTO Wives VALUES ('w5');
INSERT INTO Wives VALUES ('w6');
INSERT INTO Wives VALUES ('w7');
INSERT INTO Sacks VALUES ('s1');
INSERT INTO Sacks VALUES ('s2');
INSERT INTO Sacks VALUES ('s3');
INSERT INTO Sacks VALUES ('s4');
INSERT INTO Sacks VALUES ('s5');
INSERT INTO Sacks VALUES ('s6');
INSERT INTO Sacks VALUES ('s7');
DROP TABLE Tree1;
CREATE TABLE Tree1
(parent VARCHAR(10),
child VARCHAR(10) NOT NULL);
INSERT INTO Tree1 VALUES (NULL, 'M1');
INSERT INTO Tree1
SELECT DISTINCT 'M1', ('M1'+ W1.wife)
FROM Wives AS W1;
SELECT * FROM Tree1;
INSERT INTO Tree1
SELECT DISTINCT T1.child, (T1.child + S1.sack)
FROM Tree1 AS T1, Sacks AS S1
WHERE T1.child LIKE 'M1w_';
SELECT * FROM Tree1;
- so not all paths are the same depth
INSERT INTO Tree1 VALUES ('M1w1s7', 'M1w1s7c1');
INSERT INTO Tree1 VALUES ('M1w1s7', 'M1w1s7c2');
INSERT INTO Tree1 VALUES ('M1w1s7', 'M1w1s7c3');
INSERT INTO Tree1 VALUES ('M1w1s7c1', 'M1w1s7c1k1');
INSERT INTO Tree1 VALUES ('M1w1s7c1', 'M1w1s7c1k2');
INSERT INTO Tree1 VALUES ('M1w1s7c1', 'M1w1s7c1k3');
DROP TABLE Wives;
DROP TABLE Sacks;
SELECT * FROM Tree1; Received on Wed Feb 27 2002 - 23:38:37 CET
