Adjacency list to Nested set model statement

From: --CELKO-- <71062.1056_at_compuserve.com>
Date: 27 Feb 2002 14:38:37 -0800
Message-ID: <c0d87ec0.0202271438.12a87592_at_posting.google.com>



Several weeks ago, an Italian sent me a set of four PL/SQL procedures for generating the transitive closure (i.e. all the paths) in a tree using the adjacency model of a tree.

I came back with a modification of the procedure I use for converting the adjacency model of a tree into the nested sets model of a tree.

Here is the code; can you make it better?

/* Stack holds the nested sets model; it is the push down stack for the
tree traversal. Would an index or key help here? */

CREATE TABLE Stack
(stack_top INTEGER NOT NULL,
child VARCHAR(10) NOT NULL,
lft INTEGER,
rgt INTEGER);

/* This holds the transitive closure pairs and the distance in edges
between the two nodes shown. The assumption is that (a,a,0) does not count and that the direction is from root to leaf nodes */

CREATE TABLE Transclosure
(parent VARCHAR(10) NOT NULL,
child VARCHAR(10) NOT NULL,
distance INTEGER NOT NULL);

BEGIN

DECLARE _at_counter INTEGER; -- loop control
DECLARE _at_current_top INTEGER; -- top of stack
DECLARE _at_max_counter INTEGER; -- loop limit

  • each node gets a pair of (lft,rgt) numbers, SET _at_max_counter = 2 * (SELECT COUNT(*) FROM Tree1);
  • push the root of the tree on the stack INSERT INTO Stack SELECT 1, child, 1, NULL -- has to be NULL for trans closure FROM Tree1 WHERE parent IS NULL;

SET _at_counter = 2;
SET _at_current_top = 1;

/* Tree1 holds the tree and destroys the data as it runs */
DELETE FROM Tree1
WHERE parent IS NULL;

/* Start loading the youngest child of the node on top of the stack */

WHILE (_at_counter <= @max_counter)
BEGIN
IF EXISTS (SELECT *

              FROM Stack AS S1, Tree1 AS T1
             WHERE S1.child = T1.parent
               AND S1.stack_top = _at_current_top)
    BEGIN -- push when top has subordinates and set lft
      INSERT INTO Stack
      SELECT (_at_current_top + 1), 
              MIN(T1.child), -- youngest child
              _at_counter, -- sete lft
              NULL -- no rgt number yet
        FROM Stack AS S1, Tree1 AS T1
       WHERE S1.child = T1.parent
         AND S1.stack_top = _at_current_top;

      -- remove this row from Tree1
      DELETE FROM Tree1
       WHERE child = (SELECT child
                      FROM Stack
                     WHERE stack_top = _at_current_top + 1);
      SET _at_current_top = @current_top + 1;
    END -- end of push
    ELSE
    BEGIN -- pop the stack and set rgt value
  • transitive closure statement INSERT INTO Transclosure (parent, child, distance) SELECT DISTINCT S1.child, S2.child, (S2.stack_top - S1.stack_top) FROM Stack AS S1, Stack AS S2 WHERE ABS(S1.stack_top) < ABS(S2.stack_top) AND S1.rgt IS NULL AND S2.stack_top > 0 -- not yet popped from stack AND NOT EXISTS (SELECT * FROM Transclosure AS T1 WHERE T1.parent = S1.child AND T1.child = S2.child AND T1.distance
    • (S2.stack_top - S1.stack_top));
  • end transitive code
      UPDATE Stack
         SET rgt = _at_counter,
             stack_top = -stack_top
       WHERE stack_top = _at_current_top
      SET _at_current_top = @current_top - 1;
    END; -- pop
  SET _at_counter = @counter + 1;
END; --while
END; -- proc

The transitive clousre code can probably be tighter. The idea is that when you hit a leaf node, you know you have a path from the root to that leaf node on the stack. Do a self-join and pull off all possible distinct node pairs, calculating the distance from their depth in the stack.

Perhaps a UNION would be a better choice for duplicate elimination; should I put the (a,a,0) pairs in as part of the loop, or insert them outside the loop after the Transclosure table is created, or skip them altogether?

Are ABS() calls needed? I'm getting punchy.

Oh, here is some sample data. Remember the nursary rhyme "As I was going to St. Ives"?

DROP TABLE Wives;
DROP TABLE Sacks;

CREATE TABLE Wives (wife VARCHAR(10) NOT NULL); CREATE TABLE Sacks (sack VARCHAR(10) NOT NULL);

INSERT INTO Wives VALUES ('w1');
INSERT INTO Wives VALUES ('w2');
INSERT INTO Wives VALUES ('w3');
INSERT INTO Wives VALUES ('w4');
INSERT INTO Wives VALUES ('w5');
INSERT INTO Wives VALUES ('w6');
INSERT INTO Wives VALUES ('w7');

INSERT INTO Sacks VALUES ('s1');
INSERT INTO Sacks VALUES ('s2');
INSERT INTO Sacks VALUES ('s3');
INSERT INTO Sacks VALUES ('s4');
INSERT INTO Sacks VALUES ('s5');

INSERT INTO Sacks VALUES ('s6');
INSERT INTO Sacks VALUES ('s7');

DROP TABLE Tree1;
CREATE TABLE Tree1
(parent VARCHAR(10),
child VARCHAR(10) NOT NULL);

INSERT INTO Tree1 VALUES (NULL, 'M1');

INSERT INTO Tree1
SELECT DISTINCT 'M1', ('M1'+ W1.wife)
  FROM Wives AS W1;

SELECT * FROM Tree1;

INSERT INTO Tree1
SELECT DISTINCT T1.child, (T1.child + S1.sack)   FROM Tree1 AS T1, Sacks AS S1
WHERE T1.child LIKE 'M1w_';

SELECT * FROM Tree1;

  • so not all paths are the same depth
INSERT INTO Tree1 VALUES ('M1w1s7', 'M1w1s7c1');   
INSERT INTO Tree1 VALUES ('M1w1s7', 'M1w1s7c2');   
INSERT INTO Tree1 VALUES ('M1w1s7', 'M1w1s7c3');   
INSERT INTO Tree1 VALUES ('M1w1s7c1', 'M1w1s7c1k1');   
INSERT INTO Tree1 VALUES ('M1w1s7c1', 'M1w1s7c1k2');   
INSERT INTO Tree1 VALUES ('M1w1s7c1', 'M1w1s7c1k3');   

DROP TABLE Wives;
DROP TABLE Sacks;

SELECT * FROM Tree1; Received on Wed Feb 27 2002 - 23:38:37 CET

Original text of this message