Re: Floyd's algorithm in SQL

From: John Gilson <jag_at_acm.org>
Date: Thu, 02 Dec 2004 15:15:42 GMT
Message-ID: <ykGrd.18407$Yh2.6979338_at_twister.nyc.rr.com>


"--CELKO--" <jcelko212_at_earthlink.net> wrote in message news:18c7b3c2.0412011610.3891e0bc_at_posting.google.com...
> I am re-doing the graphs section for the third edition of SQL FOR
> SMARTIES and would like some feedback and help.
>
> Here is a version of Floyd's algorithm to find the transitive closure
> of a graph. You can also look up Warshall's algorithm, which is an
> earlier and more specialized version of Floyd's algorithm. Given a
> directed graph G represented by an adjacency matrix A[i, j], WHERE
> A[i, j] = 1 if the edge (i, j) is in the graph, compute the matrix P
> of paths with their lengths. Now put it into SQL:
>
> CREATE TABLE Paths
> (source CHAR(1)NOT NULL,
> destination CHAR(1) NOT NULL,
> lgth INTEGER DEFAULT 0 NOT NULL);
>
> CREATE TABLE Edges
> (source CHAR(1)NOT NULL,
> destination CHAR(1)NOT NULL,
> PRIMARY KEY (source, destination),
> lgth INTEGER DEFAULT 1 NOT NULL);
>
> INSERT INTO Edges (source, destination) VALUES ('a', 'b');
> INSERT INTO Edges (source, destination) VALUES ('b', 'c');
> INSERT INTO Edges (source, destination) VALUES ('c', 'd');
> INSERT INTO Edges (source, destination) VALUES ('d', 'a');
> INSERT INTO Edges (source, destination) VALUES ('a', 'e');
>
> The algorithm extends each known path by one edge at a time. Since no
> path can be longer than the number of nodes in the graph, we have an
> upper bound on iterations. But we can also stop when the INSERT
> statement adds no new paths.
>
> CREATE PROCEDURE Floyd()
> LANGUAGE SQL
> DETERMINISTIC
> BEGIN
> DECLARE old_path_tally INTEGER;
> SET old_path_tally = 0;
> DELETE FROM Paths; -- clean out working table
> INSERT INTO Paths SELECT * FROM Edges; -- load the edges
>
> -- add one edge to each path
> WHILE old_path_tally < (SELECT COUNT(*) FROM Paths)
> DO INSERT INTO Paths (source, destination, lgth)
> SELECT E1.source, P1.destination, (E1.lgth + P1.lgth)
> FROM Edges AS E1, Paths AS P1
> WHERE E1.destination = P1.source
> AND NOT EXISTS -- path is not here already
> (SELECT *
> FROM Paths AS P2
> WHERE E1.source = P2.source
> AND P1.destination = P2.destination);
> SET old_path_tally = (SELECT COUNT(*) FROM Paths);
> END WHILE;
> END;
>
> You could also have used a SQLSTATE code to detect any empty
> insertion. I am not sure if that would actually be faster or not.
>
> SELECT * FROM Paths;
> source destination lgth
> =========================
> a b 1
> a e 1
> b c 1
> c d 1
> d a 1
> a c 2
> b d 2
> c a 2
> d b 2
> d e 2
> a d 3
> b a 3
> c b 3
> c e 3
> d c 3
> a a 4
> b b 4
> b e 4
> c c 4
> d d 4
>
> What I'd like to know is:
>
> 1) Can I put Paths in a VIEW defined by a recursive WITH operator? I
> don't have a copy of a product that implements this SQL-99 feature.
>
> 2) Can I modify my SQL-92 code or the SQL-99 to handle the lowest cost
> problem? That is, each edge has a positive cost; what is the CHEAPEST
> path from x to y, not the SHORTEST path?
>
> There is fame, glory and virtual beer in this puzzle.

The following was tested with DB2 so it might not be fully compliant with Standard SQL. For example, I believe the Standard requires a recursive WITH to be defined as WITH RECURSIVE.

Using your DDL, here's a sample graph.

INSERT INTO Edges (source, destination, lgth) VALUES ('1', '2', 50);
INSERT INTO Edges (source, destination, lgth) VALUES ('1', '3', 30);
INSERT INTO Edges (source, destination, lgth) VALUES ('1', '4', 100);
INSERT INTO Edges (source, destination, lgth) VALUES ('1', '5', 10);
INSERT INTO Edges (source, destination, lgth) VALUES ('3', '2', 5);
INSERT INTO Edges (source, destination, lgth) VALUES ('4', '2', 20);
INSERT INTO Edges (source, destination, lgth) VALUES ('4', '3', 50);
INSERT INTO Edges (source, destination, lgth) VALUES ('5', '4', 10); To find the shortest paths:

CREATE VIEW ShortestPaths (source, destination, path_length) AS
WITH Paths (source, destination, path_length) AS
(
SELECT source, destination, 1
FROM Edges
UNION ALL
SELECT E1.source, P1.destination, P1.path_length + 1 FROM Edges AS E1, Paths AS P1
WHERE E1.destination = P1.source
)
SELECT source, destination, MIN(path_length) FROM Paths
GROUP BY source, destination;

SELECT source, destination, path_length
FROM ShortestPaths
ORDER BY source, destination

source destination path_length

1      2                     1

1      3                     1

1      4                     1

1      5                     1

3      2                     1

4      2                     1

4      3                     1

5      2                     2

5      3                     2

5      4                     1

To find the cheapest paths:

CREATE VIEW CheapestPaths (source, destination, path_cost) AS
WITH Paths (source, destination, path_cost) AS
(
SELECT source, destination, lgth
FROM Edges
UNION ALL
SELECT E1.source, P1.destination, P1.path_cost + E1.lgth FROM Edges AS E1, Paths AS P1
WHERE E1.destination = P1.source
)
SELECT source, destination, MIN(path_cost) FROM Paths
GROUP BY source, destination;

SELECT source, destination, path_cost
FROM CheapestPaths
ORDER BY source, destination;

source destination path_cost

1      2                    35

1      3                    30

1      4                    20

1      5                    10

3      2                     5

4      2                    20

4      3                    50

5      2                    30

5      3                    60

5      4                    10

To find the shortest paths iteratively, a slight fix to your code:

DECLARE old_path_tally INTEGER;
SET old_path_tally = 0;
DELETE FROM Paths; -- clean out working table INSERT INTO Paths SELECT source, destination, 1 FROM Edges; -- load the edges

  • add one edge to each path WHILE old_path_tally < (SELECT COUNT(*) FROM Paths) DO SET old_path_tally = (SELECT COUNT(*) FROM Paths); INSERT INTO Paths (source, destination, lgth) SELECT E1.source, P1.destination, (1 + P1.lgth) FROM Edges AS E1, Paths AS P1 WHERE E1.destination = P1.source AND NOT EXISTS -- path is not here already (SELECT * FROM Paths AS P2 WHERE E1.source = P2.source AND P1.destination = P2.destination); END WHILE;
SELECT source, destination, lgth
FROM Paths
ORDER BY source, destination;

source destination lgth
1 2 1
1 3 1
1 4 1
1 5 1
3 2 1
4 2 1
4 3 1
5 2 2
5 3 2
5 4 1

To find the cheapest paths iteratively:

DECLARE old_path_tally INTEGER;
SET old_path_tally = 0;
DELETE FROM Paths; -- clean out working table INSERT INTO Paths SELECT source, destination, lgth FROM Edges; -- load the edges

  • add one edge to each path WHILE old_path_tally < (SELECT COUNT(*) FROM Paths) DO SET old_path_tally = (SELECT COUNT(*) FROM Paths); INSERT INTO Paths (source, destination, lgth) SELECT E1.source, P1.destination, (E1.lgth + P1.lgth) FROM Edges AS E1 INNER JOIN (SELECT source, destination, MIN(lgth) AS lgth FROM Paths GROUP BY source, destination) AS P1 ON E1.destination = P1.source AND NOT EXISTS (SELECT * FROM Paths AS P2 WHERE E1.source = P2.source AND P1.destination = P2.destination AND P2.lgth <= E1.lgth + P1.lgth); END WHILE;
CREATE VIEW CheapestPathsIterative (source, destination, path_cost) AS
SELECT source, destination, MIN(lgth)
FROM Paths
GROUP BY source, destination;

SELECT source, destination, path_cost
FROM CheapestPathsIterative
ORDER BY source, destination;

source destination path_cost
1 2 35
1 3 30
1 4 20
1 5 10
3 2 5
4 2 20
4 3 50
5 2 30
5 3 60
5 4 10

--
JAG
Received on Thu Dec 02 2004 - 16:15:42 CET

Original text of this message