# Re: Floyd's algorithm in SQL

Date: Thu, 02 Dec 2004 15:15:42 GMT

Message-ID: <ykGrd.18407$Yh2.6979338_at_twister.nyc.rr.com>

"--CELKO--" <jcelko212_at_earthlink.net> wrote in message
news:18c7b3c2.0412011610.3891e0bc_at_posting.google.com...

> I am re-doing the graphs section for the third edition of SQL FOR

*> SMARTIES and would like some feedback and help.
**>
**> Here is a version of Floyd's algorithm to find the transitive closure
**> of a graph. You can also look up Warshall's algorithm, which is an
**> earlier and more specialized version of Floyd's algorithm. Given a
**> directed graph G represented by an adjacency matrix A[i, j], WHERE
**> A[i, j] = 1 if the edge (i, j) is in the graph, compute the matrix P
**> of paths with their lengths. Now put it into SQL:
**>
**> CREATE TABLE Paths
**> (source CHAR(1)NOT NULL,
**> destination CHAR(1) NOT NULL,
**> lgth INTEGER DEFAULT 0 NOT NULL);
**>
**> CREATE TABLE Edges
**> (source CHAR(1)NOT NULL,
**> destination CHAR(1)NOT NULL,
**> PRIMARY KEY (source, destination),
**> lgth INTEGER DEFAULT 1 NOT NULL);
**>
**> INSERT INTO Edges (source, destination) VALUES ('a', 'b');
**> INSERT INTO Edges (source, destination) VALUES ('b', 'c');
**> INSERT INTO Edges (source, destination) VALUES ('c', 'd');
**> INSERT INTO Edges (source, destination) VALUES ('d', 'a');
**> INSERT INTO Edges (source, destination) VALUES ('a', 'e');
**>
**> The algorithm extends each known path by one edge at a time. Since no
**> path can be longer than the number of nodes in the graph, we have an
**> upper bound on iterations. But we can also stop when the INSERT
**> statement adds no new paths.
**>
**> CREATE PROCEDURE Floyd()
**> LANGUAGE SQL
**> DETERMINISTIC
**> BEGIN
**> DECLARE old_path_tally INTEGER;
**> SET old_path_tally = 0;
**> DELETE FROM Paths; -- clean out working table
**> INSERT INTO Paths SELECT * FROM Edges; -- load the edges
**>
**> -- add one edge to each path
**> WHILE old_path_tally < (SELECT COUNT(*) FROM Paths)
**> DO INSERT INTO Paths (source, destination, lgth)
**> SELECT E1.source, P1.destination, (E1.lgth + P1.lgth)
**> FROM Edges AS E1, Paths AS P1
**> WHERE E1.destination = P1.source
**> AND NOT EXISTS -- path is not here already
**> (SELECT *
**> FROM Paths AS P2
**> WHERE E1.source = P2.source
**> AND P1.destination = P2.destination);
**> SET old_path_tally = (SELECT COUNT(*) FROM Paths);
**> END WHILE;
**> END;
**>
**> You could also have used a SQLSTATE code to detect any empty
**> insertion. I am not sure if that would actually be faster or not.
**>
**> SELECT * FROM Paths;
**> source destination lgth
**> =========================
**> a b 1
**> a e 1
**> b c 1
**> c d 1
**> d a 1
**> a c 2
**> b d 2
**> c a 2
**> d b 2
**> d e 2
**> a d 3
**> b a 3
**> c b 3
**> c e 3
**> d c 3
**> a a 4
**> b b 4
**> b e 4
**> c c 4
**> d d 4
**>
**> What I'd like to know is:
**>
**> 1) Can I put Paths in a VIEW defined by a recursive WITH operator? I
**> don't have a copy of a product that implements this SQL-99 feature.
**>
**> 2) Can I modify my SQL-92 code or the SQL-99 to handle the lowest cost
**> problem? That is, each edge has a positive cost; what is the CHEAPEST
**> path from x to y, not the SHORTEST path?
**>
**> There is fame, glory and virtual beer in this puzzle.
*

The following was tested with DB2 so it might not be fully compliant with Standard SQL. For example, I believe the Standard requires a recursive WITH to be defined as WITH RECURSIVE.

Using your DDL, here's a sample graph.

INSERT INTO Edges (source, destination, lgth)
**VALUES ('1', '2', 50);
**

INSERT INTO Edges (source, destination, lgth)
**VALUES ('1', '3', 30);
**

INSERT INTO Edges (source, destination, lgth)
**VALUES ('1', '4', 100);
**

INSERT INTO Edges (source, destination, lgth)
**VALUES ('1', '5', 10);
**

INSERT INTO Edges (source, destination, lgth)
**VALUES ('3', '2', 5);
**

INSERT INTO Edges (source, destination, lgth)
**VALUES ('4', '2', 20);
**

INSERT INTO Edges (source, destination, lgth)
**VALUES ('4', '3', 50);
**

INSERT INTO Edges (source, destination, lgth)
**VALUES ('5', '4', 10);
**
To find the shortest paths:

CREATE VIEW ShortestPaths (source, destination, path_length)
AS

WITH Paths (source, destination, path_length)
AS

(

SELECT source, destination, 1

FROM Edges

**UNION ALL
**

SELECT E1.source, P1.destination, P1.path_length + 1
FROM Edges AS E1, Paths AS P1

WHERE E1.destination = P1.source

)

SELECT source, destination, MIN(path_length)
FROM Paths

GROUP BY source, destination;

SELECT source, destination, path_length

FROM ShortestPaths

ORDER BY source, destination

source destination path_length

1 2 1 1 3 1 1 4 1 1 5 1 3 2 1 4 2 1 4 3 1 5 2 2 5 3 2 5 4 1

To find the cheapest paths:

CREATE VIEW CheapestPaths (source, destination, path_cost)
AS

WITH Paths (source, destination, path_cost)
AS

(

SELECT source, destination, lgth

FROM Edges

**UNION ALL
**

SELECT E1.source, P1.destination, P1.path_cost + E1.lgth
FROM Edges AS E1, Paths AS P1

WHERE E1.destination = P1.source

)

SELECT source, destination, MIN(path_cost)
FROM Paths

GROUP BY source, destination;

SELECT source, destination, path_cost

FROM CheapestPaths

ORDER BY source, destination;

source destination path_cost

1 2 35 1 3 30 1 4 20 1 5 10 3 2 5 4 2 20 4 3 50 5 2 30 5 3 60 5 4 10

To find the shortest paths iteratively, a slight fix to your code:

DECLARE old_path_tally INTEGER;

SET old_path_tally = 0;

DELETE FROM Paths; -- clean out working table
INSERT INTO Paths SELECT source, destination, 1 FROM Edges; -- load the edges

- add one edge to each path
WHILE old_path_tally < (SELECT COUNT(*) FROM Paths)
DO
SET old_path_tally = (SELECT COUNT(*) FROM Paths);
INSERT INTO Paths (source, destination, lgth)
SELECT E1.source, P1.destination, (1 + P1.lgth)
FROM Edges AS E1, Paths AS P1
WHERE E1.destination = P1.source
AND NOT EXISTS -- path is not here already
**(SELECT ***FROM Paths AS P2 WHERE E1.source = P2.source AND P1.destination = P2.destination);**END WHILE;**

FROM Paths

ORDER BY source, destination;

source destination lgth

1 2 1

1 3 1

1 4 1

1 5 1

3 2 1

4 2 1

4 3 1

5 2 2

5 3 2

5 4 1

To find the cheapest paths iteratively:

DECLARE old_path_tally INTEGER;

SET old_path_tally = 0;

DELETE FROM Paths; -- clean out working table
INSERT INTO Paths SELECT source, destination, lgth FROM Edges; -- load the edges

- add one edge to each path
WHILE old_path_tally < (SELECT COUNT(*) FROM Paths)
DO
SET old_path_tally = (SELECT COUNT(*) FROM Paths);
INSERT INTO Paths (source, destination, lgth)
SELECT E1.source, P1.destination, (E1.lgth + P1.lgth)
FROM Edges AS E1
**INNER JOIN**(SELECT source, destination, MIN(lgth) AS lgth FROM Paths GROUP BY source, destination) AS P1 ON E1.destination = P1.source AND**NOT EXISTS (SELECT ***FROM Paths AS P2 WHERE E1.source = P2.source AND P1.destination = P2.destination AND P2.lgth <= E1.lgth + P1.lgth);**END WHILE;**

SELECT source, destination, MIN(lgth)

FROM Paths

GROUP BY source, destination;

SELECT source, destination, path_cost

FROM CheapestPathsIterative

ORDER BY source, destination;

source destination path_cost

1 2 35

1 3 30

1 4 20

1 5 10

3 2 5

4 2 20

4 3 50

5 2 30

5 3 60

5 4 10

-- JAGReceived on Thu Dec 02 2004 - 16:15:42 CET