Re: I have the data and I have the result, I just don't have the query.
Date: Wed, 26 Nov 2003 06:44:59 GMT
Message-ID: <LZXwb.283294$pT1.15735_at_twister.nyc.rr.com>
"Diego Berge" <dberge_at_privacy.net> wrote in message
news:bpshfo$1s6t46$1_at_ID-186969.news.uni-berlin.de...
> Hi there,
>
> I'm scratching my head over the following problem.
>
> Suppose I have a table of events, which specifies event type, start
> and end times, like so:
>
> CREATE TABLE events (
> event_type INT NOT NULL REFERENCES types (event_type),
> dt_start DATETIME NOT NULL,
> dt_end DATETIME, -- NULL means event goes on forever
> KEY (event_type) -- event_type is not unique
> );
>
> Example:
> +------------+------------+------------+
> | event_type | dt_start | dt_end |
> +------------+------------+------------+
> | 20 | 2002-06-24 | 2005-06-24 |
> | 24 | 2001-02-12 | NULL |
> | 25 | 2002-04-01 | 2005-04-01 |
> | 25 | 2003-05-24 | 2006-05-24 |
> | 26 | 2003-05-24 | 2004-05-24 |
> | 31 | 2003-05-24 | NULL |
> | 40 | 2003-05-24 | NULL |
> | 42 | 2001-08-30 | 2006-08-30 |
> +------------+------------+------------+
>
> Then I have a table of "conditions", a condition being defined as a
> number of events occurring simultaneously:
>
> CREATE TABLE conditions (
> condition_id INT NOT NULL,
> event_type INT NOT NULL REFERENCES types (event_type),
> PRIMARY KEY (condition_id, event_type)
> );
>
> Example:
> +--------------+------------+
> | condition_id | event_type |
> +--------------+------------+
> | 6 | 20 |
> | 6 | 26 |
> | 6 | 42 |
> +--------------+------------+
>
> From that information, I should be able to determine the *times*
> during which a condition is present. So, in the example above, I should
> be able to say that condition 6 is present between 2003-05-24 and
> 2004-05-24, as those are the dates during which all of events 20, 26,
> and 42 are occurring. One must keep in mind that conditions could be
> present at various discontinuous points in time, and that two or more
> events of the same type could be present simultaneously (e.g., 25 above).
>
> Now, I don't seem to be able to come up with an adequate SQL
> statement that will give me the above information. I was trying to get
> something such as:
>
> +--------------+------------+------------+
> | condition_id | dt_start | dt_end |
> +--------------+------------+------------+
> | 6 | 2003-05-24 | 2004-05-24 |
> +--------------+------------+------------+
>
> I would very much appreciate any assistance that you could offer on
> this one. TIA.
>
> Regards,
> Diego Berge.
> --
> e-mail address invalid, please reply to newsgroups.
>
Here's a solution with an example that follows. I've written the solution
in a modular piecemeal fashion using views for clarity.
CREATE TABLE Events
event_type INT NOT NULL,
dt_start DATETIME NOT NULL,
dt_end DATETIME NULL,
CHECK (dt_end >= dt_start OR dt_end IS NULL),
UNIQUE (event_type, dt_start, dt_end)
)
CREATE TABLE Conditions
(
condition_id INT NOT NULL,
event_type INT NOT NULL,
PRIMARY KEY (condition_id, event_type)
)
- For all events in a given event type, return all unique endpoints CREATE VIEW EventTypeEndpoints (event_type, dt_endpoint) AS SELECT event_type, dt_start FROM Events UNION SELECT event_type, dt_end FROM Events
- For all events in a given condition, return all unique endpoints CREATE VIEW ConditionEndpoints (condition_id, dt_endpoint) AS SELECT DISTINCT C.condition_id, E.dt_endpoint FROM Conditions AS C INNER JOIN EventTypeEndpoints AS E ON C.event_type = E.event_type
- For all unique endpoints in a given condition, pair consecutive endpoints CREATE VIEW ConditionConsecutiveEndpoints (condition_id, dt_endpoint, dt_next_endpoint) AS SELECT E1.condition_id, E1.dt_endpoint, E2.dt_endpoint FROM ConditionEndPoints AS E1 INNER JOIN ConditionEndPoints AS E2 ON E1.condition_id = E2.condition_id AND ((E1.dt_endpoint < E2.dt_endpoint AND NOT EXISTS (SELECT * FROM ConditionEndPoints AS E3 WHERE E3.condition_id = E1.condition_id AND E3.dt_endpoint > E1.dt_endpoint AND E3.dt_endpoint < E2.dt_endpoint)) OR (E2.dt_endpoint IS NULL AND NOT EXISTS (SELECT * FROM ConditionEndPoints AS E3 WHERE E3.condition_id = E1.condition_id AND E3.dt_endpoint > E1.dt_endpoint)))
- For a given condition, all periods that overlap every event type of
- that condition. This is the intersection of the event types of a
- condition. CREATE VIEW ConditionIntersectionUnsimplified (condition_id, dt_start, dt_end) AS SELECT CE.condition_id, CE.dt_endpoint, CE.dt_next_endpoint FROM Conditions AS C INNER JOIN ConditionConsecutiveEndpoints AS CE ON CE.condition_id = C.condition_id INNER JOIN Events AS E ON C.event_type = E.event_type AND CE.dt_endpoint >= E.dt_start AND (CE.dt_endpoint <= E.dt_end OR E.dt_end IS NULL) AND CE.dt_next_endpoint >= E.dt_start AND (CE.dt_next_endpoint <= E.dt_end OR E.dt_end IS NULL) GROUP BY CE.condition_id, CE.dt_endpoint, CE.dt_next_endpoint HAVING COUNT(DISTINCT E.event_type) = (SELECT COUNT(*) FROM Conditions WHERE condition_id = CE.condition_id)
CREATE VIEW ConditionIntersectionUnsimplifiedStartsBefore
(condition_id, dt_start_before, dt_end_before, dt_start_after, dt_end_after)
AS
SELECT I1.condition_id,
I1.dt_start, I1.dt_end, I2.dt_start, I2.dt_end FROM ConditionIntersectionUnsimplified AS I1 INNER JOIN ConditionIntersectionUnsimplified AS I2 ON I1.condition_id = I2.condition_id AND I1.dt_start < I2.dt_start
CREATE VIEW ConditionIntersectionUnsimplifiedGaps
(condition_id, dt_start, dt_end)
AS
SELECT condition_id, MAX(dt_end_before), dt_start_after
FROM ConditionIntersectionUnsimplifiedStartsBefore
GROUP BY condition_id, dt_start_after
HAVING MAX(dt_end_before) < dt_start_after AND
COUNT(dt_end_before) = COUNT(*)
- Take intersection and simplify CREATE VIEW ConditionIntersection (condition_id, dt_start, dt_end) AS SELECT I.condition_id, MIN(I.dt_start), CASE WHEN COUNT(I.dt_end) = COUNT(*) THEN COALESCE(I.dt_start_gap, MAX(I.dt_end)) ELSE NULL END FROM (SELECT I.condition_id, I.dt_start, I.dt_end, MIN(G.dt_start) AS dt_start_gap FROM ConditionIntersectionUnsimplified AS I LEFT OUTER JOIN ConditionIntersectionUnsimplifiedGaps AS G ON I.condition_id = G.condition_id AND I.dt_end <= G.dt_start GROUP BY I.condition_id, I.dt_start, I.dt_end) AS I GROUP BY I.condition_id, I.dt_start_gap
- Example
INSERT INTO Conditions (condition_id, event_type)
VALUES (1, 1)
INSERT INTO Conditions (condition_id, event_type)
VALUES (1, 2)
INSERT INTO Events (event_type, dt_start, dt_end)
VALUES (1, '20020624', '20050624')
INSERT INTO Events (event_type, dt_start, dt_end)
VALUES (1, '20030524', NULL)
INSERT INTO Events (event_type, dt_start, dt_end)
VALUES (2, '20030101', '20040501')
INSERT INTO Events (event_type, dt_start, dt_end)
VALUES (2, '20040101', '20070501')
INSERT INTO Events (event_type, dt_start, dt_end)
VALUES (2, '20080101', '20090101')
SELECT condition_id, dt_start, dt_end
FROM ConditionIntersection
ORDER BY condition_id, dt_start, dt_end
condition_id dt_start dt_end
1 2003-01-01 00:00:00.000 2007-05-01 00:00:00.000
1 2008-01-01 00:00:00.000 2009-01-01 00:00:00.000
Regards,
jag
Received on Wed Nov 26 2003 - 07:44:59 CET