Find sequence starts and ends
To find the first and last values of column value sequences in a table like this ...
drop table if exists t;
create table t(id int);
insert into t values(1),(2),(3),(4),(6),(7),(8);
... an exclusion join on the previous sequential value finds the first value of each sequence, and the minimum next value from a left join and an exclusion join on the previous sequential value finds the end of each sequence:
SELECT
a.id AS Start,
MIN( c.id ) AS End
FROM tbl AS a
LEFT JOIN tbl AS b ON a.id = b.id + 1
LEFT JOIN tbl AS c ON a.id <= c.id
LEFT JOIN tbl AS d ON c.id = d.id - 1
WHERE b.id IS NULL
AND c.id IS NOT NULL
AND d.id IS NULL
GROUP BY a.id;
+-------+------+
| Start | End |
+-------+------+
| 1 | 4 |
| 6 | 8 |
+-------+------+
Thanks to Scott Noyes for noticing that a.id<c.id
fails to pick up sequences of 1 followed by skips of 1, but a.id<=c.id
does.
To see how that query works, look at the output of this version of the query with exclusion and aggregation clauses removed:
SELECT a.id AS aid,b.id AS bid, c.id AS c.id, d.di AS did
FROM tbl AS a
LEFT JOIN tbl AS b ON a.id = b.id + 1
LEFT JOIN tbl AS c ON a.id <= c.id
LEFT JOIN tbl AS d ON c.id = d.id - 1
ORDER BY a.id,b.id,c.id,d.id;
+------+------+------+------+
| aid | bid | cid | did |
+------+------+------+------+
| 1 | NULL | 1 | 2 |
| 1 | NULL | 2 | 3 |
| 1 | NULL | 3 | 4 |
| 1 | NULL | 4 | NULL | <-- end of sequence starting with 1
| 1 | NULL | 6 | 7 |
| 1 | NULL | 7 | 8 |
| 1 | NULL | 8 | NULL |
| 2 | 1 | 2 | 3 |
| 2 | 1 | 3 | 4 |
| 2 | 1 | 4 | NULL |
| 2 | 1 | 6 | 7 |
| 2 | 1 | 7 | 8 |
| 2 | 1 | 8 | NULL |
| 3 | 2 | 3 | 4 |
| 3 | 2 | 4 | NULL |
| 3 | 2 | 6 | 7 |
| 3 | 2 | 7 | 8 |
| 3 | 2 | 8 | NULL |
| 4 | 3 | 4 | NULL |
| 4 | 3 | 6 | 7 |
| 4 | 3 | 7 | 8 |
| 4 | 3 | 8 | NULL |
| 6 | NULL | 6 | 7 |
| 6 | NULL | 7 | 8 |
| 6 | NULL | 8 | NULL | <-- end of sequence starting with 6
| 7 | 6 | 7 | 8 |
| 7 | 6 | 8 | NULL |
| 8 | 7 | 8 | NULL |
+------+------+------+------+
A variant of the problem: when some IDs are used and some are not, find blocks of unused IDs:
DROP TABLE IF EXISTS tbl;
CREATE TABLE tbl(id INT,used BOOL);
INSERT INTO tbl VALUES(1,1),(2,0),(3,0),(4,1),(5,0),(6,0);
SELECT a.id AS Start, MIN( c.id ) AS End
FROM tbl AS a
LEFT JOIN tbl AS b ON a.id=b.id + 1 AND a.used=0 AND b.used=0
LEFT JOIN tbl AS c ON a.id<=c.id AND a.used=0 AND c.used=0
LEFT JOIN tbl AS d ON c.id=d.id-1 AND c.used=0 AND d.used=0
WHERE b.id IS NULL
AND c.id IS NOT NULL
AND d.id IS NULL
GROUP BY a.id;
+-------+------+
| Start | End |
+-------+------+
| 2 | 3 |
| 5 | 6 |
+-------+------+
Here's another variation on the pattern from a MySQL forum. You have a history of prescription dose changes ...
DROP TABLE IF EXISTS dose_change;
CREATE TABLE dose_change (
oid INTEGER UNSIGNED PRIMARY KEY AUTO_INCREMENT,
dose_date DATETIME NOT NULL,
dose INTEGER UNSIGNED,
);
INSERT INTO dose_change (dose_date, dose) values
('2025-01-01', 10),('2025-01-02', 10),('2025-01-03', 20),('2025-01-04', 20),
('2025-01-05', 10),('2025-01-06', 10),('2025-01-07', 10),('2025-01-08', NULL),
('2025-01-09', NULL),('2025-01-10', 30),('2025-01-11', 30),('2025-01-12', 30),
('2025-01-13', 10),('2025-01-14', 20),('2025-01-15', 10),('2025-01-16', NULL),
('2025-01-17', 10);
SELECT * FROM dose_change;
+-----+---------------------+------+
| oid | dose_date | dose |
+-----+---------------------+------+
| 1 | 2000-01-01 00:00:00 | 10 |
| 2 | 2000-01-02 00:00:00 | 10 |
| 3 | 2000-01-03 00:00:00 | 20 |
| 4 | 2000-01-04 00:00:00 | 20 |
| 5 | 2000-01-05 00:00:00 | 10 |
| 6 | 2000-01-06 00:00:00 | 10 |
| 7 | 2000-01-07 00:00:00 | 10 |
| 8 | 2000-01-08 00:00:00 | NULL |
| 9 | 2000-01-09 00:00:00 | NULL |
| 10 | 2000-01-10 00:00:00 | 30 |
| 11 | 2000-01-11 00:00:00 | 30 |
| 12 | 2000-01-12 00:00:00 | 30 |
| 13 | 2000-01-13 00:00:00 | 10 |
| 14 | 2000-01-14 00:00:00 | 20 |
| 15 | 2000-01-15 00:00:00 | 10 |
| 16 | 2000-01-16 00:00:00 | NULL |
| 17 | 2000-01-17 00:00:00 | 10 |
+-----+---------------------+------+
... and you want the dosage history:
2000-01-01 - 2000-01-03, 10
2000-01-03 - 2000-01-05, 20
2000-01-05 - 2000-01-08, 10
2000-01-10 - 2000-01-13, 30
2000-01-13 - 2000-01-14, 10
2000-01-14 - 2000-01-15, 20
2000-01-15 - 2000-01-16, 10
2000-01-17 - null , 10
Forum contributor "laptop alias" posted this solution:
SELECT a.dose_date AS start
, MIN(DATE(c.dose_date)) + INTERVAL 1 DAY AS end
, a.dose
FROM
( SELECT x.dose_date, x.dose, COUNT(*) id
FROM dose_change x
JOIN dose_change y
ON y.dose_date <= x.dose_date
GROUP BY x.oid
) AS a
LEFT JOIN
( SELECT x.dose_date, x.dose, COUNT(*) id
FROM dose_change x
JOIN dose_change y
ON y.dose_date <= x.dose_date
GROUP BY x.oid
) AS b ON a.id = b.id + 1 AND b.dose = a.dose
LEFT JOIN
( SELECT x.dose_date, x.dose, COUNT(*) id
FROM dose_change x
JOIN dose_change y
ON y.dose_date <= x.dose_date
GROUP BY x.oid
) AS c ON a.id <= c.id AND c.dose = a.dose
LEFT JOIN
( SELECT x.dose_date, x.dose, COUNT(*) id
FROM dose_change x
JOIN dose_change y
ON y.dose_date <= x.dose_date
GROUP BY x.oid
) AS d ON c.id = d.id - 1 AND d.dose = c.dose
WHERE b.id IS NULL AND c.id IS NOT NULL AND d.id IS NULL
GROUP BY start;
+---------------------+------------+------+
| start | end | dose |
+---------------------+------------+------+
| 2000-01-01 00:00:00 | 2000-01-03 | 10 |
| 2000-01-03 00:00:00 | 2000-01-05 | 20 |
| 2000-01-05 00:00:00 | 2000-01-08 | 10 |
| 2000-01-10 00:00:00 | 2000-01-13 | 30 |
| 2000-01-13 00:00:00 | 2000-01-14 | 10 |
| 2000-01-14 00:00:00 | 2000-01-15 | 20 |
| 2000-01-15 00:00:00 | 2000-01-16 | 10 |
| 2000-01-17 00:00:00 | 2000-01-18 | 10 |
+---------------------+------------+------+
Tom Melly found this simpler but slower solution:
SELECT
a.dose_date AS StartDate,
a.dose AS Dose,
( SELECT b.dose_date
FROM dose_change AS b
WHERE b.dose_date > a.dose_date AND (b.dose <> a.dose OR b.dose IS NULL)
ORDER BY b.dose_date ASC LIMIT 1
) AS StopDate
FROM dose_change AS a
WHERE Coalesce(
(SELECT c.dose
FROM dose_change AS c
WHERE c.dose_date <= a.dose_date
ORDER BY c.dose_date DESC LIMIT 1,1
), -99999
) <> a.dose
AND a.dose IS NOT NULL
ORDER BY a.dose_date ASC;