Eliminate Overlapping Dates
I was looking for an efficient way to eliminate overlapping days when provided with a historical table that provided events that could overlap. In my case, I had dates show the range of a process. However, the multiple start and end dates could overlap, and even run concurrently. To eliminate double counting the days the process truly was in play I needed a way to find eliminate the overlap, and eliminate duplicate days when running in parallel. I researched ways to complete this and found the solution through this post. Solutions to Packing Date and Time Intervals Puzzle
Itzik provided an excellent solution, though I had to take time to digest. The only problem I ran into, was his solution was focused on a single user and dates. For my purposes, I need to evaluate an account and further break it down by overlap on a particular process. Grateful for SQL MVP's contributions to the community as this was a brain bender!
if object_id('tempdb..#Awesome') is not null | |
drop table #awesome; | |
create table #awesome | |
( | |
unique_join_k int | |
,process varchar(10) | |
,start_date date | |
,end_date date | |
,grouper_id as convert(uniqueidentifier, hashbytes('sha1', | |
isnull(cast(unique_join_k as varchar(max)), '|') | |
+ '|' + isnull(cast(process as varchar(max)), '|') | |
)) | |
) | |
insert into #awesome | |
select | |
* | |
from | |
( | |
values | |
(1000, 'process 1', cast('20131006' as date), cast('20131213' as date)), | |
(1000, 'process 1', cast('20131225' as date), cast('20140215' as date)), | |
(1000, 'process 1', cast('20140123' as date), cast('20140215' as date)), | |
(1000, 'process 2', cast('20140328' as date), cast('20140617' as date)), | |
(1001, 'process 1', cast('20130214' as date), cast('20130421' as date)), | |
(1001, 'process 1', cast('20130827' as date), cast('20130926' as date)), | |
(1001, 'process 1', cast('20131211' as date), cast('20131221' as date)), | |
(1001, 'process 1', cast('20140227' as date), cast('20140405' as date)), | |
(1002, 'process 1', cast('20130518' as date), cast('20130622' as date)), | |
(1002, 'process 2', cast('20130527' as date), cast('20130618' as date)), | |
(1003, 'process 1', cast('20130312' as date), cast('20130428' as date)), | |
(1003, 'process 1', cast('20130510' as date), cast('20130614' as date)), | |
(1003, 'process 1', cast('20130725' as date), cast('20131115' as date)), | |
(1003, 'process 1', cast('20131010' as date), cast('20131014' as date)), | |
(1003, 'process 2', cast('20140413' as date), cast('20140614' as date)), | |
(1004, 'process 1', cast('20130116' as date), cast('20130425' as date)), | |
(1004, 'process 1', cast('20130828' as date), cast('20130926' as date)), | |
(1004, 'process 1', cast('20140101' as date), cast('20140210' as date)), | |
(1004, 'process 1', cast('20140312' as date), cast('20140408' as date)), | |
(1004, 'process 2', cast('20140513' as date), cast('20140806' as date)), | |
(1004, 'process 2', cast('20140515' as date), cast('20140818' as date)), | |
(1005, 'process 1', cast('20130606' as date), cast('20130724' as date)), | |
(1005, 'process 2', cast('20131125' as date), cast('20140217' as date)), | |
(1005, 'process 3', cast('20131220' as date), cast('20140131' as date)), | |
(1005, 'process 3', cast('20140503' as date), cast('20140605' as date))) as vtable (unique_join_k, process, start_date, end_date); | |
select | |
* | |
from | |
#awesome as a | |
order by | |
a.unique_join_k | |
,a.[start_date]; | |
with c1 as -- let e = end ordinals, let s = start ordinals | |
( | |
select | |
grouper_id | |
,unique_join_k | |
,time_start = start_date | |
,is_start = 1 | |
,sqid_end = null | |
,sqid_start = row_number() over (partition by grouper_id order by start_date) | |
from | |
#awesome cs | |
union all | |
select | |
grouper_id | |
,unique_join_k | |
,time_start = end_date | |
,is_start = 0 | |
,sqid_end = row_number() over (partition by grouper_id order by end_date) | |
,sqid_start = null | |
from | |
#awesome cs | |
), | |
c2 as (-- let se = start or end ordinal, namely, how many events (start or end) happened so far | |
select | |
c1.* | |
,sqid_all_is_starts = row_number() over (partition by grouper_id order by time_start, is_start desc) | |
from | |
c1 | |
), --select * from c2 | |
c3 as ( | |
/******************************************************* | |
For start events, the expression sqid_start - (sqid_all_is_starts - sqid_start) - 1 represents how many sessions were active | |
just before the current (hence - 1) | |
For end events, the expression (sqid_all_is_starts - sqid_end) - sqid_end ) represents how many sessions are active | |
right after this one | |
The above two expressions are 0 exactly when a group of packed intervals | |
either starts or ends, respectively | |
After filtering only events when a group of packed intervals either starts or ends, | |
group each pair of adjacent start/end events | |
*******************************************************/ | |
select | |
grouper_id | |
,unique_join_k | |
,time_start | |
,date_grouping = floor((row_number() over (partition by grouper_id order by time_start) - 1) / 2 + 1) | |
from | |
c2 | |
where | |
coalesce(sqid_start - (sqid_all_is_starts - sqid_start) - 1, | |
(sqid_all_is_starts - sqid_end) - sqid_end) = 0 | |
) -- select * from c3 | |
/******************************************************* | |
OVERLAPPING DATES ON SAME GROUP RESOLVED IN DISTINCT RANGES | |
*******************************************************/ | |
, | |
overlappedgone as ( | |
select | |
* | |
,sqid = dense_rank() over (partition by grouper_id, date_grouping order by time_start asc) | |
from | |
c3 | |
) | |
select | |
unique_join_k_start = og1.unique_join_k | |
,unique_join_k_end = og2.unique_join_k | |
,calc_start_date = og1.time_start | |
,calc_end_date = og2.time_start | |
,unique_non_overlapping_days = datediff(day, og1.time_start, og2.time_start) | |
,grouper_id = og1.grouper_id | |
from | |
overlappedgone og1 | |
inner join overlappedgone og2 | |
on og1.date_grouping = og2.date_grouping | |
and og1.grouper_id = og2.grouper_id | |
and og2.sqid = 2 | |
where | |
og1.sqid = 1 | |
order by | |
og1.unique_join_k | |
,calc_start_date |