728x90

시계열 분석에서 불연속적인 상태 변화 데이터에서 각 상태가 유지된 시간을 산출하는 쿼리 

 

예를 들어, 다음과 같은 시계열 데이터에서 각 상태별 지속 시간 총합("결과 데이터")을 산출하고자 한다. 

Time (sec) State
1 Normal
2 Normal
5 Anomaly
6 Anomaly
7 Anomaly
8 Normal
15 Anomaly
16 Anomaly
17 Normal

 

결과 데이터.

State Durations Time List 
Normal 11 1~4, 8~14, 17
Anomlay 5 5~7, 15~16

 

 

테스트 데이터 생성

DROP TABLE IF EXISTS  log_events;

create table log_events
(
   group_id			INT 
,  ts				BIGINT 
,  state            INT    /* 0=normal, 1=anomaly_1, 2=anomaly_2 */
);

TRUNCATE TABLE log_events;

insert into log_events (group_id, ts, state) VALUES
(10001,   1, 0),
(10001,   2, 0),
(10001,   3, 0),
(10001,   4, 0),
-- (10001,   5, 0),
-- (10001,   6, 0),
(10001,   7, 0),
(10001,   8, 0),
(10001,   9, 0),
(10001,  10, 0),
(10001,  11, 1),
(10001,  12, 1),
(10001,  13, 1),
(10001,  14, 1),
(10001,  15, 1),
(10001,  16, 2),
-- (10001,  17, 2),
-- (10001,  18, 2),
(10001,  19, 2),
(10001,  20, 2),
(10001,  21, 2),
(10001,  22, 2),
(10001,  23, 2),
(10001,  24, 0),
(10001,  26, 0),
(10001,  27, 0),
(10001,  28, 0),
(10001,  29, 0),
(10001,  30, 0),
(10001,  31, 1),
(10001,  32, 1),
(10001,  33, 1),
(10001,  34, 1),
(10001,  35, 1),
(10001,  36, 2),
-- (10001,  37, 2),
-- (10001,  38, 2),
(10001,  39, 2),
(10001,  40, 2),
(10001,  41, 0),
(10001,  42, 0),
(10001,  43, 0),
(10001,  44, 0),
(10001,  45, 0),

(10002,   1, 0),
(10002,   2, 0),
(10002,   3, 0),
(10002,   4, 0),
(10002,   5, 0),
(10002,   6, 0),
(10002,   7, 0),
(10002,   8, 0),
-- (10002,   9, 0),
-- (10002,  10, 0),
(10002,  11, 0),
(10002,  12, 0),
(10002,  13, 0),
(10002,  14, 0),
(10002,  15, 0),
(10002,  16, 1),
(10002,  17, 1),
(10002,  18, 1),
-- (10002,  19, 1),
(10002,  20, 1),
(10002,  21, 2),
(10002,  22, 2),
(10002,  23, 2),
(10002,  24, 2),
(10002,  25, 2),
(10002,  26, 0),
(10002,  27, 0),
(10002,  28, 0),
(10002,  29, 0),
(10002,  30, 0);



SELECT * 
FROM log_events
;

산출 쿼리 

-- MySQL Version. 

WITH
cte_src AS
(
  SELECT row_number() over(ORDER BY A.group_id, A.ts, A.state) as rnum
  	,	  A.*
  FROM ( 
          SELECT 	group_id, ts, state 
          FROM 		log_events
          UNION ALL 
          SELECT 	group_id, maX(ts)+1 AS ts , 9 state -- end-marking by group_id 
          FROM   	log_events
          GROUP BY 	group_id
          UNION ALL 
          SELECT 	group_id, 0 AS ts , -1 state  -- start-marking by group_id 
          FROM   	log_events
          GROUP BY 	group_id
        ) A  
),
cte_t1 as (
    select row_number() over(ORDER BY A.group_id, A.ts) as rnum
  		,	A.group_id
    	,	 A.ts 
    	, 	 A.state							AS prev_state
    	, 	 B.state							AS current_state
    	,	 IF(A.state - B.state = 0, 0, 1)   	AS change_flag
    from 	cte_src AS A
    		JOIN 
    		cte_src AS B
    		On( A.group_id = B.group_id AND A.rnum +1 = B.rnum )
    WHERE  IF(A.state - B.state = 0, 0, 1) = 1
),
cte_t2 AS 
(
    SELECT 	A.group_id 
        ,	 A.current_state
        ,	 A.prev_state
        ,	 A.ts + 1		from_ts
        ,    B.ts 			to_ts 
        ,	 B.ts - A.ts    AS durations
    FROM	cte_t1  A 
             JOIN 
             cte_t1 B 
             ON( A.group_id = B.group_id AND A.rnum+1 = B.rnum )
)
SELECT 	group_id
	,	 current_state
	, 	 prev_state 
	,	 sum(durations) 	durations
FROM		cte_t2
GROUP BY group_id, current_state, prev_state 
;

 

다음 사이트에서 테스트 가능합니다. 

SQL OnLine IDE (sqliteonline.com)

728x90
  • 네이버 블러그 공유하기
  • 네이버 밴드에 공유하기
  • 페이스북 공유하기
  • 카카오스토리 공유하기
반응형