Carsharing 데이터 분석 3: Cohort Analysis
2019-10-22
Tabluea Visualization: Cohort Chart
Table: dump table
필드 이름 | 유형 |
---|---|
id | INTEGER |
created | TIMESTAMP |
updated | TIMESTAMP |
deleted | TIMESTAMP |
status | INTEGER |
pickup_time | TIMESTAMP |
return_time | TIMESTAMP |
options | INTEGER |
purpose | STRING |
car_id | INTEGER |
exemption_id | STRING |
paycard_id | STRING |
pickup_spot_id | INTEGER |
return_spot_id | INTEGER |
user_id | INTEGER |
전체 대여 횟수 및 이용자 확인
SELECT COUNT(id), COUNT(DISTINCT user_id)
FROM `carsharing-256705.carsharing.dump`
count | user_count | |
---|---|---|
1 | 47011 | 2875 |
Subquery를 이용한 방법
SELECT COUNT(DISTINCT user_id)
FROM `carsharing-256705.carsharing.dump`
WHERE user_id IN (SELECT DISTINCT user_id FROM `carsharing-256705.carsharing.dump` WHERE pickup_time BETWEEN '2018-01-01' AND '2018-01-31')
AND pickup_time BETWEEN '2018-02-01' AND '2018-02-28'
WITH 구문을 이용한 방법
WITH month_1_user AS (
SELECT DISTINCT user_id
FROM `carsharing-256705.carsharing.dump`
WHERE date(pickup_time, 'Asia/Seoul') BETWEEN '2018-01-01' AND '2018-01-30'
) ,
month_2_user AS (
SELECT DISTINCT user_id
FROM `carsharing-256705.carsharing.dump`
WHERE date(pickup_time, 'Asia/Seoul') BETWEEN '2018-02-01' AND '2018-02-28'
),
month_3_user AS (
SELECT DISTINCT user_id
FROM `carsharing-256705.carsharing.dump`
WHERE date(pickup_time, 'Asia/Seoul') BETWEEN '2018-03-01' AND '2018-03-30'
),
month_4_user AS (
SELECT DISTINCT user_id
FROM `carsharing-256705.carsharing.dump`
WHERE date(pickup_time, 'Asia/Seoul') BETWEEN '2018-04-01' AND '2018-04-30'
),
month_5_user AS (
SELECT DISTINCT user_id
FROM `carsharing-256705.carsharing.dump`
WHERE date(pickup_time, 'Asia/Seoul') BETWEEN '2018-05-01' AND '2018-05-30'
),
month_6_user AS (
SELECT DISTINCT user_id
FROM `carsharing-256705.carsharing.dump`
WHERE date(pickup_time, 'Asia/Seoul') BETWEEN '2018-06-01' AND '2018-06-28'
)
SELECT
(SELECT count(*)
FROM month_1_user) AS month_1_cohort,
(SELECT count(*)
FROM month_1_user
JOIN month_2_user USING (user_id)) AS month_2_cohort,
(SELECT count(*)
FROM month_1_user
JOIN month_3_user USING (user_id)) AS month_3_cohort,
(SELECT count(*)
FROM month_1_user
JOIN month_4_user USING (user_id)) AS month_4_cohort,
(SELECT count(*)
FROM month_1_user
JOIN month_5_user USING (user_id)) AS month_5_cohort
month_1_cohort | month_2_cohort | month_3_cohort | month_4_cohort | month_5_cohort | |
---|---|---|---|---|---|
1 | 373 | 216 | 200 | 190 | 170 |
Cohort Percentage 구하기
SELECT
ROUND(month_1_cohort / month_1_cohort * 100) AS month_1_pct,
ROUND(month_2_cohort / month_1_cohort * 100) AS month_2_pct,
ROUND(month_3_cohort / month_1_cohort * 100) AS month_3_pct,
ROUND(month_4_cohort / month_1_cohort * 100) AS month_4_pct,
ROUND(month_5_cohort / month_1_cohort * 100) AS month_5_pct
FROM (
WITH month_1_user AS (
SELECT DISTINCT user_id
FROM `carsharing-256705.carsharing.dump`
WHERE date(pickup_time, 'Asia/Seoul') BETWEEN '2018-01-01' AND '2018-01-30'
) ,
month_2_user AS (
SELECT DISTINCT user_id
FROM `carsharing-256705.carsharing.dump`
WHERE date(pickup_time, 'Asia/Seoul') BETWEEN '2018-02-01' AND '2018-02-28'
),
month_3_user AS (
SELECT DISTINCT user_id
FROM `carsharing-256705.carsharing.dump`
WHERE date(pickup_time, 'Asia/Seoul') BETWEEN '2018-03-01' AND '2018-03-30'
),
month_4_user AS (
SELECT DISTINCT user_id
FROM `carsharing-256705.carsharing.dump`
WHERE date(pickup_time, 'Asia/Seoul') BETWEEN '2018-04-01' AND '2018-04-30'
),
month_5_user AS (
SELECT DISTINCT user_id
FROM `carsharing-256705.carsharing.dump`
WHERE date(pickup_time, 'Asia/Seoul') BETWEEN '2018-05-01' AND '2018-05-30'
),
month_6_user AS (
SELECT DISTINCT user_id
FROM `carsharing-256705.carsharing.dump`
WHERE date(pickup_time, 'Asia/Seoul') BETWEEN '2018-06-01' AND '2018-06-28'
)
SELECT
(SELECT count(*)
FROM month_1_user) AS month_1_cohort,
(SELECT count(*)
FROM month_1_user
JOIN month_2_user USING (user_id)) AS month_2_cohort,
(SELECT count(*)
FROM month_1_user
JOIN month_3_user USING (user_id)) AS month_3_cohort,
(SELECT count(*)
FROM month_1_user
JOIN month_4_user USING (user_id)) AS month_4_cohort,
(SELECT count(*)
FROM month_1_user
JOIN month_5_user USING (user_id)) AS month_5_cohort
)
month_1_pct | month_2_pct | month_3_pct | month_4_pct | month_5_pct | |
---|---|---|---|---|---|
1 | 100.0 | 58.0 | 54.0 | 51.0 | 46.0 |