delete from tb where dates between '2017-09-11' and '2017-11-24'; delete from tb where dates='2017-12-04';
查看空值
1
SELECT * FROM tb WHERE user_id IS NULL OR item_id IS NULL OR category_id IS NULL OR behavior_type IS NULL OR timestamps IS NULL OR dates IS NULL OR time_hour IS NULL;
可以看到没有空值
四、构建模型(描述分析)
基于AARRR行为漏斗模型分析用户行为
用户角度
(1)用户总行为漏斗分析
UV、PV、PV/UV
1 2 3 4
SELECT COUNT(DISTINCT user_id) uv, (SELECT COUNT(*) FROM tb WHERE behavior_type = 'pv') pv, (SELECT COUNT(*) FROM tb WHERE behavior_type = 'pv') / COUNT(DISTINCT user_id) as 'pv/uv' FROM tb;
-- 计算复购人数 select count(*) 复购人数 from (select distinct user_id from tb where behavior_type='buy' group by user_id having count(behavior_type)>1) as buy2;
1 2 3
SELECT COUNT(DISTINCT user_id) 购买总人数 FROM tb WHERE behavior_type = 'buy';
select dates, sum(case when behavior_type = 'pv' THEN 1 ELSE 0 END) pv_count, sum(case when behavior_type = 'buy' THEN 1 ELSE 0 END) buy_count, sum(case when behavior_type = 'cart' THEN 1 ELSE 0 END) cart_count, sum(case when behavior_type = 'fav' THEN 1 ELSE 0 END) fav_count from tb group by dates order by dates;
(2)分析一天内用户每小时的行为
1 2 3 4 5 6 7 8
SELECT time_hour as hours, SUM(case WHEN behavior_type = 'pv' THEN 1 ELSE 0 END) pv_count, SUM(case WHEN behavior_type = 'buy' THEN 1 ELSE 0 END) buy_count, sum(case WHEN behavior_type = 'cart' THEN 1 ELSE 0 END) cart_count, sum(case WHEN behavior_type = 'fav' THEN 1 ELSE 0 END) fav_count FROM tb GROUP BY time_hour ORDER BY time_hour;
不同商品种类的用户行为
(1)热度最高的前20个商品
1 2 3
-- 求商品总数 SELECT COUNT(DISTINCT item_id) FROM tb;
商品共计398972种。
1 2 3 4 5 6
-- 求热度最高的前20个商品 SELECT item_id, COUNT(behavior_type) FROM tb GROUP BY item_id ORDER BY COUNT(behavior_type) DESC limit 20;
(2)商品被浏览、收藏、购买、加入购物车排行榜
1 2 3 4 5 6 7
# 销量前20位的商品 SELECT item_id,COUNT(*) FROM tb WHERE behavior_type = 'buy' GROUP BY item_id ORDER BY COUNT(*) desc LIMIT 20;
可以看到销量前20位的商品
1 2 3 4 5 6
# 浏览前20的商品 SELECT item_id,COUNT(*) FROM tb WHERE behavior_type = 'fav' GROUP BY item_id ORDER BY COUNT(*) desc LIMIT 20;
1 2 3 4 5 6
-- 收藏前20的商品 SELECT item_id,COUNT(*) FROM tb WHERE behavior_type = 'fav' GROUP BY item_id ORDER BY COUNT(*) desc LIMIT 20;
(3)商品复购分析
a. 商品复购排行榜
1 2 3 4 5 6
SELECT item_id, COUNT(distinct user_id) buy_times FROM tb WHERE behavior_type = 'buy' GROUP BY item_id order by COUNT(user_id) desc limit 20;
-- 统计商品被用户的购买次数 SELECT buy_times,count(item_id) FROM (SELECT item_id, COUNT(user_id) buy_times FROM tb WHERE behavior_type = 'buy' GROUP BY item_id) as t GROUP BY buy_times ORDER BY COUNT(*) DESC;
# 被购买商品总种数 SELECT COUNT(DISTINCT item_id) FROM tb WHERE behavior_type = 'buy';