数据分析:SQL和Python

阿里云国内75折 回扣 微信号:monov8
阿里云国际,腾讯云国际,低至75折。AWS 93折 免费开户实名账号 代冲值 优惠多多 微信号:monov8 飞机:@monov6

SQL

统计数据概况计算样本总数、商家总数、用户总数、消费总数、领券总数等

select
    count(User_id) as '样本总数',
    count(distinct Merchant_id) as '商家总数',
    count(distinct User_id) as '用户总数',
    count(Date) as '消费总数',
    count(Date_received) as '领券总数',
    (select count(*) from ddm.offline_train as a where a.Date_received is not null and a.Date is not null) as '领券消费总数',
    (select count(*) from ddm.offline_train as a where a.Date_received is null and a.Date is not null) as '无券消费总数',
    (select count(*) from ddm.offline_train as a where a.Date_received is not null and a.Date is not null)/count(Date_received) as '核销率'
from ddm.offline_train

统计不同距离下领券人数、用券消费人数、核销率

# 查找各距离的领券人数/用券消费人数/核销率
select
    Distance,
    count(Coupon_id) as get_coupons_num,
    sum(if(Date_received is not null and Date is not null,1,0)) as user_coupons_num,
    sum(if(Date_received is not null and Date is not null,1,0)) /count(Coupon_id) as use_coupons_rate
from ddm.offline_train
where Distance is not null
group by Distance
order by distance

 消费券使用情况占比

# 消费券使用情况占比
with temp as (
    select
        case
            when Date_received is not null and Date is not null then '有券消费'
            when Date_received is not null and Date is null then '有券未消费'
            when Date_received is null and Date is not null then '无券消费'
        end as flag
    from ddm.offline_train
)
select
    flag as '优惠券使用情况',
    concat(round(count(flag)/(select count(*) from temp)*100,2),'%') as '百分比'
from temp
group by flag
order by count(flag)/(select count(*) from temp)

 

 with as 也叫做子查询部分类似于一个视图或临时表可以用来存储一部分的sql语句查询结果必须和其他的查询语句一起使用且中间不能有分号目前在oracle、sql server、hive等均支持 with as 用法但 mysql并不支持

不同类型优惠券的核销情况和平均领取距离

# 不同优惠券类型的核销情况和平均领取距离
select
    Discount_rate as '折扣',
    avg(Distance) as '平均距离',
    count(Date_received) as '领券人数',
    sum(if(Date_received is not null and Date is not null,1,0)) as '有券消费人数',
    sum(if(Date_received is not null and Date is not null,1,0))/count(Date_received) as '核销率'
from ddm.offline_train
where Date_received is not null
group by Discount_rate
order by '有券消费人数' desc

不同满减门槛的核销情况

# 不同满减门槛的核销情况
select
    mk as '门槛',
    count(*) as '领券数量',
    sum(if(Date is not null,1,0)) as '用券消费数量',
    concat(round(sum(if(Date is not null,1,0))/count(*)*100,2),'%') as '核销率'
from(select
         DATE,
         convert(if(Discount_rate like '%.%',0,Discount_rate),signed) as mk
    from ddm.offline_train) as aa
where mk is not null
group by mk
order by mk

不同核销率的商家分布情况占比

# 不同核销率用户分布
with temp as (
    select
        Merchant_id,
        count(Date_received) as get_num,
        sum(if(Date is not null and Date_received is not null,1,0)) as use_num,
        sum(if(Date is not null and Date_received is not null,1,0))/count(Date_received) as Merchant_rate
    from ddm.offline_train
    where Date_received is not null
    group by Merchant_id
)
 
 
select
    tag,
    concat(round(count(*)/(select count(*) from temp)*100,2),'%') as Merchant_percent
from(
    select
        Merchant_id,
        case
            when Merchant_rate = 0 then '核销率0'
            when Merchant_rate > 0 and Merchant_rate < 0.2 then '核销率0-20%'
            when Merchant_rate >= 0.2 and Merchant_rate< 0.3 then '核销率20%-30%'
            when Merchant_rate >= 0.3 and Merchant_rate< 0.5 then '核销率30%-50%'
            when Merchant_rate >= 0.5 then '核销率50%以上'
        end as tag
    from temp
    )aa
group by tag
order by Merchant_percent desc

不同领券次数商家的分布情况平均核销率/占比 

# 不同领券次数用户分布-平均核销率/占比
with temp as (
    select
        Merchant_id,
        count(Date_received) as get_num,
        sum(if(Date is not null and Date_received is not null,1,0))/count(Date_received) as user_rate,
        sum(if(Date is not null and Date_received is not null,1,0)) as use_num,
        case
            when count(Date_received)>100 then '100次以上'
            when count(Date_received)=0 then '0次'
            when count(Date_received) between 1 and 10 then '1-10次'
            when count(Date_received) between 11 and 50 then '11-50次'
            when count(Date_received) between 51 and 100 then '51-100次'
            else '其他次'
        end as flag
    from ddm.offline_train
    group by Merchant_id
)
 
select
    flag as '被领券次数',
    concat(round(avg(user_rate)*100,2),'%') as Merchant_avg_use_rate,
    concat(round(count(*)/(select count(*) from temp)*100,2),'%') as Merchant_percent
from temp
group by flag
order by (count(*)/(select count(*) from temp)) desc

不同核销率用户分布占比

# 不同核销率用户分布
with temp as (
    select
        User_id,
        count(Date_received) as get_num,
        sum(if(Date is not null and Date_received is not null,1,0)) as use_num,
        sum(if(Date is not null and Date_received is not null,1,0))/count(Date_received) as user_rate
    from ddm.offline_train
    where Date_received is not null
    group by User_id
)
 
 
select
    tag,
    concat(round(count(*)/(select count(*) from temp)*100,2),'%') as user_percent
from(
    select
        User_id,
        case
            when user_rate = 0 then '核销率0'
            when user_rate > 0 and user_rate < 0.3 then '核销率0-30%'
            when user_rate >= 0.3 and user_rate< 0.5 then '核销率30%-50%'
            when user_rate >= 0.5 then '核销率50%以上'
        end as tag
    from temp
    )aa
group by tag
order by user_percent desc

不同月份优惠券领券次数/核销次数/核销率

# 不同月份领券次数/核销次数/核销率
select
    `month`,
    coupons_get_num,
    coupons_use_num,
    concat(round(coupons_use_num/coupons_get_num*100,2),'%') as coupons_use_rate
from(select
        month(Date_received) as `month`,
        count(*) as coupons_get_num
    from ddm.offline_train
    where Date_received is not null
    group by month(Date_received)) as a
inner join(
    select
        month(Date) as `month`,
        count(*) as coupons_use_num
    from ddm.offline_train
    where Date_received is not null and Date is not null
    group by month(Date)
)as b using(`month`)
order by `month`

不同工作日的优惠券平均核销周期、核销率

# 工作日平均核销间隔、核销率
with get_coupons as(
    select
        weekday(Date_received)+1 as coupons_day,
        count(*) as coupons_get_num
    from ddm.offline_train
    where Date_received is not null
    group by weekday(Date_received)+1
),
use_coupons as(
    select
        weekday(Date)+1 as coupons_day,
        count(*) as coupons_use_num,
        round(avg(datediff(Date,Date_received)),2) as use_interval
    from ddm.offline_train
    where Date is not null and Date_received is not null
    group by weekday(Date)+1
)
 
select
    coupons_day,
    use_interval,
    concat(round(coupons_use_num/coupons_get_num*100,2),'%') as coupons_use_rate
from get_coupons
inner join use_coupons using(coupons_day)
order by coupons_day

 Python

库包

  1. pandas: 支持从CSV、JSON、SQL、Microsoft Excel load数据可以对各种维度的数据做归并、再成形、选择还有数据清洗和数据加工特征
  2. numpy存储和操作矩阵、数组计算的包数组计算、逻辑运算、傅立叶变化和图形操作、跟线性代数相关的操作
  3. Matplotlib绘图工具可以绘制线图、散点图、等高线图、条形图、柱状图、3D 图形、甚至是图形动画等。
  4. sklearn它涵盖了分类、回归、聚类、降维、模型选择、数据预处理六大模块降低机器学习实践门槛将复杂的数学计算集成为简单的函数并提供了众多公开数据集和学习案例。

数据预处理

data = pd.read_csv(r"/Users/xll/Documents/服务数据质量/测试数据1.csv")

#返回数据集行和列的元组其中data.shape[0]代表返回行数data.shape[1] 代表返回列数
print(data.shape) 

#返回数据集的所有列名
data.columns

#随机返回样本5行
data.sample(5)

#返回前5行
print(data.head(5))

#返回浮点型和正行字段的均值、最大值、等统计数据
print(data.describe())

#numpy包的这个方法也是可以得到同样的结果
import numpy as np
print(data.describe(include=[np.number]))

 参考

https://blog.csdn.net/twlve/article/details/128609147?spm=1001.2014.3001.5502

https://blog.csdn.net/twlve/article/details/128626526?spm=1001.2014.3001.5502

O2O优惠券核销-数据分析_十二十二呀的博客-CSDN博客_优惠券数据分析

阿里云国内75折 回扣 微信号:monov8
阿里云国际,腾讯云国际,低至75折。AWS 93折 免费开户实名账号 代冲值 优惠多多 微信号:monov8 飞机:@monov6
标签: python