Skip to main content

常用mysql sql整理

13 min read
# 添加索引
CREATE FULLTEXT INDEX index_name ON table_name (field_to_index);

# 删除索引
DROP INDEX index_name ON table_name;

# 查看索引
SHOW INDEX FROM table_name;

# 硬删除数据库数据后,清除对应文件,从而优化索引效率的
OPTIMIZE TABLE table_name;

# 查看 MySQL 是否支持分表
SHOW VARIABLES LIKE '%partition%';

# 查看当前进程列表
SHOW PROCESSLIST;

# 字符串函数
SELECT LEFT('string', length), RIGHT('string', length), SUBSTR('string', start, length), SUBSTRING_INDEX('string', delimiter, count);

# 将字符串转换为日期类型
SELECT CAST(date AS DATE) AS date FROM table1;

# string转int
SELECT CAST('123' AS SIGNED);

# 聚合函数
SELECT AVG(column_name), MIN(column_name), MAX(column_name), COUNT(column_name), SUM(column_name) FROM table_name;

# 聚合字符串
SELECT GROUP_CONCAT(column_name ORDER BY another_column_name SEPARATOR 'separator');

# 控制流函数if(), ifnull(), when case
SELECT COUNT(IF(channel_type=1, IF(check_status=6,1,0),0)) FROM tougao_record WHERE accept_company_id=100;

# 时间戳按天分组
SELECT DATE_FORMAT(date_entered, "%Y-%m-%d") AS ud, COUNT(id) AS cs FROM ttrss_entries GROUP BY ud;

# 每个月的数据统计
SELECT MONTH(date_entered) AS month, COUNT(id) FROM ttrss_entries WHERE YEAR(date_entered) = 2020 GROUP BY month;

# xxx时间内的数据统计
SELECT COUNT(id) FROM ttrss_feeds WHERE DATE_SUB(CURDATE(), INTERVAL 7 DAY) <= DATE(last_successful_update);

# 怎么获取格式化时间的差值
SELECT TIME_TO_SEC(TIMEDIFF(t2, t1)), TIMESTAMPDIFF(SECOND, t1, t2), UNIX_TIMESTAMP(t2) - UNIX_TIMESTAMP(t1);

# 按中文字母排序
SELECT * FROM tablename WHERE 1=1 ORDER BY CONVERT(name USING gbk) COLLATE gbk_chinese_ci ASC;

# 目前有分数散布在1-100的n组数据,怎么根据统计区间呢?
SELECT ELT(INTERVAL(score, 0, 60, 80, 100), '0-60', '60-80', '80-100') AS score_interval, COUNT(id) FROM student_score GROUP BY score_interval;


# insert into ... on duplicate key update
INSERT INTO test_tbl (id, dr) VALUES (1, 2), (2, 3), (x, y) ON DUPLICATE KEY UPDATE dr=VALUES(dr);

# 批量插入时,如何不插入重复数据?
INSERT IGNORE INTO test_tbl (id, dr) VALUES (1, 2), (2, 3), (x, y);

# 不存在则插入,存在则修改
REPLACE INTO test_tbl (id, dr) VALUES (1, 2), (2, 3), (x, y);

mysql-sql

use1.sql


# 取出数据并显示行号 (假设按p_view倒排序)
# 用到了ifnull以及回话变量
select p_name, p_type, p_view, @rownum := @rownum + 1
from products a,
(select @rownum := 0) b
ORDER BY p_view desc;

# SQL也能实现小算法(辅助决策)
# 计算商品评分、及时补货
# 根据分类显示出商品的名称、点击量和销售量情况。没有销售的置为0
# select p_type,a.p_name,a.p_view, IFNULL(b.p_sales,0)
# from products a
# LEFT JOIN products_sales b on a.p_id=b.p_id
# GROUP BY a.p_type,a.p_name order by a.p_type desc, a.p_view desc;


# select a.*,b.sales_avg from (select p_type,a.p_name,a.p_view, IFNULL(b.p_sales,0) as sales from products a LEFT JOIN products_sales b on a.p_id=b.p_id GROUP BY a.p_type,a.p_name order by a.p_type desc, a.p_view desc ) a ,( select p_type,round(sum(sales)/count(*),0) as sales_avg from ( select p_type, a.p_name,a.p_view, IFNULL(b.p_sales,0) as sales from products a LEFT JOIN products_sales b on a.p_id=b.p_id GROUP BY a.p_type,a.p_name order by a.p_type desc, a.p_view desc ) cGROUP BY p_type)bwhere a.p_type=b.p_type


# 商品的分类点击量平均值
# select p_type,round(sum(p_view)/count(*),0) as view_avg from products GROUP BY p_type


# select p_type,p_name, (p_view/view_avg)*0.3+(sales/sales_avg)*0.7 from (select a.*,b.sales_avg,c. view_avg from (select p_type,a.p_name,a.p_view, IFNULL(b.p_sales,0) as sales from products a LEFT JOIN products_sales b on a.p_id=b.p_id GROUP BY a.p_type,a.p_name order by a.p_type desc, a.p_view desc ) a ,( select p_type,round(sum(sales)/count(*),0) as sales_avg from ( select p_type, a.p_name,a.p_view, IFNULL(b.p_sales,0) as sales from products a LEFT JOIN products_sales b on a.p_id=b.p_id GROUP BY a.p_type,a.p_name order by a.p_type desc, a.p_view desc ) cwhere c.sales>0GROUP BY p_type)b,(select p_type,round(sum(p_view)/count(*),0) as view_avg from products GROUP BY p_type )cwhere a.p_type=b.p_type and a.p_type=c.p_type ) aa


# 1、譬如我们要做个用户激励注册 (或者类似"传销系统" ~_~)
# 2、用户注册时可以填 “推荐人”
# 于是我们很可能会有个需求: 查询出,注册用户的推荐人。(说不定要给奖励或分成哦~~~~)

select a.users, b.u_name
from (select GROUP_CONCAT(u_name) as users, p_id from webusers GROUP BY p_id) a
INNER JOIN webusers b on a.p_id = b.u_id;

# 找到重复数据、并删除之

select r_content, r_userid, count(*)
from reviews
GROUP BY r_content, r_userid;

select r_content, r_userid, count(*)
from reviews
GROUP BY r_content, r_userid
HAVING count(*) > 1;

select GROUP_CONCAT(r_id) as ids, r_content, r_userid, count(*)
from reviews
GROUP BY r_content, r_userid
HAVING count(*) > 1;


# 有重复数据不插入或更新的处理方法

# insert into users(user_name,user_qq) values('shenyi','123123') on DUPLICATE key update user_updatetime=now(),user_qq=values(user_qq);

# insert into news(news_title,news_abstract,news_code)VALUES('这是一条PHP新闻','PHP新闻相关的新闻摘要',MD5(CONCAT('这是一条PHP新闻','PHP新闻相关的新闻摘要')))on DUPLICATE key update dupnum=dupnum+1


# 更新数据技巧之:update表子查询、多条件判断

# 系统运行了一段时间后,好多用户有爱的消费了
# 我们需要在表中对用户进行等级更新,我们的需求是
# 1、只对超过平均消费金额的用户进行等级升级
# 2、达到平均消费金额 1倍的用户 等级是 白金用户
# 3、2倍或以上的是黄金用户
# 4、其他一律是吃瓜用户

update
user_level,
(select avg(user_total) as avg from user_level) b
set user_rank=
case
when round(user_total / avg) >= 1 and round(user_total / avg) < 2
then '白金用户'
when round(user_total / avg) >= 2 then '黄金用户'

ELSE '吃瓜' end
where user_total >= b.avg;


# 利用order by 实现"排名作弊"

# 常规做法
select *
from (select * from user_level where id in (2, 4, 6) order by id desc) a
union
select *
from (select * from user_level where id not in (2, 4, 6) order by id desc) b;

# 优化做法
select *
from user_level
order by id in (4, 6, 2) and id <> 2 desc, user_total desc;

# 等价于下面的sql
select *
from user_level
order by field(id, 4, 6, 2) and id <> 2 desc, user_total desc;


### 场景案例:获取连续签到X天用户列表
# 统计出连续签到X天的用户。然后对他们进行排行或者假模假样的奖励
select user_name,
sign_date,
IF(@pre = user_name and DATEDIFF(sign_date, @pre_date) = 1, @rownum := @rownum + 1,
@rownum := 1),
@pre := user_name,
@pre_date := sign_date
from (select user_name, sign_date
from user_sign
GROUP BY user_name, sign_date
ORDER BY user_name, sign_date) a,
(select @pre := '', @rownum := 0, @pre_date := '') b;



### 场景案例:子查询去重、获取商品分类最新销售情况

# 这类统计在项目后台经常出现。属于统计后二次加工统计
# 其实这是统计出 某个商品在某个日期(天)内的销售总数,所以肯定有重复。
# 需求是:查询出 图书和食品两个分类(或多个分类) 在最新的一天内的商品销售情况

select a.*
from prod_sales a
INNER JOIN(select prod_class, max(sales_date) as sn
from prod_sales
GROUP BY prod_class) b
on a.prod_class = b.prod_class and a.sales_date = b.sn
order by prod_class;


### 场景案例:多表关联update(用户积分奖励)

# 譬如我们 规定 在 某2天内,凡是消费的客户给予 消费金额的10%作为积分奖励。
# 以最大的一天为准。不累加 下面的表是为了演示方便,实际中我们很可能要通过关联订单表等等才能产生这种记录

update users_score a
INNER JOIN (select max(paymoney) as mp, user_name
from users_buy
group by user_name) b on a.user_name = b.user_name
set a.user_score= a.user_score + (b.mp * 0.1);

use2.sql

### 获取指定日期的活跃人数
select count(*), DATE_FORMAT(logintime, '%Y-%m-%d')
from user_login
where DATE_FORMAT(logintime, '%Y-%m-%d') = '2019-04-01';

select count(*), DATE_FORMAT(logintime, '%Y-%m-%d')
from user_login
where DATEDIFF(logintime, '2019-04-01') = 0;

# explain查看索引使用情况
# system>const>eq_ref>ref> fulltext > ref_or_null > index_merge > unique_subquery > index_subquery >range>index>ALL
# 更好的做法
explain
select count(*), DATE_FORMAT(logintime, '%Y-%m-%d')
from user_login
where logintime >= '2019-04-01'
and logintime < '2019-04-02';


### 统计指定日期注册新用户 的次日登录人数

# 譬如 2019-03-05 .我们要算出
# 1、 2019-03-05 注册的用户
# 2、次日(也就是2019-03-06)登录的用户

select count(DISTINCT userid) as c1
from user_login
where logintime >= '2019-03-06'
and logintime < '2019-03-07'
and userid in (select userid
from user_info
where addtime >= '2019-03-05'
and addtime < '2019-03-06');


### 统计指定日期范围内用户的登录总次数、每天登录次数

# 譬如 2019-03-05~~~~~2019-03-10 .注意:一个用户一天内不止登录一次

# 获取总次数
select userid, count(*)
from user_login
where logintime >= '2019-03-05'
and logintime < '2019-03-11'
group by userid;

# 获取每天
select userid, DATE_FORMAT(logintime, '%Y%m%d') as dd, count(*) as cc
from user_login
where logintime >= '2019-03-05'
and logintime < '2019-03-11'
and userid = 'u1001997'
group by userid, DATE_FORMAT(logintime, '%Y%m%d');

# 两个合并一下
select userid, DATE_FORMAT(logintime, '%Y%m%d'), count(*)
from user_login
where logintime >= '2019-03-05'
and logintime < '2019-03-11'
GROUP BY userid, DATE_FORMAT(logintime, '%Y%m%d');


### 统计指定日期范围内每天都登录的用户(连续x天都登录、加行号)
# 该需求属于上面场景的延伸,所以修改原sql即可

select userid,
d,
IF(@pre = userid, @rownum := @rownum + 1, @rownum := 1) as rownum,
@pre := userid
from (select userid, DATE_FORMAT(logintime, '%Y%m%d') as d
from user_login
where logintime >= '2019-03-05'
and logintime < '2019-03-11'
GROUP BY userid, DATE_FORMAT(logintime, '%Y%m%d')) a,
(select @pre := '', @rownum := 0) b;


### 活跃用户的定义、模拟日活用户数、7天日活数统计

# 凡是 每天登录超过2次的(>=2) 计入我们的日活用户

# 先获取某一天的日活数 以2019-03-02 为例,我们要获取 该日的登录情况

select userid, DATE_FORMAT(logintime, '%Y%m%d') as d, count(*) as logintcount
from user_login
where logintime >= '2019-03-02'
and logintime < '2019-03-03'
GROUP BY userid, DATE_FORMAT(logintime, '%Y%m%d');

# 过滤出 登录次数大于等于2的人数
select count(*)
from (select userid, DATE_FORMAT(logintime, '%Y%m%d') as d, count(*) as logintcount
from user_login
where logintime >= '2019-03-02'
and logintime < '2019-03-03'
GROUP BY userid, DATE_FORMAT(logintime, '%Y%m%d')) a
where a.logintcount >= 2;

# 7天的日活数
select d, count(*)
from (select userid, DATE_FORMAT(logintime, '%Y%m%d') as d, count(*) as logintcount
from user_login
where logintime >= '2019-03-02'
and logintime < DATE_ADD('2019-03-02', INTERVAL 7 day)
GROUP BY userid, DATE_FORMAT(logintime, '%Y%m%d')) a
where a.logintcount >= 2
GROUP BY d;


### 统计指定日期日活用户的日增长率(一条SQL)

# 求 3.2-----3.8 的日活 日增长率
# 实际统计时,要多算出来2天 (头尾各加一天)

select DATE_ADD(a2.d, INTERVAL 1 day), a2.dau2, dau2 / dau1
from (select a1.*,
if(@pre = '', 0, @pre) as dau2,
@pre := dau1,
@rownum := @rownum + 1 as rownum
from (select d, count(*) as dau1
from (select userid,
DATE_FORMAT(logintime, '%Y%m%d') as d,
count(*) as logintcount
from user_login
where logintime >= '2019-03-01'
and logintime < DATE_ADD('2019-03-02', INTERVAL 8 day)
GROUP BY userid, DATE_FORMAT(logintime, '%Y%m%d')) a
where a.logintcount >= 2
GROUP BY d
order by d desc) a1,
(select @pre := '', @rownum := 0) b1) a2
where a2.rownum > 2;


### 统计月活人数、日活缓存表的使用

# 所谓的月活就是把日活的人数累加就可以了

select DATE_FORMAT(d, '%Y%m'), sum(dau1)
from (select d, count(*) as dau1
from (select userid, DATE_FORMAT(logintime, '%Y%m%d') as d, count(*) as logintcount
from user_login
where logintime >= '2019-01-01'
and logintime < '2019-04-01'
GROUP BY userid, DATE_FORMAT(logintime, '%Y%m%d')) a
where a.logintcount >= 2
GROUP BY d
order by d desc) a
GROUP BY DATE_FORMAT(d, '%Y%m');

### 留存率、指定日期的次日留存率计算

# 某天进来一批新用户,有的用了一下就再也没回来过,有的用过几次后也离开了,还有些在一定时间段里,能够一直持续活跃使用,这些一直保持活跃的我们称之为留存用户。留存用户占这批次新增用户的比例,就是留存率。

# 次日留存率:(当天新增的用户中,在注册的第2天还登录的用户数)/第一天新增总用户数;
# 第3日留存率:(第一天新增用户中,在注册的第3天还有登录的用户数)/第一天新增总用户数;
# 第7日留存率:(第一天新增的用户中,在注册的第7天还有登录的用户数)/第一天新增总用户数;
# 第30日留存率:(第一天新增的用户中,在注册的第30天还有登录的用户数)/第一天新增总用户数。

# 2019-03-01 的次日留存率
# 分两步
# 1、计算2019-03-01的注册人数
# 2、计算2019-03-02的登陆人数

select logincount / regcount
from (select count(*) as regcount
from user_info
where addtime >= '2019-03-01'
and addtime < '2019-03-02') a,
(select count(*) as logincount
from (select userid, count(*)
from user_login
where logintime >= '2019-03-02'
and logintime < '2019-03-03'
and userid in (select userid
from user_info
where addtime >= '2019-03-01'
and addtime < '2019-03-02')
GROUP BY userid) a) b;



### 留存率、统计指定日期的连续N天的留存率

# 以2019-3-1 作为基准 计算次日、2日、3日。。。。。N日留存率,现在我们假设统计 到 3月8日

select a.ltime, a.logincount / b.regcount
from (select ltime, count(*) as logincount
from (select DATE_FORMAT(logintime, '%Y-%m-%d') as ltime, userid, count(*)
from user_login
where logintime >= '2019-03-02'
and logintime < '2019-03-09'
and userid in (select userid
from user_info
where addtime >= '2019-03-01' and addtime < '2019-03-02')
GROUP BY DATE_FORMAT(logintime, '%Y-%m-%d'), userid) a
group by ltime) a,
(select count(*) as regcount
from user_info
where addtime >= '2019-03-01'
and addtime < '2019-03-02') b;