-- 测试数据
drop table t;
create table t(
id int auto_increment primary key,
cust_id int,
trx_dte date,
trx_amt decimal(10, 2)
);
insert into t (cust_id, trx_dte, trx_amt) value(10086, '2022-01-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10086, '2022-02-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10086, '2022-03-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10086, '2022-04-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10085, '2022-01-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10085, '2022-02-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10010, '2021-01-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10010, '2021-02-05', 13.56);
insert into t (cust_id, trx_dte, trx_amt) value(10010, '2021-03-05', 33.56);
insert into t (cust_id, trx_dte, trx_amt) value(10010, '2021-04-05', 53.56);
insert into t (cust_id, trx_dte, trx_amt) value(10010, '2021-05-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10020, '2021-01-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10020, '2021-02-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10020, '2021-04-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10020, '2021-05-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10020, '2021-07-05', 23.56);
-- 递归调用生成 '2021-05-01' 到 '2022-05-30' 的日期表
with recursive Date_Ranges AS
(
select '2021-05-01' as Date
union all
select Date + interval 1 day
from Date_Ranges
where Date < '2022-05-30'
),
-- 生成月份相连,日期相同并且连续三个月的日期数组,例如:["2021-01-05", "2021-02-05", "2021-03-05"]
-- 注意这里限制了日期,因为过了 '2022-03-30' 后,就不再满足数据的过滤条件
date_array_ranges as (
select JSON_ARRAY(date, date_add(date, interval 1 month), date_add(date, interval 2 month)) json_array_trx_dte
from date_ranges
where date <= '2022-03-30'
),
-- 根据 cust_id ,trx_amt 进行聚合,并生成用户相同,金额相同的交易日期 json 数组
cust_trx_amt_dte_array as
(
select cust_id, trx_amt, JSON_ARRAYAGG(trx_dte) json_array_trx_dte
from t
group by cust_id, trx_amt
)
-- 由于是连续三个月,那么交易日期的 json 数组的数量肯定是大于等于 3
-- 在满足上述条件后,进一步判断交易日期 json 数组是否包含连续三个月日期相同的元素
-- 这里用 exists 来判断是为了避免数据重复
select *
from cust_trx_amt_dte_array a
where json_length(a.json_array_trx_dte) >= 3
and exists (select 1
from date_array_ranges b
where json_contains(a.json_array_trx_dte->'$', b.json_array_trx_dte)
);
-- 返回结果
+-----------+---------+----------------------------------------------------------+
| cust_id | trx_amt | json_array_trx_dte |
+-----------+---------+----------------------------------------------------------+
| 10086 | 23.56 | ["2022-01-05", "2022-02-05", "2022-03-05", "2022-04-05"] |
+---------------------+----------------------------------------------------------+
情况 1 和情况 2 都可以用到索引中的 c 字段 — 使用 use index condition 。问题的关键在于有能力使用到,但不是一定会用到,use index condition 是一种优化的方法,在大部分情况下对性能提升没有那么明显。在你的查询语句中,你查询条件中的列和查询的列都是列 a ,b ,c ,那么直接走你建立的索引就好了。如果你对表多添加几个列,比如 d ,e ,f ,索引不变,查询条件保持不变,直接 select * 去查询数据,会发现用不上了 c ,因为直接无法使用索引就返回数据,还需要回表查询一次。
#### MySQL 版本
8.0.26
#### 表结构
CREATE TABLE `foo` (
`id` int NOT NULL AUTO_INCREMENT COMMENT '自增主键',
`a` varchar(7) NOT NULL DEFAULT '',
`b` int NOT NULL DEFAULT '1',
`c` varchar(3) NOT NULL DEFAULT '',
`d` int DEFAULT NULL,
`e` int DEFAULT NULL,
`f` int DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `idx_a_b_c` (`a`,`b`,`c`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 ;
CREATE TABLE `bar` (
`id` int NOT NULL AUTO_INCREMENT COMMENT '自增主键',
`a` varchar(7) NOT NULL DEFAULT '',
`b` varchar(10) NOT NULL DEFAULT '',
`c` varchar(3) NOT NULL DEFAULT '',
`d` int DEFAULT NULL,
`e` int DEFAULT NULL,
`f` int DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `idx_a_b_c` (`a`,`b`,`c`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3;
#### 执行语句及执行计划
其中 used_key_parts 代表使用到索引中的哪几个列
root@localhost [zst]>EXPLAIN format=json SELECT * FROM bar where a = 'a1' and b like 'x%' and c = 'cc'\G
*************************** 1. row ***************************
EXPLAIN: {
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "0.71"
},
"table": {
"table_name": "bar",
"access_type": "range",
"possible_keys": [
"idx_a_b_c"
],
"key": "idx_a_b_c",
"used_key_parts": [
"a",
"b"
],
"key_length": "66",
"rows_examined_per_scan": 1,
"rows_produced_per_join": 1,
"filtered": "100.00",
"index_condition": "((`zst`.`bar`.`a` = 'a1') and (`zst`.`bar`.`b` like 'x%') and (`zst`.`bar`.`c` = 'cc'))",
"cost_info": {
"read_cost": "0.61",
"eval_cost": "0.10",
"prefix_cost": "0.71",
"data_read_per_join": "88"
},
"used_columns": [
"id",
"a",
"b",
"c",
"d",
"e",
"f"
]
}
}
}
root@localhost [zst]>EXPLAIN format=json SELECT a, b, c FROM bar where a = 'a1' and b like 'x%' and c = 'cc'\G
*************************** 1. row ***************************
EXPLAIN: {
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "0.35"
},
"table": {
"table_name": "bar",
"access_type": "index",
"possible_keys": [
"idx_a_b_c"
],
"key": "idx_a_b_c",
"used_key_parts": [
"a",
"b",
"c"
],
"key_length": "66",
"rows_examined_per_scan": 1,
"rows_produced_per_join": 1,
"filtered": "100.00",
"using_index": true,
"cost_info": {
"read_cost": "0.25",
"eval_cost": "0.10",
"prefix_cost": "0.35",
"data_read_per_join": "88"
},
"used_columns": [
"a",
"b",
"c"
],
"attached_condition": "((`zst`.`bar`.`a` = 'a1') and (`zst`.`bar`.`b` like 'x%') and (`zst`.`bar`.`c` = 'cc'))"
}
}
}