Elasticsearch Query DSL详解
约 1770 字大约 6 分钟
elasticsearch query-dsl
2025-05-16
Elasticsearch Query DSL(Domain Specific Language)是基于 JSON 的查询语言,提供了丰富的全文搜索、精确匹配、复合查询和聚合分析能力。本文系统地梳理各类查询和聚合的使用方式。
查询分类
全文查询 (Full-text Queries)
全文查询会对查询字符串进行分析(analysis)后再匹配。
match 查询
最常用的全文查询,将查询文本分词后与倒排索引匹配。
// 基本 match
GET /articles/_search
{
"query": {
"match": {
"content": "distributed database"
}
}
}
// 默认 OR:匹配包含 "distributed" 或 "database" 的文档
// 指定 AND 操作符
GET /articles/_search
{
"query": {
"match": {
"content": {
"query": "distributed database",
"operator": "and"
}
}
}
}
// minimum_should_match:至少匹配多少个词
GET /articles/_search
{
"query": {
"match": {
"content": {
"query": "fast distributed scalable database",
"minimum_should_match": "75%"
}
}
}
}
// fuzziness:模糊匹配(容错拼写错误)
GET /articles/_search
{
"query": {
"match": {
"title": {
"query": "databse",
"fuzziness": "AUTO"
}
}
}
}
match_phrase 查询
短语匹配,要求词项按顺序出现且位置相邻。
GET /articles/_search
{
"query": {
"match_phrase": {
"content": {
"query": "distributed database",
"slop": 2 // 允许词项之间间隔最多 2 个位置
}
}
}
}
multi_match 查询
跨多个字段搜索。
GET /articles/_search
{
"query": {
"multi_match": {
"query": "redis performance",
"fields": ["title^3", "content", "tags^2"],
"type": "best_fields"
}
}
}
| type | 说明 | 适用场景 |
|---|---|---|
| best_fields | 取最高分字段的分数(默认) | 不同字段竞争,如 title vs content |
| most_fields | 各字段分数相加 | 同一内容多种分析方式 |
| cross_fields | 跨字段组合匹配 | first_name + last_name |
| phrase | 各字段执行 match_phrase,取最高分字段的分数 | 需要在多个字段中按短语匹配 |
query_string 查询
支持 Lucene 查询语法,功能最强大但也最复杂。
GET /articles/_search
{
"query": {
"query_string": {
"query": "(redis OR elasticsearch) AND performance NOT deprecated",
"default_field": "content",
"default_operator": "AND"
}
}
}
// 支持的语法
// AND / OR / NOT
// 字段指定: title:redis
// 通配符: da?a*ase
// 范围: date:[2025-01-01 TO 2025-12-31]
// 模糊: database~2
// 权重: redis^3
精确查询 (Term-level Queries)
精确查询不会对查询文本进行分析,直接与倒排索引中的词项匹配。
term 查询
// 精确匹配(用于 keyword/数值/日期字段)
GET /users/_search
{
"query": {
"term": {
"status": {
"value": "active"
}
}
}
}
// 注意:不要对 text 字段使用 term 查询!
// text 字段经过分析器处理,存储的是分词后的小写词项
// term 查询不会分析查询文本,"Active" 无法匹配存储的 "active"
terms 查询
// 匹配多个值中的任意一个
GET /products/_search
{
"query": {
"terms": {
"category": ["electronics", "books", "clothing"]
}
}
}
range 查询
GET /orders/_search
{
"query": {
"range": {
"created_at": {
"gte": "2025-01-01",
"lt": "2025-06-01",
"format": "yyyy-MM-dd"
}
}
}
}
// 数值范围
GET /products/_search
{
"query": {
"range": {
"price": {
"gte": 100,
"lte": 500
}
}
}
}
exists 查询
// 字段存在且非 null
GET /users/_search
{
"query": {
"exists": {
"field": "phone"
}
}
}
wildcard 和 prefix 查询
// 通配符查询(性能较差,避免前缀通配)
GET /users/_search
{
"query": {
"wildcard": {
"email.keyword": "*@gmail.com"
}
}
}
// 前缀查询
GET /users/_search
{
"query": {
"prefix": {
"name.keyword": "Ali"
}
}
}
复合查询 (Compound Queries)
bool 查询
最常用的复合查询,组合多个查询子句。
GET /articles/_search
{
"query": {
"bool": {
"must": [
{ "match": { "content": "elasticsearch" } }
],
"filter": [
{ "term": { "status": "published" } },
{ "range": { "date": { "gte": "2025-01-01" } } }
],
"should": [
{ "match": { "tags": "tutorial" } },
{ "match": { "tags": "beginner" } }
],
"must_not": [
{ "term": { "language": "deprecated" } }
],
"minimum_should_match": 1
}
}
}
filter vs must 的区别:
- filter 不计算相关性分数,可以被缓存,性能更好
- must 计算相关性分数,影响文档排序
- 对于精确过滤条件(状态、日期范围等),优先使用 filter
dis_max 查询
取多个查询中的最高分(而非 bool 的分数相加)。
GET /articles/_search
{
"query": {
"dis_max": {
"queries": [
{ "match": { "title": "elasticsearch guide" } },
{ "match": { "content": "elasticsearch guide" } }
],
"tie_breaker": 0.3 // 非最高分查询的分数 × tie_breaker 加入总分
}
}
}
function_score 查询
自定义评分函数,精细控制文档排序。
GET /articles/_search
{
"query": {
"function_score": {
"query": { "match": { "content": "elasticsearch" } },
"functions": [
{
"filter": { "term": { "featured": true } },
"weight": 10
},
{
"field_value_factor": {
"field": "likes",
"factor": 1.2,
"modifier": "log1p",
"missing": 1
}
},
{
"gauss": {
"date": {
"origin": "2025-05-16",
"scale": "30d",
"decay": 0.5
}
}
}
],
"score_mode": "sum",
"boost_mode": "multiply"
}
}
}
评分机制 (BM25)
Elasticsearch 默认使用 BM25 算法计算相关性分数。
// 查看评分细节
GET /articles/_search
{
"explain": true,
"query": {
"match": { "content": "elasticsearch" }
}
}
// BM25 参数调整
{
"settings": {
"index": {
"similarity": {
"custom_bm25": {
"type": "BM25",
"k1": 1.2, // 控制 TF 饱和度 (默认 1.2)
"b": 0.75 // 控制文档长度归一化 (默认 0.75)
}
}
}
}
}
聚合 (Aggregations)
指标聚合 (Metric Aggregations)
GET /orders/_search
{
"size": 0,
"aggs": {
"avg_amount": { "avg": { "field": "amount" } },
"max_amount": { "max": { "field": "amount" } },
"min_amount": { "min": { "field": "amount" } },
"total_amount": { "sum": { "field": "amount" } },
"order_count": { "value_count": { "field": "id" } },
"unique_users": { "cardinality": { "field": "user_id" } },
"amount_stats": { "stats": { "field": "amount" } },
"amount_percentiles": {
"percentiles": {
"field": "amount",
"percents": [50, 90, 95, 99]
}
}
}
}
桶聚合 (Bucket Aggregations)
// terms 聚合(按字段值分桶)
GET /orders/_search
{
"size": 0,
"aggs": {
"by_status": {
"terms": {
"field": "status",
"size": 10,
"order": { "_count": "desc" }
},
"aggs": {
"avg_amount": { "avg": { "field": "amount" } }
}
}
}
}
// date_histogram 聚合(按时间分桶)
GET /orders/_search
{
"size": 0,
"aggs": {
"orders_by_month": {
"date_histogram": {
"field": "created_at",
"calendar_interval": "month",
"format": "yyyy-MM"
},
"aggs": {
"total_amount": { "sum": { "field": "amount" } }
}
}
}
}
// range 聚合
GET /products/_search
{
"size": 0,
"aggs": {
"price_ranges": {
"range": {
"field": "price",
"ranges": [
{ "to": 100 },
{ "from": 100, "to": 500 },
{ "from": 500 }
]
}
}
}
}
管道聚合 (Pipeline Aggregations)
// 基于其他聚合结果的二次计算
GET /orders/_search
{
"size": 0,
"aggs": {
"monthly_sales": {
"date_histogram": {
"field": "created_at",
"calendar_interval": "month"
},
"aggs": {
"total": { "sum": { "field": "amount" } }
}
},
"max_monthly": {
"max_bucket": {
"buckets_path": "monthly_sales>total"
}
},
"avg_monthly": {
"avg_bucket": {
"buckets_path": "monthly_sales>total"
}
},
"moving_avg": {
"moving_avg": {
"buckets_path": "monthly_sales>total",
"window": 3
}
}
}
}
分页方式
search_after 深分页
// 第一页
GET /articles/_search
{
"size": 10,
"sort": [
{ "date": "desc" },
{ "_id": "asc" }
],
"query": { "match_all": {} }
}
// 后续页:使用上一页最后一条记录的 sort 值
GET /articles/_search
{
"size": 10,
"sort": [
{ "date": "desc" },
{ "_id": "asc" }
],
"query": { "match_all": {} },
"search_after": ["2025-05-10T10:00:00Z", "doc_id_123"]
}
Point in Time (PIT)
// 创建 PIT
POST /articles/_pit?keep_alive=5m
// 使用 PIT + search_after
GET /_search
{
"size": 10,
"query": { "match_all": {} },
"pit": {
"id": "pit_id_here",
"keep_alive": "5m"
},
"sort": [
{ "date": "desc" },
{ "_shard_doc": "asc" }
],
"search_after": [...]
}
查询优化建议
| 优化点 | 说明 |
|---|---|
| 过滤用 filter | 不需要评分的条件放 filter,可缓存 |
| 避免前导通配符 | 以 * 或 ? 开头的模式(如 *abc)需要扫描大量词项,性能差,可使用 ngram 替代 |
| 合理设置 size | 默认返回 10 条,按需调整 |
| 使用 source filtering | _source: ["field1", "field2"] 减少传输 |
| 异步搜索 | 大查询使用 _async_search |
| Profile API | "profile": true 分析查询性能瓶颈 |
总结
- 全文查询用于搜索文本字段,会经过分析器处理
- 精确查询用于 keyword/数值/日期字段,不经过分析器
- bool 查询是组合查询的核心,filter 子句不参与评分且可缓存
- BM25 是默认评分算法,考虑词频、逆文档频率和文档长度
- 聚合提供强大的分析能力,支持指标、桶和管道三种类型
- 深分页推荐使用 search_after + PIT 方式
贡献者
更新日志
2026/3/14 13:09
查看所有更新日志
9f6c2-feat: organize wiki content and refresh site setup于