聚合(Aggregations)是Elasticsearch提供的强大数据分析功能,可以对搜索结果进行统计、分组、计算等操作。聚合分为三大类:指标聚合、桶聚合和管道聚合。
| 类型 | 说明 | 示例 |
|---|---|---|
| 指标聚合(Metric) | 计算数值指标,如求和、平均值、最大值 | 计算商品平均价格 |
| 桶聚合(Bucket) | 将文档分组到不同的桶中 | 按品牌分组商品 |
| 管道聚合(Pipeline) | 对其他聚合的结果进行二次聚合 | 计算每月销售额的移动平均 |
# 求平均值
GET /products/_search
{
  "size": 0,
  "aggs": {
    "avg_price": {
      "avg": { "field": "price" }
    }
  }
}
# 求和
GET /products/_search
{
  "size": 0,
  "aggs": {
    "total_sales": {
      "sum": { "field": "sales" }
    }
  }
}
# 最大值和最小值
GET /products/_search
{
  "size": 0,
  "aggs": {
    "max_price": {
      "max": { "field": "price" }
    },
    "min_price": {
      "min": { "field": "price" }
    }
  }
}
# 统计聚合(一次性获取多个指标)
GET /products/_search
{
  "size": 0,
  "aggs": {
    "price_stats": {
      "stats": { "field": "price" }
    }
  }
}
# 响应:
{
  "aggregations": {
    "price_stats": {
      "count": 100,
      "min": 999.0,
      "max": 19999.0,
      "avg": 5499.5,
      "sum": 549950.0
    }
  }
}
# 扩展统计(包含方差、标准差等)
GET /products/_search
{
  "size": 0,
  "aggs": {
    "price_extended_stats": {
      "extended_stats": { "field": "price" }
    }
  }
}
# 响应包含:
# count, min, max, avg, sum
# sum_of_squares, variance, std_deviation
# std_deviation_bounds (upper/lower)
# 计算价格的百分位数
GET /products/_search
{
  "size": 0,
  "aggs": {
    "price_percentiles": {
      "percentiles": {
        "field": "price",
        "percents": [25, 50, 75, 95, 99]
      }
    }
  }
}
# 响应:
{
  "aggregations": {
    "price_percentiles": {
      "values": {
        "25.0": 2999.0,
        "50.0": 5499.0,
        "75.0": 8999.0,
        "95.0": 15999.0,
        "99.0": 19999.0
      }
    }
  }
}
# 统计不同品牌的数量
GET /products/_search
{
  "size": 0,
  "aggs": {
    "unique_brands": {
      "cardinality": { "field": "brand.keyword" }
    }
  }
}
# 统计不同用户数量
GET /orders/_search
{
  "size": 0,
  "aggs": {
    "unique_users": {
      "cardinality": {
        "field": "user_id",
        "precision_threshold": 10000
      }
    }
  }
}
# 按品牌分组统计
GET /products/_search
{
  "size": 0,
  "aggs": {
    "brands": {
      "terms": {
        "field": "brand.keyword",
        "size": 10
      }
    }
  }
}
# 响应:
{
  "aggregations": {
    "brands": {
      "buckets": [
        { "key": "Apple", "doc_count": 45 },
        { "key": "Samsung", "doc_count": 32 }
      ]
    }
  }
}
# 按品牌分组并计算平均价格
GET /products/_search
{
  "size": 0,
  "aggs": {
    "brands": {
      "terms": {
        "field": "brand.keyword",
        "size": 10,
        "order": { "avg_price": "desc" }
      },
      "aggs": {
        "avg_price": {
          "avg": { "field": "price" }
        }
      }
    }
  }
}
# 按价格区间分组
GET /products/_search
{
  "size": 0,
  "aggs": {
    "price_ranges": {
      "range": {
        "field": "price",
        "ranges": [
          { "to": 2000 },
          { "from": 2000, "to": 5000 },
          { "from": 5000, "to": 10000 },
          { "from": 10000 }
        ]
      }
    }
  }
}
# 按日期分组统计
GET /orders/_search
{
  "size": 0,
  "aggs": {
    "sales_over_time": {
      "date_histogram": {
        "field": "order_date",
        "calendar_interval": "day",
        "format": "yyyy-MM-dd"
      },
      "aggs": {
        "total_amount": {
          "sum": { "field": "amount" }
        }
      }
    }
  }
}
# 按月统计
GET /orders/_search
{
  "size": 0,
  "aggs": {
    "monthly_sales": {
      "date_histogram": {
        "field": "order_date",
        "calendar_interval": "month"
      },
      "aggs": {
        "revenue": {
          "sum": { "field": "amount" }
        }
      }
    }
  }
}
# 按品牌分组,再按价格区间分组
GET /products/_search
{
  "size": 0,
  "aggs": {
    "brands": {
      "terms": { "field": "brand.keyword" },
      "aggs": {
        "price_ranges": {
          "range": {
            "field": "price",
            "ranges": [
              { "to": 3000 },
              { "from": 3000, "to": 8000 },
              { "from": 8000 }
            ]
          },
          "aggs": {
            "avg_price": {
              "avg": { "field": "price" }
            }
          }
        }
      }
    }
  }
}
# 综合销售分析
# order_count counts values of "amount" rather than "_id": Elasticsearch restricts the _id
# metadata field from use in aggregations, so value_count on _id is rejected by default.
# Each bucket's doc_count also reports the number of orders for that day.
GET /orders/_search
{
  "size": 0,
  "aggs": {
    "daily_stats": {
      "date_histogram": {
        "field": "order_date",
        "calendar_interval": "day"
      },
      "aggs": {
        "total_revenue": {
          "sum": { "field": "amount" }
        },
        "order_count": {
          "value_count": { "field": "amount" }
        },
        "avg_order_value": {
          "avg": { "field": "amount" }
        }
      }
    }
  }
}
1. 指标聚合练习
2. Terms聚合练习
3. Range聚合练习
4. Date Histogram练习
5. 嵌套聚合练习
6. 实战场景
7. 高级聚合
8. 性能优化
Q: 聚合和SQL的GROUP BY有什么区别?
A: 聚合功能更强大,支持嵌套、管道聚合等复杂操作。聚合可以在搜索的同时进行,而且支持近实时分析。但聚合不支持JOIN操作。
Q: 为什么terms聚合结果不准确?
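A: terms聚合默认只返回前10个桶,并且在分布式环境下结果是近似的:每个分片只返回各自的前shard_size个词项,再由协调节点合并,因此某些词项的doc_count可能偏小甚至被遗漏(可通过响应中的doc_count_error_upper_bound查看误差上界)。可以增大size参数和shard_size参数提高准确性,但会增加内存和网络开销,影响性能。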
Q: 如何优化聚合性能?
A: 1) 使用filter先缩小参与聚合的文档范围;2) 避免对text字段聚合,改用keyword子字段(如brand.keyword);3) 合理设置size参数,避免一次返回过多的桶;4) 确保聚合字段启用了doc_values(keyword、数值、日期等字段默认启用),不要为text字段开启fielddata;5) 考虑使用预聚合或rollup功能。