Elastic Search Should clause - java

I'm trying to fetch users from ES based on the status of some of the fields.
I have 5 fields whose status I want to check and if any of these fields have the failed status I want to fetch that record. Since it's an OR condition between these 5 fields I was trying to use should in ES and adding terms to it. But it returns records of those users who don't match the criteria as well.
{
"from": 0,
"size": 50,
"query": {
"bool": {
"must": [
{
"nested": {
"query": {
"bool": {
"must": [
{
"range": {
"segment_status.updated_at": {
"from": "2021-01-24",
"to": null,
"include_lower": true,
"include_upper": true,
"boost": 1
}
}
}
],
"should": [
{
"terms": {
"segment_status.bse_status": [
2,
3,
4
],
"boost": 1
}
},
{
"terms": {
"segment_status.nse_status": [
2,
3
],
"boost": 1
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"path": "segment_status",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1
}
}
],
"must_not": [
{
"term": {
"marked_failed_manually": {
"value": true,
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"sort": [
{
"segment_status.updated_at": {
"order": "asc",
"mode": "min",
"nested_filter": {
"term": {
"segment_status.segment_type": {
"value": "CASH",
"boost": 1
}
}
},
"nested_path": "segment_status"
}
}
]
}
That is the query generated by the code. I'm using spring boot to build the query.

Just for reference, I tried this query and it seems to work.
{
"from": 0,
"size": 50,
"query": {
"bool": {
"must": [
{
"nested":{
"query":{
"bool":{
"must" : [
{
"range" : {
"segment_status.updated_at" : {
"from" : "2021-08-30",
"to" : null,
"include_lower" : true,
"include_upper" : true,
"boost" : 1.0
}
}
}
]
}
},
"path" : "segment_status",
"ignore_unmapped" : false,
"score_mode" : "avg",
"boost" : 1.0
}
},
{
"nested": {
"query": {
"bool": {
"should": [
{
"terms": {
"segment_status.bse_status": [
2,
3
],
"boost": 1
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"path": "segment_status",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1
}
}
],
"must_not": [
{
"term": {
"marked_failed_manually": {
"value": true,
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"sort": [
{
"segment_status.updated_at": {
"order": "asc",
"mode": "min",
"nested_filter": {
"term": {
"segment_status.segment_type": {
"value": "CASH",
"boost": 1
}
}
},
"nested_path": "segment_status"
}
}
]
}

Related

Elasticsearch Rest High Level Client aggregate fields dynamically

I am trying to generate a query dynamically based on the inputs, but in the generated query I can see that only two aggregations are being generated. How can I make each field have its own separate aggregation? Below is the code I have tried and the response I'm getting.
From main() I'm calling:
buildSearchCriteria("1");
Here i am setting the aggregation type and respective values:
/**
 * Assembles the aggregation criteria for the given exception ids and passes them to
 * {@code aggregate}.
 *
 * <p>The criteria are grouped by aggregation type under the keys {@code "nested"},
 * {@code "filter"}, {@code "terms"} and {@code "sum"}. The assembled map is printed to standard
 * output before it is handed over.
 *
 * @param exceptionId the exception ids used as the values of the {@code "filter"} entry
 * @throws java.io.UncheckedIOException if {@code aggregate} fails with an {@code IOException}
 */
public static void buildSearchCriteria(String... exceptionId) {
    Map<String, List<FieldNameAndPath>> stringListMap = new HashMap<>();

    stringListMap.put("nested", asList(
            new FieldNameAndPath("nested", "recommendations", "recommendations", null, emptyList(), 1)));

    stringListMap.put("filter", asList(
            new FieldNameAndPath("filter", "exceptionIds", "recommendations.exceptionId.keyword",
                    asList(exceptionId),
                    asList(new NestedAggsFields("terms", "exceptionIdsMatch")), 2)));

    stringListMap.put("terms", asList(
            new FieldNameAndPath("terms", "by_exceptionId", "recommendations.exceptionId.keyword", null, emptyList(), 3),
            new FieldNameAndPath("terms", "by_item", "recommendations.item.keyword", null, emptyList(), 4),
            new FieldNameAndPath("terms", "by_destination", "recommendations.location.keyword", null, emptyList(), 5),
            new FieldNameAndPath("terms", "by_trans", "recommendations.transportMode.keyword", null, emptyList(), 6),
            new FieldNameAndPath("terms", "by_sourcelocation", "recommendations.sourceLocation.keyword", null, emptyList(), 7),
            new FieldNameAndPath("terms", "by_shipdate", "recommendations.shipDate", null, emptyList(), 8),
            new FieldNameAndPath("terms", "by_arrival", "recommendations.arrivalDate", null, emptyList(), 9)));

    stringListMap.put("sum", asList(
            new FieldNameAndPath("sum", "quantity", "recommendations.transferQuantity", null, emptyList(), 10),
            new FieldNameAndPath("sum", "transfercost", "recommendations.transferCost", null, emptyList(), 11),
            new FieldNameAndPath("sum", "revenueRecovered", "recommendations.revenueRecovered", null, emptyList(), 12)));

    System.out.println(stringListMap);

    SearchCriteria searchCriteria = new SearchCriteria();
    searchCriteria.setStringListMap(stringListMap);
    try {
        aggregate(searchCriteria);
    } catch (IOException e) {
        // aggregate declares a checked IOException; this method's signature has no throws
        // clause, so rethrow unchecked (with the cause preserved) instead of widening it.
        throw new java.io.UncheckedIOException("Failed to build the aggregation request", e);
    }
}
Below is the aggregate function, which takes the above information and builds the query:
/**
 * Builds a search request whose aggregations are derived from the given criteria and prints the
 * generated request source to standard output.
 *
 * <p>The criteria map is read under the keys {@code "nested"}, {@code "filter"}, {@code "terms"}
 * and {@code "sum"}, in that order. The builder returned by {@code buildAggregations} is kept as
 * the root for the entries that follow, so a builder created for a {@code "nested"} entry is
 * actually placed in the request instead of being thrown away.
 *
 * @param searchCriteria criteria holding the per-type field lists; all four keys must be present
 * @throws IOException declared in the signature; no statement in this body throws it
 */
public static void aggregate(SearchCriteria searchCriteria) throws IOException {
    Map<String, List<FieldNameAndPath>> map = searchCriteria.getStringListMap();
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

    // Starting root; stays in place only if buildAggregations never hands back a different builder.
    AggregationBuilder root = new SamplerAggregationBuilder("parent");
    for (String aggType : new String[] {"nested", "filter", "terms", "sum"}) {
        for (FieldNameAndPath field : map.get(aggType)) {
            // Use the return value: it may be a new builder that replaces the current root.
            root = buildAggregations(field, root);
        }
    }

    SearchRequest searchRequest = new SearchRequest();
    searchRequest.indices("index");
    searchRequest.types("type");
    sourceBuilder.aggregation(root);
    searchRequest.source(sourceBuilder);
    System.out.println(searchRequest.source().toString());
}
buildAggregations method:
/**
 * Translates one field descriptor into an aggregation, selected by its aggregation type.
 *
 * <ul>
 *   <li>{@code "nested"}: creates a nested aggregation named after the field name on the field
 *       path and returns it. The new builder is <em>not</em> attached to
 *       {@code parentAggregationBuilder}; the caller has to use the return value.</li>
 *   <li>{@code "filter"}: adds a filter sub-aggregation to the parent whose terms query matches
 *       the field path against the descriptor's field values.</li>
 *   <li>{@code "terms"}: adds a terms sub-aggregation on the field path to the parent.</li>
 *   <li>{@code "sum"}: adds a sum sub-aggregation on the field path to the parent.</li>
 * </ul>
 *
 * <p>Any other type leaves the parent untouched.
 *
 * @param fieldNameAndPath descriptor supplying the aggregation type, name, path and values
 * @param parentAggregationBuilder builder that receives filter, terms and sum sub-aggregations
 * @return the newly created nested builder for {@code "nested"}; otherwise
 *     {@code parentAggregationBuilder}
 */
private static AggregationBuilder buildAggregations(FieldNameAndPath fieldNameAndPath , AggregationBuilder parentAggregationBuilder) {
    final String name = fieldNameAndPath.getFieldName();
    final String path = fieldNameAndPath.getFieldPath();

    switch (fieldNameAndPath.getAggType()) {
        case "nested":
            return AggregationBuilders.nested(name, path);
        case "filter":
            // Query the document field (the path). The name of a nested-agg descriptor is an
            // aggregation label, not a field, so a terms query on it cannot match anything.
            parentAggregationBuilder.subAggregation(
                    AggregationBuilders.filter(name,
                            QueryBuilders.termsQuery(path, fieldNameAndPath.getFieldValues())));
            break;
        case "terms":
            parentAggregationBuilder.subAggregation(AggregationBuilders.terms(name).field(path));
            break;
        case "sum":
            parentAggregationBuilder.subAggregation(AggregationBuilders.sum(name).field(path));
            break;
        default:
            break;
    }
    return parentAggregationBuilder;
}
SearchCriteria class:
#Data
public class SearchCriteria {
Map<String, List<FieldNameAndPath>> stringListMap;
private List<String> searchFields;
}
And the DTO FieldNameAndPath:
/**
 * Describes a single aggregation to build: its type, the name it is registered under, the
 * document field path it operates on, and optional values and nested-aggregation descriptors.
 */
public class FieldNameAndPath {
    private final String aggType;
    private final String fieldName;
    private final String fieldPath;
    private final List<String> fieldValues;
    private final List<NestedAggsFields> nestedAggs;
    private final int order;

    /**
     * Creates a descriptor. Arguments are stored as given, without copying or validation.
     *
     * @param aggType aggregation type
     * @param fieldName name the aggregation is registered under
     * @param fieldPath document field path the aggregation operates on
     * @param fieldValues values to match; may be {@code null}
     * @param nestedAggs descriptors of nested aggregations
     * @param order ordinal of this descriptor
     */
    public FieldNameAndPath(String aggType, String fieldName, String fieldPath,
            List<String> fieldValues, List<NestedAggsFields> nestedAggs, int order) {
        this.aggType = aggType;
        this.fieldName = fieldName;
        this.fieldPath = fieldPath;
        this.fieldValues = fieldValues;
        this.nestedAggs = nestedAggs;
        this.order = order;
    }

    /** @return the aggregation type */
    public String getAggType() {
        return aggType;
    }

    /** @return the name the aggregation is registered under */
    public String getFieldName() {
        return fieldName;
    }

    /** @return the document field path */
    public String getFieldPath() {
        return fieldPath;
    }

    /** @return the values to match, possibly {@code null} */
    public List<String> getFieldValues() {
        return fieldValues;
    }

    /** @return the nested-aggregation descriptors */
    public List<NestedAggsFields> getNestedAggs() {
        return nestedAggs;
    }

    /** @return the ordinal of this descriptor */
    public int getOrder() {
        return order;
    }
}
And the query output from the above code is:
{
"aggregations": {
"parent": {
"sampler": {
"shard_size": 100
},
"aggregations": {
"exceptionIds": {
"filter": {
"terms": {
"exceptionIdsMatch": [
"1"
],
"boost": 1
}
}
},
"by_exceptionId": {
"terms": {
"field": "recommendations.exceptionId.keyword",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
}
},
"by_item": {
"terms": {
"field": "recommendations.item.keyword",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
}
},
"by_destination": {
"terms": {
"field": "recommendations.location.keyword",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
}
},
"by_trans": {
"terms": {
"field": "recommendations.transportMode.keyword",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
}
},
"by_sourcelocation": {
"terms": {
"field": "recommendations.sourceLocation.keyword",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
}
},
"by_shipdate": {
"terms": {
"field": "recommendations.shipDate",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
}
},
"by_arrival": {
"terms": {
"field": "recommendations.arrivalDate",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
}
},
"quantity": {
"sum": {
"field": "recommendations.transferQuantity"
}
},
"transfercost": {
"sum": {
"field": "recommendations.transferCost"
}
},
"revenueRecovered": {
"sum": {
"field": "recommendations.revenueRecovered"
}
}
}
}
}
}
Expected Query is:
{
"size": 0,
"aggregations": {
"exceptionIds": {
"nested": {
"path": "recommendations"
},
"aggregations": {
"exceptionIdsMatch": {
"filter": {
"terms": {
"recommendations.exceptionId.keyword": [
"1"
],
"boost": 1
}
},
"aggregations": {
"by_exceptionId": {
"terms": {
"field": "recommendations.exceptionId.keyword",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
},
"aggregations": {
"by_item": {
"terms": {
"field": "recommendations.item.keyword",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
},
"aggregations": {
"by_destination": {
"terms": {
"field": "recommendations.location.keyword",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
},
"aggregations": {
"by_trans": {
"terms": {
"field": "recommendations.transportMode.keyword",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
},
"aggregations": {
"by_sourcelocation": {
"terms": {
"field": "recommendations.sourceLocation.keyword",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
},
"aggregations": {
"by_shipdate": {
"terms": {
"field": "recommendations.shipDate",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
},
"aggregations": {
"by_arrival": {
"terms": {
"field": "recommendations.arrivalDate",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
},
"aggregations": {
"quantity": {
"sum": {
"field": "recommendations.transferQuantity"
}
},
"transfercost": {
"sum": {
"field": "recommendations.transferCost"
}
},
"revenueRecovered": {
"sum": {
"field": "recommendations.revenueRecovered"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}

How to term query nested json objects/fields in elastic search?

I am doing a terms aggregation on the field [type] as shown below, but Elasticsearch returns a count of only 1 for the term instead of 2. It is not aggregating over the nested objects: comments.data.comments is a list, and it contains 2 entries with this type.
{
"aggs": {
"genres": {
"terms": {
"field": "comments.data.comments.type"
}
}
}
}
Gotta utilize the nested field type:
PUT events
{
"mappings": {
"properties": {
"events": {
"type": "nested",
"properties": {
"ecommerceData": {
"type": "nested",
"properties": {
"comments": {
"type": "nested",
"properties": {
"recommendationType": {
"type": "keyword"
}
}
}
}
}
}
}
}
}
}
POST events/_doc
{
"events": [
{
"eventId": "1",
"ecommerceData": [
{
"comments": [
{
"rank": 1,
"recommendationType": "abc"
},
{
"rank": 1,
"recommendationType": "abc"
}
]
}
]
}
]
}
GET events/_search
{
"size": 0,
"aggs": {
"genres": {
"nested": {
"path": "events.ecommerceData.comments"
},
"aggs": {
"nested_comments_recomms": {
"terms": {
"field": "events.ecommerceData.comments.recommendationType"
}
}
}
}
}
}

What is the purpose of BoolQuery's "filter" in ElasticSearch?

I read the documentation of BoolQuery, and according to it this is the purpose:
filter
The clause (query) must appear in matching documents. However unlike
must the score of the query will be ignored. Filter clauses are
executed in filter context, meaning that scoring is ignored and
clauses are considered for caching.
Also from BoolQueryBuilder class:
/**
 * Adds a query that <b>must</b> appear in the matching documents but will
 * not contribute to scoring. No {@code null} value allowed.
 *
 * @param queryBuilder the clause to append to the filter clauses; must not be {@code null}
 * @return this builder
 * @throws IllegalArgumentException if {@code queryBuilder} is {@code null}
 */
public BoolQueryBuilder filter(QueryBuilder queryBuilder) {
if (queryBuilder == null) {
throw new IllegalArgumentException("inner bool query clause cannot be null");
}
filterClauses.add(queryBuilder);
return this;
}
But I can't get my head around this. When should I use filter vs. should or must?
Here is the example I am working on :
I want to filter out some records based on the following assumptions :
Fetch All
1) Records where deleted=0 and isPrivate=true
AND
2) Records where (isPrivate=false or [isPrivate=true and
createdBy=loggedInUser])
Here are the 2 queries which give the same result, I want to know what filter query signifies
Result without Filter using just must and should clause.
"query": {
"bool": {
"must": [
{
"term": {
"deleted": {
"value": "0",
"boost": 1
}
}
},
{
"match": {
"isPrivate": {
"query": true
}
}
},
{
"bool": {
"should": [
{
"term": {
"isPrivate": {
"value": "false",
"boost": 1
}
}
},
{
"bool": {
"must": [
{
"term": {
"createdBy": {
"value": "1742991596",
"boost": 1
}
}
},
{
"term": {
"isPrivate": {
"value": "true",
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
}
]
}
}
]
}
},
Query with using filter
"query": {
"bool": {
"adjust_pure_negative": true,
"boost": 1,
"filter": [
{
"bool": {
"must": [
{
"term": {
"deleted": {
"value": "0",
"boost": 1
}
}
},
{
"match": {
"isPrivate": {
"query": true
}
}
}
],
"should": [
{
"term": {
"isPrivate": {
"value": "false",
"boost": 1
}
}
},
{
"bool": {
"must": [
{
"term": {
"createdBy": {
"value": "1742991596",
"boost": 1
}
}
},
{
"term": {
"isPrivate": {
"value": "true",
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
}
]
}
}
In your case, you should definitely use bool/filter since you don't have any constraint that contributes to scoring, all constraints are yes/no matches, and by using filter you can benefit from filter caches (which you don't when using must)
So definitely go with the filter option, but with a slight modification (you don't really need must at all and your boolean logic is not properly translated to bool queries):
{
"query": {
"bool": {
"minimum_should_match": 1,
"filter": [
{
"term": {
"deleted": {
"value": "0",
"boost": 1
}
}
},
{
"bool": {
"minimum_should_match": 1,
"should": [
{
"term": {
"isPrivate": {
"value": "false",
"boost": 1
}
}
},
{
"bool": {
"filter": [
{
"term": {
"createdBy": {
"value": "1742991596",
"boost": 1
}
}
},
{
"term": {
"isPrivate": {
"value": "true",
"boost": 1
}
}
}
]
}
}
]
}
}
]
}
}
}
So to sum up:
should = OR condition
must = AND condition (when scoring is desired)
filter = AND condition (when scoring is not desired and/or when you want to benefit from filter caching)
Bonus: must_not = NOT condition

elasticsearch how to group by repetitive items in array without distinct

I'm trying to get counts grouped by the items in an array, counting repeated items rather than only distinct ones. I used a terms aggregation, but it does not work:
GET /my_index/_search
{
"size": 0,
"aggs": {
"keywords": {
"terms": {
"field": "keywords"
}
}
}
}
documents like:
"keywords": [
"value1",
"value1",
"value2"
],
but the result is:
"buckets": [
{
"key": "value1",
"doc_count": 1
},
{
"key": "value2",
"doc_count": 1
}
]
how can i get the result like:
"buckets": [
{
"key": "value1",
"doc_count": 2
},
{
"key": "value2",
"doc_count": 1
}
]
Finally, I modified the mapping to use a nested type:
"keywords": {
"type": "nested",
"properties": {
"count": {
"type": "integer"
},
"keyword": {
"type": "keyword"
}
}
},
and query:
GET /my_index/_search
{
"size": 0,
"aggs": {
"keywords": {
"nested": {
"path": "keywords"
},
"aggs": {
"keyword_name": {
"terms": {
"field": "keywords.keyword"
},
"aggs": {
"sums": {
"sum": {
"field": "keywords.count"
}
}
}
}
}
}
}
}
result:
"buckets": [{
"key": "value1",
"doc_count": 495,
"sums": {
"value": 609
}
},
{
"key": "value2",
"doc_count": 440,
"sums": {
"value": 615
}
},
{
"key": "value3",
"doc_count": 319,
"sums": {
"value": 421
}
},
...]

Filtered query on below mapping

I have created Elastic search mapping as below.
PUT indexcloud
{
"mappings": {
"_default_": {
"_all": {
"enabled": false
},
"_source": {
"compressed": true
},
"properties": {
"term": {
"fields": {
"raw": {
"index": "not_analyzed",
"analyzer": "lowercase_analyzer",
"type": "string"
}
},
"analyzer": "concat_all_alpha",
"type": "string"
},
"relation": {
"type": "nested",
"properties": {
"term": {
"type": "string",
"analyzer": "concat_all_alpha",
"fields": {
"raw": {
"index": "not_analyzed",
"analyzer": "lowercase_analyzer",
"type": "string"
}
}
}
}
}
}
}
},
"settings": {
"index": {
"analysis": {
"analyzer": {
"concat_all_alpha": {
"char_filter": [
"only_alphanum"
],
"filter": [
"lowercase"
],
"tokenizer": "keyword"
},
"uppercase_analyzer": {
"filter": "uppercase",
"tokenizer": "keyword"
},
"lowercase_analyzer": {
"filter": "lowercase",
"tokenizer": "keyword"
}
},
"char_filter": {
"only_alphanum": {
"pattern": "[^A-Z^a-z^0-9]|\\^",
"replacement": "",
"type": "pattern_replace"
}
}
},
"max_result_window": "1000000"
}
}
}
Sample index doc
POST indexcloud/skill
{"term":"Java Language","relation":[{"term":"java8"},{"term":"struct"},{"term":"j2ee"},{"term":"Progamming Language"}]}
I want to search using filtered query as below
GET indexcloud/_search
{
"query" : {
"constant_score" : {
"filter" : {
"term" : {
"term" : "Java Language"
}
}
}
}
}
But this is not working. How can I achieve this? Note: I don't want to search as shown below:
GET indexcloud/_search
{
"query" : {
"constant_score" : {
"filter" : {
"term" : {
"term" : "javalanguage"
}
}
}
}
}
Because I want to search using the same form in which I indexed the data.

Categories

Resources