elasticsearch how to group by repetitive items in array without distinct - java

I'm trying to get counts grouped by the repeated items in an array, without de-duplicating them (so a value that appears twice in one document is counted twice). I tried a terms aggregation, but it does not work:
GET /my_index/_search
{
"size": 0,
"aggs": {
"keywords": {
"terms": {
"field": "keywords"
}
}
}
}
documents like:
"keywords": [
"value1",
"value1",
"value2"
],
but the result is:
"buckets": [
{
"key": "value1",
"doc_count": 1
},
{
"key": "value2",
"doc_count": 1
}
]
How can I get a result like this:
"buckets": [
{
"key": "value1",
"doc_count": 2
},
{
"key": "value2",
"doc_count": 1
}
]

Finally, I modified the mapping to use a nested type:
"keywords": {
"type": "nested",
"properties": {
"count": {
"type": "integer"
},
"keyword": {
"type": "keyword"
}
}
},
and query:
GET /my_index/_search
{
"size": 0,
"aggs": {
"keywords": {
"nested": {
"path": "keywords"
},
"aggs": {
"keyword_name": {
"terms": {
"field": "keywords.keyword"
},
"aggs": {
"sums": {
"sum": {
"field": "keywords.count"
}
}
}
}
}
}
}
}
result:
"buckets": [{
"key": "value1",
"doc_count": 495,
"sums": {
"value": 609
}
},
{
"key": "value2",
"doc_count": 440,
"sums": {
"value": 615
}
},
{
"key": "value3",
"doc_count": 319,
"sums": {
"value": 421
}
},
...]

Related

Elastic Search Should clause

I'm trying to fetch users from ES based on the status of some of the fields.
I have 5 fields whose status I want to check, and if any of these fields has the failed status, I want to fetch that record. Since it's an OR condition between these 5 fields, I tried to use should in ES and added terms queries to it. But it also returns records of users who don't match the criteria.
{
"from": 0,
"size": 50,
"query": {
"bool": {
"must": [
{
"nested": {
"query": {
"bool": {
"must": [
{
"range": {
"segment_status.updated_at": {
"from": "2021-01-24",
"to": null,
"include_lower": true,
"include_upper": true,
"boost": 1
}
}
}
],
"should": [
{
"terms": {
"segment_status.bse_status": [
2,
3,
4
],
"boost": 1
}
},
{
"terms": {
"segment_status.nse_status": [
2,
3
],
"boost": 1
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"path": "segment_status",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1
}
}
],
"must_not": [
{
"term": {
"marked_failed_manually": {
"value": true,
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"sort": [
{
"segment_status.updated_at": {
"order": "asc",
"mode": "min",
"nested_filter": {
"term": {
"segment_status.segment_type": {
"value": "CASH",
"boost": 1
}
}
},
"nested_path": "segment_status"
}
}
]
}
That is the query generated by the code. I'm using spring boot to build the query.
Just for reference, I tried this query and it seems to work.
{
"from": 0,
"size": 50,
"query": {
"bool": {
"must": [
{
"nested":{
"query":{
"bool":{
"must" : [
{
"range" : {
"segment_status.updated_at" : {
"from" : "2021-08-30",
"to" : null,
"include_lower" : true,
"include_upper" : true,
"boost" : 1.0
}
}
}
]
}
},
"path" : "segment_status",
"ignore_unmapped" : false,
"score_mode" : "avg",
"boost" : 1.0
}
},
{
"nested": {
"query": {
"bool": {
"should": [
{
"terms": {
"segment_status.bse_status": [
2,
3
],
"boost": 1
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"path": "segment_status",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1
}
}
],
"must_not": [
{
"term": {
"marked_failed_manually": {
"value": true,
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"sort": [
{
"segment_status.updated_at": {
"order": "asc",
"mode": "min",
"nested_filter": {
"term": {
"segment_status.segment_type": {
"value": "CASH",
"boost": 1
}
}
},
"nested_path": "segment_status"
}
}
]
}

How to term query nested json objects/fields in elastic search?

I am doing a terms aggregation on the field [type] as shown below, but Elasticsearch returns a term count of only 1 instead of 2. It is not aggregating over the nested objects: comments.data.comments is a list, and under it I have 2 entries with a type.
{
"aggs": {
"genres": {
"terms": {
"field": "comments.data.comments.type"
}
}
}
}
You need to use the nested field type:
PUT events
{
"mappings": {
"properties": {
"events": {
"type": "nested",
"properties": {
"ecommerceData": {
"type": "nested",
"properties": {
"comments": {
"type": "nested",
"properties": {
"recommendationType": {
"type": "keyword"
}
}
}
}
}
}
}
}
}
}
POST events/_doc
{
"events": [
{
"eventId": "1",
"ecommerceData": [
{
"comments": [
{
"rank": 1,
"recommendationType": "abc"
},
{
"rank": 1,
"recommendationType": "abc"
}
]
}
]
}
]
}
GET events/_search
{
"size": 0,
"aggs": {
"genres": {
"nested": {
"path": "events.ecommerceData.comments"
},
"aggs": {
"nested_comments_recomms": {
"terms": {
"field": "events.ecommerceData.comments.recommendationType"
}
}
}
}
}
}

Java flatten json documents

I am a novice in Java and I am looking for a way to flatten json documents.
I have tried ObjectMapper without success, and I have also tried to do it with JsonNode, but still without success.
I found this link, but the result is not what I need: https://github.com/wnameless/json-flattener
I was also helped with this before, but that example was too specific, and I cannot apply the same approach because my documents are too long. This is why I am looking for a generic way to do it: Flatten json documents in Java
I need to transform "any" json documents like in the example below :
Here is an example of my documents
Document received:
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 10,
"max_score": 0,
"hits": []
},
"aggregations": {
"groupe": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "a",
"doc_count": 1,
"date": {
"buckets": [
{
"key_as_string": "2017-05-03T00:00:00.000Z",
"key": 1493769600000,
"doc_count": 1,
"value": {
"value": 1
}
},
{
"key_as_string": "2017-05-03T01:00:00.000Z",
"key": 1493776800000,
"doc_count": 1,
"value": {
"value": 3
}
}
]
}
},
{
"key": "b",
"doc_count": 4,
"date": {
"buckets": [
{
"key_as_string": "2017-05-03T00:00:00.000Z",
"key": 1493769600000,
"doc_count": 1,
"value": {
"value": 4
}
},
{
"key_as_string": "2017-05-03T01:00:00.000Z",
"key": 1493773200000,
"doc_count": 1,
"value": {
"value": 3
}
}
]
}
}
]
}
}
}
Document Transformed:
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 10,
"max_score": 0,
"hits": []
},
"aggregations": {
"groupe": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "a",
"doc_count": 1,
"date": {
"buckets": [
{
"key_as_string": "2017-05-03T00:00:00.000Z",
"key": 1493769600000,
"doc_count": 1,
"value": {
"value": 1
}
}
]
}
},
{
"key": "a",
"doc_count": 1,
"date": {
"buckets": [
{
"key_as_string": "2017-05-03T02:00:00.000Z",
"key": 1493776800000,
"doc_count": 1,
"value": {
"value": 3
}
}
]
}
},
{
"key": "b",
"doc_count": 1,
"date": {
"buckets": [
{
"key_as_string": "2017-05-03T02:00:00.000Z",
"key": 1493776800000,
"doc_count": 1,
"value": {
"value": 4
}
}
]
}
},
{
"key": "b",
"doc_count": 1,
"date": {
"buckets": [
{
"key_as_string": "2017-05-03T02:00:00.000Z",
"key": 1493776800000,
"doc_count": 1,
"value": {
"value": 4
}
}
]
}
}
]
}
}
}

Filtered query on below mapping

I have created Elastic search mapping as below.
PUT indexcloud
{
"mappings": {
"_default_": {
"_all": {
"enabled": false
},
"_source": {
"compressed": true
},
"properties": {
"term": {
"fields": {
"raw": {
"index": "not_analyzed",
"analyzer": "lowercase_analyzer",
"type": "string"
}
},
"analyzer": "concat_all_alpha",
"type": "string"
},
"relation": {
"type": "nested",
"properties": {
"term": {
"type": "string",
"analyzer": "concat_all_alpha",
"fields": {
"raw": {
"index": "not_analyzed",
"analyzer": "lowercase_analyzer",
"type": "string"
}
}
}
}
}
}
}
},
"settings": {
"index": {
"analysis": {
"analyzer": {
"concat_all_alpha": {
"char_filter": [
"only_alphanum"
],
"filter": [
"lowercase"
],
"tokenizer": "keyword"
},
"uppercase_analyzer": {
"filter": "uppercase",
"tokenizer": "keyword"
},
"lowercase_analyzer": {
"filter": "lowercase",
"tokenizer": "keyword"
}
},
"char_filter": {
"only_alphanum": {
"pattern": "[^A-Z^a-z^0-9]|\\^",
"replacement": "",
"type": "pattern_replace"
}
}
},
"max_result_window": "1000000"
}
}
}
Sample index doc
POST indexcloud/skill
{"term":"Java Language","relation":[{"term":"java8"},{"term":"struct"},{"term":"j2ee"},{"term":"Progamming Language"}]}
I want to search using filtered query as below
GET indexcloud/_search
{
"query" : {
"constant_score" : {
"filter" : {
"term" : {
"term" : "Java Language"
}
}
}
}
}
But this is not working. How can I achieve this? Note: I don't want to search like below:
GET indexcloud/_search
{
"query" : {
"constant_score" : {
"filter" : {
"term" : {
"term" : "javalanguage"
}
}
}
}
}
Because I want to search with the term in the same form in which I indexed it.

Elasticsearch: Multi-level nested query not working

My mapping is as follows:
{
"mappings": {
"person": {
"properties": {
"lastUpdated": {
"type": "long"
},
"isDeleted": {
"type": "boolean"
},
"person": {
"properties": {
"car": {
"type": "nested",
"properties": {
"model": {
"type": "string"
},
"make": {
"type": "string"
}
}
},
"last_name": {
"type": "string"
},
"first_name": {
"type": "string"
}
}
}
}
}
}
}
I have two documents:
{
"person": {
"first_name": "Bob",
"last_name": "Doe",
"car": [
{
"make": "Saturn",
"model": "Imprezza"
},
{
"make": "Honda",
"model": "Accord"
}
]
},
"isDeleted": false,
"lastUpdated": 1433257051959
}
and
{
"person": {
"first_name": "Zach",
"last_name": "Foobar",
"car": [
{
"make": "Saturn",
"model": "SL"
},
{
"make": "Subaru",
"model": "Imprezza"
}
]
},
"isDeleted": false,
"lastUpdated": 1433257051959
}
I wanted to query the car.make field and so, I wrote the following query:
{
"query": {
"nested": {
"path": "person.person.car",
"query": {
"match": {
"car.make": "Saturn"
}
},
"inner_hits": {}
}
}
}
However, I am not getting any results back. When I remove the person level object and try to search, then it works. Any idea how to go about doing multi-level nested queries?
EDIT: On the other hand, when I structure my data like this and query then it works.
{
"mappings": {
"person": {
"properties": {
"car": {
"type": "nested",
"properties": {
"model": {
"type": "string"
},
"make": {
"type": "string"
}
}
},
"last_name": {
"type": "string"
},
"first_name": {
"type": "string"
}
}
}
}
}
{
"first_name": "Zach",
"last_name": "Foobar",
"car": [
{
"make": "Saturn",
"model": "SL"
},
{
"make": "Subaru",
"model": "Imprezza"
}
]
}
{
"first_name": "Bob",
"last_name": "Doe",
"car": [
{
"make": "Saturn",
"model": "Imprezza"
},
{
"make": "Honda",
"model": "Accord"
}
]
}
{
"query": {
"nested": {
"path": "person.car",
"query": {
"match": {
"car.make": "Honda"
}
},
"inner_hits": {}
}
}
}
This way the query works. I feel like this has something to do with multi-level nesting. Multi-level nesting is not working.
The nested path attribute needs to be "person.car".
Add "type": "nested", above the (2nd level) person properties line if you wish person to be a nested field type, which is required for Nested Query searches. The default field type is object field.
The naming you are using is confusing, try to rename your mapping not to use person twice.
{
"query": {
"nested": {
"path": "person.car",
"query": {
"match": {
"make": "Saturn"
}
},
"inner_hits": {}
}
}
}

Categories

Resources