Average of difference between the dates - java

A snippet of my elasticsearch data is like below. Status field is nested.
status: [
{
"updated_at": "2020-08-04 17:18:41",
"created_at": "2020-08-04 17:18:39",
"sub_stage": "Stage1"
},
{
"updated_at": "2020-08-04 17:21:15",
"created_at": "2020-08-04 17:18:41",
"sub_stage": "Stage2"
},
{
"updated_at": "2020-08-04 17:21:15",
"created_at": "2020-08-04 17:21:07",
"sub_stage": "Stage3"
}
]
After aggregating on some field, each bucket contains a number of documents, and every document has a status field. What I want is the average time difference between Stage1 and Stage3.
For example, suppose the bucket for id = 1 consists of 100 documents. For each document I have to find the time difference between Stage1 and Stage3, and then take the average of those differences.
I am able to perform the aggregation, but I am stuck at computing the average.
With some effort I came up with the script below, but I have no idea whether it is correct:
// Returns the entry of `events` whose sub_stage equals `type`.
Map findEvent(List events, String type) {
  return events.find(it -> it.sub_stage == type);
}
// Number of whole days between the Stage1 entry's timestamp and the Stage3 entry's timestamp.
// NOTE(review): the sample document above stores the stages under `status` with
// `created_at` / `updated_at` fields, not `events` / `timestamp` - confirm the field names.
return ChronoUnit.DAYS.between(
  Instant.parse(findEvent(params._source.events, 'Stage1').timestamp),
  Instant.parse(findEvent(params._source.events, 'Stage3').timestamp)
);
Is there any way I can perform this in Java with this script or any other script ?
Roughly, Query looks like:
{
"from": 0,
"size": 0,
"query": {
"bool": {
"must": [
{
"nested": {
"query": {
"bool": {
"should": [
{
"match": {
"status.sub_stage": {
"query": "Stage1",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 1.0
}
}
}
],
"adjust_pure_negative": true,
"minimum_should_match": "1",
"boost": 1.0
}
},
"path": "status",
"ignore_unmapped": false,
"score_mode": "none",
"boost": 1.0
}
}
],
"adjust_pure_negative": true,
"minimum_should_match": "1",
"boost": 1.0
}
},
"aggregations": {
"id": {
"terms": {
"field": "id.keyword",
"size": 1000,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
},
"aggregations": {
"avg time": {
"avg": {
"script": {
"source": "Map findStage(List events, String type) { return events.find(it -> it.sub_stage == type); } return ChronoUnit.DAYS.between(Instant.parse(findStage(ctx._source.status, 'Stage1').timestamp), Instant.parse(findStage(ctx._source.status, 'Stage3').timestamp));",
"lang": "painless"
}
}
}
}
}
}
}

Related

MongoDB Autocomplete index doesn't get result

I have a collection named 'airport' with an Atlas Search autocomplete index; its JSON configuration is shown below.
{
"mappings": {
"dynamic": false,
"fields": {
"name": [
{
"type": "string"
},
{
"foldDiacritics": false,
"maxGrams": 7,
"minGrams": 2,
"type": "autocomplete"
}
]
}
}
}
and this is my Document record
{
"_id": {
"$oid": "63de588c7154cc3ee5cbabb2"
},
"name": "Antalya Airport",
"code": "AYT",
"country": "TR",
"createdDate": {
"$date": {
"$numberLong": "1675516044323"
}
},
"updatedDate": {
"$date": {
"$numberLong": "1675516044323"
}
},
"updatedBy": "VISITOR",
"createdBy": "VISITOR"
}
And This is my MongoDB Query
/**
 * Runs a {@code $search} aggregation stage on the airport collection using the
 * {@code airportAutoCompleteIndex} index and returns every resulting document.
 *
 * <p>NOTE(review): the stage uses the {@code text} operator on {@code name}, while the
 * index definition maps {@code name} as type {@code autocomplete}; confirm whether the
 * {@code autocomplete} operator was intended for partial keywords.
 *
 * @param autoCompleteRequest request whose keyword is used as the search query
 * @return the documents produced by the aggregation, collected into a new list
 */
public List<Document> autoCompleteAirports(AutoCompleteRequest autoCompleteRequest) {
    // Operator body: match the keyword against the "name" path.
    Document textOperator = new Document("query", autoCompleteRequest.getKeyword())
            .append("path", "name");

    // $search specification: which index to use and which operator to run.
    Document searchSpec = new Document("index", "airportAutoCompleteIndex")
            .append("text", textOperator);

    Document searchStage = new Document("$search", searchSpec);

    List<Document> matches = new ArrayList<>();
    database.getCollection(AIRPORT)
            .aggregate(Arrays.asList(searchStage))
            .into(matches);
    return matches;
}
So, when I type "antalya" or "Antalya", this works. But when I type a partial term such as "Antaly" or "antal", there is no result.
Is there any solution?
I have tried changing the min and max grams settings on the index.

ElasticSearch sorting isn't sorting by field

I'm trying to perform a field sort on the specified field but to no avail. The query keeps returning the same position when I run the script.
Here is the ElasticSearch script:
{
"from": 0,
"size": 10,
"timeout": "60s",
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"query_string": {
"query": "random",
"fields": [],
"type": "best_fields",
"default_operator": "or",
"max_determinized_states": 10000,
"enable_position_increments": true,
"fuzziness": "AUTO",
"fuzzy_prefix_length": 0,
"fuzzy_max_expansions": 50,
"phrase_slop": 0,
"escape": false,
"auto_generate_synonyms_phrase_query": true,
"fuzzy_transpositions": true,
"boost": 1
}
},
{
"nested": {
"query": {
"bool": {
"must": [
{
"match": {
"reviews.source": {
"query": "TEST",
"operator": "AND",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"path": "reviews",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1,
"inner_hits": {
"name": "reviews",
"ignore_unmapped": false,
"from": 0,
"size": 3,
"version": false,
"seq_no_primary_term": false,
"explain": false,
"track_scores": false
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
}
],
"should": [
{
"match": {
"dataset": {
"query": "QUERY_TEST",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"minimum_should_match": "1",
"boost": 1
}
},
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"reviews.openedAt": {
"order": "desc",
"nested": {
"path": "reviews"
}
}
}
]
}
The mapping I'm currently using:
"reviews": {
"type": "nested",
"properties": {
"id": {
"type": "keyword",
"copy_to": "fulltext"
},
"updatedAt": {
"type": "date",
"format": "strict_date_time",
"index": false
},
"openedAt": {
"type": "date",
"format": "strict_date_time"
}
I'm trying to sort the records based on a specific date in the reviews section. If a user inputs ASC, the returning values (reviews) should be in ascending order based on the openedAt date. I believe the sorting function isn't necessarily hitting the appropriate path. What should the sorting function look like?
I have a Java API that I created that calls the request and creates its own set of records:
public SearchResponse(SearchResponse response, SearchRequest searchRequest) {
this.facets = new ArrayList<>();
if (searchRequest == null || searchRequest.getRestricted().isEmpty()) {
this.records =
Stream.of(response.getHits().getHits()).map(SearchHit::getSourceAsMap).collect(Collectors.toList());
} else {
this.records = processRestrictedResults(response, searchRequest);
}
if (response.getAggregations() != null) {
for (Map.Entry<String, Aggregation> entry : response.getAggregations().getAsMap().entrySet()) {
this.facets.add(Facet.create(entry));
}
}
this.totalRecords = getTotalMatched(response);
}
To answer the original question, the top-level hits are indeed being sorted by the latest reviews.openedAt in the descending order — one of the reviews from doc#2 has the value 2021-04-06T08:13:53.552Z which is greater than the only reviews.openedAt from doc#1 (2021-03-30T08:13:53.552Z), thus #2 comes before #1.
What you're missing, though, is sorted inner_hits, as I explained here and here.
In your particular use case this would mean:
{
"from": 0,
"size": 10,
"timeout": "60s",
"query": {
"bool": {
"must": [
... // your original queries
{
"nested": {
"path": "reviews", <-- we need to enforce the nested context
"query": {
"match_all": {} <-- this could've been `"exists": { "field": "reviews.openedAt" }` too
},
"inner_hits": {
"sort": {
"reviews.openedAt": { <-- sorting the inner hits under the nested context
"order": "desc"
}
}
}
}
}
]
}
},
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"reviews.openedAt": { <-- sorting the top-level hits, as you previously were
"order": "desc",
"nested": {
"path": "reviews"
}
}
}
]
}
When you run the above query, each top-level hit will include an inner_hits attribute containing the sorted reviews which you can then post-process in your java backend.

How to generate elastic search nested aggregations in java?

I have the aggregation query below, which I need to translate into Java using the Elasticsearch client RestHighLevelClient.
I have tried multiple times, but my Java code does not produce the query shown below.
{
"aggs": {
"recommendations": {
"nested": {
"path": "events.recommendationData"
},
"aggs": {
"exception": {
"filter": {
"terms": {
"events.recommendationData.exceptionId": [
"2"
]
}
},
"aggs": {
"exceptionIds": {
"terms": {
"field": "events.recommendationData.exceptionId.keyword",
"size": 10
},
"aggs": {
"recommendations": {
"nested": {
"path": "events.recommendationData.recommendations"
},
"aggs": {
"recommendationType": {
"terms": {
"field": "events.recommendationData.recommendations.recommendationType",
"size": 10
}
}
}
}
}
}
}
}
}
}
}
}
I am using the code below with RestHighLevelClient:
// Outer nested aggregation over events.recommendationData.
AggregationBuilder recommendations =
AggregationBuilders.nested("recommendations", "events.recommendationData");
// Filter aggregation restricting exceptionId to the listed value.
AggregationBuilder exception = AggregationBuilders
.filter("exception", QueryBuilders.termsQuery("events.recommendationData.exceptionId", "2"));
// Terms aggregation over the exceptionId keyword sub-field, limited to 10 buckets.
AggregationBuilder exceptionIds = AggregationBuilders.terms("exceptionIds")
.field("events.recommendationData.exceptionId.keyword").size(10);
// Inner nested aggregation over the recommendations array; it reuses the name "recommendations".
AggregationBuilder recommendations2 =
AggregationBuilders.nested("recommendations", "events.recommendationData.recommendations");
// Terms aggregation over recommendationType, limited to 10 buckets.
AggregationBuilder recommendationType = AggregationBuilders.terms("recommendationType")
.field("events.recommendationData.recommendations.recommendationType").size(10);
// Every subAggregation call below is chained on `recommendations`, so the four builders
// are all attached as its direct children (siblings) rather than nested inside one another.
AggregationBuilder build =
recommendations
.subAggregation(exception)
.subAggregation(exceptionIds)
.subAggregation(recommendations2)
.subAggregation(recommendationType);
It produces the wrong query, posted below, which does not work:
{
"aggregations": {
"recommendations": {
"nested": {
"path": "events.recommendationData"
},
"aggregations": {
"exception": {
"filter": {
"terms": {
"events.recommendationData.exceptionId": [
"1",
"2"
],
"boost": 1
}
}
},
"exceptionIds": {
"terms": {
"field": "events.recommendationData.exceptionId.keyword",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
}
},
"recommendations": {
"nested": {
"path": "events.recommendationData.recommendations"
}
},
"recommendationType": {
"terms": {
"field": "events.recommendationData.recommendations.recommendationType",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
}
}
}
}
}
}
Expected: Every aggregation is a sub aggregation to the previous.
Therefore, if you see the expected query, recommendationType is sub-aggregation of recommendations2. These together are sub-aggregation to exceptionIds and so on. Therefore only one line needs to change here, which is instead of
AggregationBuilder build =
recommendations
.subAggregation(exception)
.subAggregation(exceptionIds)
.subAggregation(recommendations2)
.subAggregation(recommendationType);
use this,
recommendations.subAggregation(
exception.subAggregation(
exceptionIds.subAggregation(
recommendations2.subAggregation(recommendationType)
)
)
);

"match or null" query in elasticsearch

Suppose I have an index with all movies released from 2010 to 2019;
How can I translate this query in SQL to ElasticSearch?
Select *
From movies
Where
releaseDate between '2018-01-01' and '2019-01-01' and
gender is like 'action' and
(mainActorId = 42 or mainActorId is null)
I want all the action movies from 2018 with a specific main actor or no main actor at all. How would I translate that to an ElasticSearch query?
From what I've read so far in the documentation, I could use something like this:
{
"size":0,
"query":{
"bool":{
"must":[
{
"range":{
"releaseDate":{
"from":"2018-01-01T00:00:01.531Z",
"to":"2019-01-01T00:00:01.531Z",
"include_lower":true,
"include_upper":true,
"boost":1.0
}
}
},
{
"terms":{
"gender":[
"action"
],
"boost":1.0
}
},
{
"terms":{
"mainActorId":[
42,
56
],
"boost":1.0
}
}
],
"must_not":[
{
"exists":{
"field":"mainActorId",
"boost":1.0
}
}
],
"adjust_pure_negative":true,
"boost":1.0
}
}
}
But that's not giving me any hits, even though there are action movies released in 2018 with the main actor I want (or no main actors at all). If I take away the "must_not exist" clause, the query works fine and gives me the action movies with the main actors I want, but I also want action movies with no main actors...
Great start so far!! You're almost there, see the query below, it should do just what you expect:
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"range": {
"releaseDate": {
"from": "2018-01-01T00:00:01.531Z",
"to": "2019-01-01T00:00:01.531Z",
"include_lower": true,
"include_upper": true,
"boost": 1
}
}
},
{
"terms": {
"gender": [
"action"
],
"boost": 1
}
}
],
"should": [
{
"bool": {
"must_not": [
{
"exists": {
"field": "mainActorId"
}
}
]
}
},
{
"terms": {
"mainActorId": [
42,
56
]
}
}
],
"minimum_should_match": 1,
"adjust_pure_negative": true,
"boost": 1
}
}
}

Conditionally group and sum of the elements from an array

I want to group by AppName and find how many PrestoBarImpression, PrestoKeyCountChange and PrestoTileImpression actions there are for every application on a particular day (just the sum of the order counts).
This is so I can generate a report with this information. I need how many order counts of PrestoTileImpression, how many order counts of PrestoBarImpression, how many order counts of PrestoTileClick for every application.
The below is my Document.
{
"ClientId": "XYZ123",
"location": {
"Name": "Hyderabad",
"Country": "India",
"Zip": "500084",
"Gps": {
"lat": "17.463607",
"lon": "78.344279"
}
},
"Network": {
"Operator": "Airtel",
"Type": "wifi",
"TowerID": "123",
"IP": "1.1.1.1"
},
"SessionTimeStamp": {
"Start": ISODate("2015-06-02T05:36:49.045 Z"),
"End": ISODate("2015-06-02T05:36:56.045 Z"),
"Duration": "7000"
},
"AppName": "WhatsApp",
"Text": "Key1 Key2 Key3 Key4",
"Actions": [{
"Type": "PrestoBarImpression",
"CampaignId": 1,
"keyword": "key1",
"prestoCount": 1,
"duration": 100,
"OrderCount": 1
}, {
"Type": "PrestoKeyCountChange",
"CampaignId": 1,
"keyword": "key1",
"prestoCount": 1,
"OrderCount": 2
}, {
"Type": "PrestoBarImpression",
"CampaignId": 2,
"keyword": "key2",
"prestoCount": 2,
"duration": 150,
"OrderCount": 3
}, {
"Type": "PrestoKeyCountChange",
"CampaignId": "2",
"keyword": "key2",
"prestoCount": 2,
"OrderCount": 4
}, {
"Type": "PrestoBarImpression",
"CampaignId": 1,
"keyword": "key3",
"prestoCount": 2,
"duration": 200,
"OrderCount": 5
}, {
"Type": "PrestoTileImpression",
"CampaignId": 1,
"duration": 200,
"OrderCount": 6
}, {
"Type": "PrestoTileImpression",
"AdditionalAction": "swipeRight",
"CampaignId": 2,
"duration": 200,
"OrderCount": 7
}, {
"Type": "PrestoTileClick",
"AdditionalAction": "swipeRight",
"CampaignId": 2,
"OrderCount": 8
}, {
"Type": "PrestoBarImpression",
"CampaignId": 2,
"keyword": "key4",
"prestoCount": 2,
"duration": 150,
"OrderCount": 9
}]
}
Using #Viswas's response I wrote the query below, which produced the output below.
Query
[
{
"$match":{
"SessionTimeStamp.Start":{
"$gte": ISODate("2015-06-01T18:30:00.000 Z"),
"$lte": ISODate("2015-06-04T18:29:59.000 Z")
}
}
},
{
"$unwind":"$Actions"
},
{
"$match":{
"Actions.Type":{
"$in":[
"PrestoBarImpression",
"PrestoKeyCountChange",
"PrestoTileImpression"
]
}
}
},
{
"$group":{
"_id":{
"AppName":"$AppName",
"type":"$Actions.Type"
},
"total":{
"$sum":"$Actions.OrderCount"
}
}
},
{
"$sort":{
"total":1,
}
}
]
Output
{
"result":[
{
"_id":{
"AppName":"WhatsApp",
"type":"PrestoKeyCountChange"
},
"total":6
},
{
"_id":{
"AppName":"hike",
"type":"PrestoKeyCountChange"
},
"total":6
},
{
"_id":{
"AppName":"hike",
"type":"PrestoTileImpression"
},
"total":13
},
{
"_id":{
"AppName":"WhatsApp",
"type":"PrestoTileImpression"
},
"total":13
},
{
"_id":{
"AppName":"hike",
"type":"PrestoBarImpression"
},
"total":18
},
{
"_id":{
"AppName":"WhatsApp",
"type":"PrestoBarImpression"
},
"total":18
}
],
"ok":1.0000000000000000
}
I need the output in below format
[
{
"AppName":"WhatsApp",
"PrestoTileImpression":13,
"PrestoKeyCountChange":6,
"PrestoBarImpression":18,
"count":"10 (This is how many times thee Application presents in document, because I need to find top 10 apps Need to sort the output by this count)"
},
{
"AppName":"Hike",
"PrestoTileImpression":13,
"PrestoKeyCountChange":6,
"PrestoBarImpression":18,
"count":"10 "
}
]
It's really all about filtering the array content to get just the items you want in the sum:
db.collection.aggregate([
// Filter documents with matching entries first
{ "$match": {
"Actions.Type": { "$in": [
"PrestoBarImpression",
"PrestoKeyCountChange",
"PrestoTileImpression"
]}
}},
// Unwind the array entries
{ "$unwind": "$Actions" },
// Filter to only keep desired array entries
{ "$match": {
"Actions.Type": { "$in": [
"PrestoBarImpression",
"PrestoKeyCountChange",
"PrestoTileImpression"
]}
}},
// Group by AppName and current day (finishing)
{ "$group": {
"_id": {
"AppName": "$AppName",
"day": {
"year": { "$year": "$SessionTimeStamp.End" },
"month": { "$month": "$SessionTimeStamp.End" },
"day": { "$dayOfMonth": "$SessionTimeStamp.End" }
},
"type": "$Actions.Type"
},
"total": { "$sum": "$Actions.OrderCount" }
}},
// Sort as however you require
{ "$sort": {
"_id.AppName": 1,
"_id.day": 1,
"_id.type": 1,
"total": -1
}}
])
Or if you want all those fields per document then right after the existing group add:
{ "$group": {
"_id": {
"AppName": "$_id.AppName",
"day": "$_id.day",
},
"PrestoBarImpression": { "$sum": {
"$cond": [
{ "$eq": [ "$_id.type", "PrestoBarImpression" ] },
"$total",
0
]
}},
"PrestoKeyCountChange": { "$sum": {
"$cond": [
{ "$eq": [ "$_id.type", "PrestoKeyCountChange" ] },
"$total",
0
]
}},
"PrestoTileImpression": { "$sum": {
"$cond": [
{ "$eq": [ "$_id.type", "PrestoTileImpression" ] },
"$total",
0
]
}}
}}
Which tallies the totals per field into single documents for "AppName" and "day".
You probably want to add a "date range" match to that first $match pipeline rather than add up everything in the collection and just do so between dates as well.
You should use the aggregation framework to get this result.
If you want the OrderCount per action type for a particular date, you first need to match the session start against your date and then group the data by Actions.Type.
The query is as follows:
// Sums OrderCount per action type for sessions that started at the given timestamp.
// The stages are passed as a single pipeline array.
db.collection.aggregate([
  // Keep only sessions whose start time equals the requested timestamp.
  {
    $match: {
      "SessionTimeStamp.Start": ISODate("2015-06-02T05:36:49.045Z")
    }
  },
  // Collect the matching sessions per application. "$AppName" is a field reference;
  // without the "$" the group key would be the constant string "AppName".
  {
    $group: {
      "_id": "$AppName",
      "Document": {
        $push: {
          "SessionTimeStamp": "$SessionTimeStamp",
          "Actions": "$Actions",
          "AppName": "$AppName"
        }
      }
    }
  },
  // Flatten back to one row per session, then one row per action.
  {
    $unwind: "$Document"
  },
  {
    $unwind: "$Document.Actions"
  },
  // Total the order counts per action type, keeping the first application name seen.
  {
    $group: {
      _id: "$Document.Actions.Type",
      "OrderCount": {
        $sum: "$Document.Actions.OrderCount"
      },
      "App": {
        $first: "$Document.AppName"
      }
    }
  },
  // Expose the group key as ActionType and drop _id.
  {
    $project: {
      "_id": 0,
      "OrderCount": 1,
      "ActionType": "$_id",
      "App": 1
    }
  }
])
Edit after a comment from the question author:
Reference to the duplicate question by the author
Please verify the spelling of the count field for the app names, as it differs (count, Count) for some of them.
You should use the following query:
// Per (AppName, action type): sum of Actions.Count and number of actions,
// for sessions that lie inside the given start/end window.
db.collection.aggregate({
  // Session must start at or after the lower bound and end at or before the upper bound.
  "$match": {
    "SessionTimeStamp.Start": { "$gte": ISODate("2015-06-02T05:36:49.045Z") },
    "SessionTimeStamp.End": { "$lte": ISODate("2015-06-02T05:36:56.045Z") }
  }
}, {
  // One row per action.
  "$unwind": "$Actions"
}, {
  "$group": {
    "_id": { "AppName": "$AppName", "Type": "$Actions.Type" },
    // NOTE(review): the sample document in the question names this field OrderCount,
    // not Count - confirm the field name against the collection.
    "count": { "$sum": "$Actions.Count" },
    // Number of unwound actions in the group.
    "appCount": { "$sum": 1 }
  }
}, {
  // Lift the group key fields to the top level and drop _id.
  "$project": {
    "AppName": "$_id.AppName",
    "Type": "$_id.Type",
    "count": 1,
    "appCount": 1,
    "_id": 0
  }
})
If you still want to use dynamic values as keys, you can iterate over the cursor you get, like this:
// Same pipeline as above, then print one object per result whose key for the
// summed count is the action type itself.
db.collection.aggregate(
  {
    $match: {
      "SessionTimeStamp.Start": { $gte: ISODate("2015-06-02T05:36:49.045Z") },
      "SessionTimeStamp.End": { $lte: ISODate("2015-06-02T05:36:56.045Z") }
    }
  },
  { $unwind: "$Actions" },
  {
    $group: {
      "_id": { "AppName": "$AppName", "Type": "$Actions.Type" },
      "count": { "$sum": "$Actions.Count" },
      "appCount": { $sum: 1 }
    }
  },
  {
    $project: {
      "AppName": "$_id.AppName",
      "Type": "$_id.Type",
      "count": 1,
      "appCount": 1,
      "_id": 0
    }
  }
).forEach(function (doc) {
  // Build the output row: the action type becomes the key holding its count.
  var row = {};
  row["AppName"] = doc.AppName;
  row[doc.Type] = doc.count;
  row["appCount"] = doc.appCount;
  printjson(row);
});

Categories

Resources