How to make a select with grouping in MongoDB - java

I am new to mongo and I try to obtain a group given by the identity document, the equivalent in sql would have the following form:
SELECT fecIni, idc , cic
from db.cllAuditoria
WHERE fecIni BETWEEN '2017-07-01T05:00:00.000Z' AND '2017-07-31T05:00:00.000Z'
group by idc , cic
please friends, this would help me a lot I have researched from different sources without coming up with the solution, I thank you in advance.

Assuming U have collection like:
/* 1 */
{
"_id" : ObjectId("5a58dafe1aadbcd8ae74572e"),
"fecIni" : ISODate("2017-07-30T05:00:00.000Z"),
"idc" : "idc 1",
"cic" : "cic 1"
}
/* 2 */
{
"_id" : ObjectId("5a58dc3a1aadbcd8ae745758"),
"fecIni" : ISODate("2017-07-29T05:00:00.000Z"),
"idc" : "idc 1",
"cic" : "cic 1"
}
/* 3 */
{
"_id" : ObjectId("5a58dc671aadbcd8ae74575e"),
"fecIni" : ISODate("2017-07-28T05:00:00.000Z"),
"idc" : "idc 1",
"cic" : "cic 2"
}
Grouping by idc, cic fields could be performed by query:
db.cllAuditoria.aggregate([
{
$match : {
fecIni: {
$gte: ISODate("2017-07-01T05:00:00.000Z"),
$lte: ISODate("2017-07-31T05:00:00.000Z")
}
}
},
{ $group : { _id : { "idc": "$idc", "cic" : "$cic"} } }
]);
Outcome will looks:
/* 1 */
{
"_id" : {
"idc" : "idc 1",
"cic" : "cic 2"
}
}
/* 2 */
{
"_id" : {
"idc" : "idc 1",
"cic" : "cic 1"
}
}
If U want to count items in group:
db.cllAuditoria.aggregate([
{
$match : {
fecIni: {
$gte: ISODate("2017-07-01T05:00:00.000Z"),
$lte: ISODate("2017-07-31T05:00:00.000Z")
}
}
},
{ $group : {
_id : { "idc": "$idc", "cic" : "$cic"},
count: { $sum: 1 }
} }
]);
Result:
/* 1 */
{
"_id" : {
"idc" : "idc 1",
"cic" : "cic 2"
},
"count" : 1.0
}
/* 2 */
{
"_id" : {
"idc" : "idc 1",
"cic" : "cic 1"
},
"count" : 2.0
}
Hope, it will helps U. If your data differs, please, provide example of collection and what U want to get as a result.

Related

MongoDB Search nested Objects without knowing Key

I have a list of objects that are given somewhat arbitrary Object keys as a result of using the async Java driver + BSON.
My issue is given the fact that jobStatuses are an arbitrary list of Dictionary items where I don't know the key, I have no idea how to access its sub-values. In the end, I'm trying to build a query that returns if ANY of jobStatus.*._id are true given a list of potential Object ID's.
So I'd be giving a list of ID's and want to return true if ANY of the items in jobStatuses have any of the given ID's. Any ideas?
Let's try this :
db.yourCollectionName.aggregate([
{
$project: {
_id: 0,
jobStatutses: { $arrayElemAt: [{ $objectToArray: "$jobStatutses" }, 0] }
}
}, {
$match: { 'jobStatutses.v._id': { $in: [ObjectId("5d6d8c3a5a0d22d3c84dd6dc"), ObjectId("5d6d8c3a5a0d22d3c84dd6ed")] } }
}
])
Collection Data :
/* 1 */
{
"_id" : ObjectId("5e06319c400289966eea6a07"),
"jobStatutses" : {
"5d6d8c3a5a0d22d3c84dd6dc" : {
"_id" : ObjectId("5d6d8c3a5a0d22d3c84dd6dc"),
"accepted" : "123",
"completed" : 0
}
},
"something" : 1
}
/* 2 */
{
"_id" : ObjectId("5e0631ad400289966eea6dd1"),
"jobStatutses" : {
"5d6d8c3a5a0d22d3c84dd6ed" : {
"_id" : ObjectId("5d6d8c3a5a0d22d3c84dd6ed"),
"accepted" : "456",
"completed" : 0
}
},
"something" : 2
}
/* 3 */
{
"_id" : ObjectId("5e0631cd400289966eea7542"),
"jobStatutses" : {
"5e06319c400289966eea6a07" : {
"_id" : ObjectId("5e06319c400289966eea6a07"),
"accepted" : "789",
"completed" : 0
}
},
"something" : 3
}
Output :
/* 1 */
{
"jobStatutses" : {
"k" : "5d6d8c3a5a0d22d3c84dd6dc",
"v" : {
"_id" : ObjectId("5d6d8c3a5a0d22d3c84dd6dc"),
"accepted" : "123",
"completed" : 0
}
}
}
/* 2 */
{
"jobStatutses" : {
"k" : "5d6d8c3a5a0d22d3c84dd6ed",
"v" : {
"_id" : ObjectId("5d6d8c3a5a0d22d3c84dd6ed"),
"accepted" : "456",
"completed" : 0
}
}
}
All you need is to check if at least one doc gets returned from DB for a given list or not, So we don't need to worry about document structure then just do result.length in your code to say at least one doc got matched for the input list.

Removing default fields from java elasticsearch querybuilder

Elasticsearch Java client's QueryBuider instance is adding properties like
1) order
2) min_doc_count
3) shard_min_doc_count
4) show_term_doc_count_error
5) lang
6) gap_policy
to the final Json query. My query works as expected without those properties. I want to prevent those properties from being added to my final query.
Java:
FilterAggregationBuilder aggregation = AggregationBuilders.filter("id", QueryBuilders.termsQuery("id",
"my-name"));
TermsAggregationBuilder lev1Agg = AggregationBuilders.terms("id").field("id");
lev1Agg.size(1);
lev1Agg.subAggregation(AggregationBuilders.sum("familyMemberCount").field("membersInFamily"));
lev1Agg.subAggregation(AggregationBuilders.sum("totalKidsInFamily").field("kidsInFamily"));
Map<String, String> bucketsPathsMap = new HashMap<>();
bucketsPathsMap.put("familyMemberCount", "familyMemberCount");
bucketsPathsMap.put("totalKidsInFamily", "totalKidsInFamily");
Script script = new Script("params.familyMemberCount / params.totalKidsInFamily");
BucketScriptPipelineAggregationBuilder bs = PipelineAggregatorBuilders
.bucketScript("myScript", bucketsPathsMap, script);
lev1Agg.subAggregation(bs);
aggregation.subAggregation(lev1Agg);
searchSourceBuilder = new SearchSourceBuilder().aggregation(aggregation);
searchSourceBuilder.size(0);
Query built by above code
GET my-alias/_search
{
"size" : 0,
"aggregations" : {
"id" : {
"filter" : {
"terms" : {
"name" : [
"my-name"
],
"boost" : 1.0
}
},
"aggregations" : {
"id" : {
"terms" : {
"field" : "name",
"size" : 1,
"min_doc_count" : 1,
"shard_min_doc_count" : 0,
"show_term_doc_count_error" : false,
"order" : [
{
"_count" : "desc"
},
{
"_term" : "asc"
}
]
},
"aggregations" : {
"familyMemberCount" : {
"sum" : {
"field" : "membersInFamily"
}
},
"totalKidsInFamily" : {
"sum" : {
"field" : "kidsInFamily"
}
},
"myScript" : {
"bucket_script" : {
"buckets_path" : {
"familyMemberCount" : "familyMemberCount",
"totalKidsInFamily" : "totalKidsInFamily"
},
"script" : {
"source" : "params.familyMemberCount / params.totalKidsInFamily",
"lang" : "painless"
},
"gap_policy" : "skip"
}
}
}
}
}
}
}
}

How to count number of occurence of specific field in a collection in Mongodb

I have my collection as shown below:
{
"_id" : NumberLong(366),
"file" : "xyz",
"clist" : {
"account" : "BFS",
"subAccount":"a"
},
},
{
"_id" : NumberLong(366),
"file" : "xyz",
"clist" : {
"account" : "BFS",
"subAccount":"b"
},
},
{
"_id" : NumberLong(366),
"file" : "xyz",
"clist" : {
"account" : "HC",
"subAccount":"c"
},
}
In that I have to group by account and count number of subAccount; for example:
{
account : "BFS",
subAccount : "b",
count : 1,
subAccount :"a",
count : 1
}
If for account BFS, subAccount b exists two times, then I should get output like this:
{
account : "BFS",
subAccount : "b",
count : 2,
subAccount : "a",
count : 1
}
May this help you...
db.coll.aggregate([ {
"$group" : {
"_id" : {
"account" : "$clist.account",
"subAccount" : "$clist.subAccount"
},
"count" : {
"$sum" : 1
}
}
}, {
$project : {
_id : 0,
"account" : "$_id.account",
"subAccount" : "$_id.subAccount",
"count" : "$count"
}
}, {
$group : {
_id : "$account",
"subaccounts" : {
"$push" : {
"subAccount" : "$subAccount",
"count" : "$count"
}
}
}
} ])

How to use MongoDB $let with Spring-Data?

I have a MongoDB collection of places. A typical place has most of the following fields:
{
"_id" : ObjectId("575014dc6b028f07bef53681"),
"_class" : "domain.model.PlaceV1",
"name" : "Γιασεμί",
"primary_photo_url" : "https://irs0.4sqi.net/img/general/original/34666238_STHSh6CHiC7hpAuB4rztRVg6cFc5ylfi15aRaR7zUuQ.jpg",
"seenDetails" : NumberLong(0),
"foursquare_checkins" : 646,
"foursquare_tips" : 28,
"keywords" : [
""
],
"verified" : 1,
"location" : {
"loc" : {
"type" : "Point",
"coordinates" : [
25.898318,
36.831486
]
},
"formattedAddress" : "Χώρα",
"locality" : "Amorgos",
"first_neighbourhood" : "Katapola",
"greek_locality" : "Αμοργός",
"greek_first_neighbourhood" : "Κατάπολα"
},
"contact" : {
"phone_numbers" : [
"+30 2285 074017"
]
},
"price" : {
"priceVotes" : NumberLong(0),
"price" : 0,
"priceVotesSum" : NumberLong(0)
},
"rating" : {
"rating" : 8,
"ratingVotes" : NumberLong(0),
"ratingVotesSum" : NumberLong(0)
},
"categories" : [
{
"cat_id" : NumberLong(10310061000),
"category" : "Café",
"greek_category" : "Καφετέρια",
"weight" : 4
},
{
"cat_id" : NumberLong(11610021000),
"category" : "Bar",
"greek_category" : "Μπαρ",
"weight" : 4
}
]
}
I want to make queries where the sorting will be based on a score that is a result of some expressions and conditions. From the mongo shell I have tried this:
db.place.aggregate([
{$match:{"location.locality":"Athens"}},
{$project:
{name:1, location:1, score:{
$let: {
vars:{ foursquare: {
$cond: { if: { $gte: [ "$foursquare_checkins", 500 ] }, then: 500, else: "$foursquare_checkins" }
},
rating: {$multiply:["$rating.rating", 100]},
},
in:{$add:["$$foursquare", "$$rating", "$seenDetails"]}
}
}
}
},
{$sort: {score: -1}}]).pretty();
This is a simple example of my queries. The score will contain more complex expressions like the distance from a location. The problem is that I cannot find a way to use the $let and the $cond operator in my Java code with Spring. Could anybody help?
You should be able to do this using nested DBObject and a Custom Aggregation Operation.
For Example:
Map operations = new HashMap();
operations.put("name", 1);
operations.put("location", 1);
operations.put("score", new BasicDBObject("$let", new BasicDBObject("vars", new BasicDBObject())));
Then you can create a CustomAggregationOperation to add this to your project
CustomAggregationOperation project = new CustomAggregationOperation(new BasicDBObject("$project", operation));
This will give you the following pipeline:
{ "$project" : { "score" : { "$let" : { "vars" : { }}} , "name" : 1 , "location" : 1}}
Then you can add your other stages:
Aggregation aggregate = Aggregation.newAggregation(match, project, sort);
public class CustomAggregationOperation implements AggregationOperation {
private DBObject operation;
public CustomAggregationOperation (DBObject operation) {
this.operation = operation;
}
#Override
public DBObject toDBObject(AggregationOperationContext context) {
return context.getMappedObject(operation);
}
}

Gets documents and total count of them in single query include pagination

I'm new in mongo and use mongodb aggregation framework for my queries. I need to retrieve some records which satisfy certain conditions(include pagination+sorting) and also get total count of records.
Now, I perform next steps:
Create $match operator
{ "$match" : { "year" : "2012" , "author.authorName" : { "$regex" :
"au" , "$options" : "i"}}}
Added sorting and pagination
{ "$sort" : { "some_field" : -1}} , { "$limit" : 10} , { "$skip" : 0}
After querying I receive the expected result: 10 documents with all fields.
For pagination I need to know the total count of records which satisfy these conditions, in my case 25.
I use next query to get count : { "$match" : { "year" : "2012" , "author.authorName" : { "$regex" : "au" , "$options" : "i"}}} , { "$group" : { "_id" : "$all" , "reviewsCount" : { "$sum" : 1}}} , { "$sort" : { "some_field" : -1}} , { "$limit" : 10} , { "$skip" : 0}
But I don't want to perform two separate queries: one for retrieving documents and second for total counts of records which satisfy certain conditions.
I want do it in one single query and get result in next format:
{
"result" : [
{
"my_documets": [
{
"_id" : ObjectId("512f1f47a411dc06281d98c0"),
"author" : {
"authorName" : "author name1",
"email" : "email1#email.com"
}
},
{
"_id" : ObjectId("512f1f47a411dc06281d98c0"),
"author" : {
"authorName" : "author name2",
"email" : "email2#email.com"
}
}, .......
],
"total" : 25
}
],
"ok" : 1
}
I tried modify the group operator : { "$group" : { "_id" : "$all" , "author" : "$author" "reviewsCount" : { "$sum" : 1}}}
But in this case I got : "exception: the group aggregate field 'author' must be defined as an expression inside an object". If add all fields in _id then reviewsCount always = 1 because all records are different.
Nobody know how it can be implement in single query ? Maybe mongodb has some features or operators for this case? Implementation with using two separate query reduces performance for querying thousand or millions records. In my application it's very critical performance issue.
I've been working on this all day and haven't been able to find a solution, so thought i'd turn to the stackoverflow community.
Thanks.
You can try using $facet in the aggregation pipeline as
db.name.aggregate([
{$match:{your match criteria}},
{$facet: {
data: [{$sort: sort},{$skip:skip},{$limit: limit}],
count:[{$group: {_id: null, count: {$sum: 1}}}]
}}
])
In data, you'll get your list with pagination and in the count, count variable will have a total count of matched documents.
Ok, I have one example, but I think it's really crazy query, I put it only for fun, but if this example faster than 2 query, tell us about it in the comments please.
For this question i create collection called "so", and put into this collection 25 documents like this:
{
"_id" : ObjectId("512fa86cd99d0adda2a744cd"),
"authorName" : "author name1",
"email" : "email1#email.com",
"c" : 1
}
My query use aggregation framework:
db.so.aggregate([
{ $group:
{
_id: 1,
collection: { $push : { "_id": "$_id", "authorName": "$authorName", "email": "$email", "c": "$c" } },
count: { $sum: 1 }
}
},
{ $unwind:
"$collection"
},
{ $project:
{ "_id": "$collection._id", "authorName": "$collection.authorName", "email": "$collection.email", "c": "$collection.c", "count": "$count" }
},
{ $match:
{ c: { $lte: 10 } }
},
{ $sort :
{ c: -1 }
},
{ $skip:
2
},
{ $limit:
3
},
{ $group:
{
_id: "$count",
my_documets: {
$push: {"_id": "$_id", "authorName":"$authorName", "email":"$email", "c":"$c" }
}
}
},
{ $project:
{ "_id": 0, "my_documets": "$my_documets", "total": "$_id" }
}
])
Result for this query:
{
"result" : [
{
"my_documets" : [
{
"_id" : ObjectId("512fa900d99d0adda2a744d4"),
"authorName" : "author name8",
"email" : "email8#email.com",
"c" : 8
},
{
"_id" : ObjectId("512fa900d99d0adda2a744d3"),
"authorName" : "author name7",
"email" : "email7#email.com",
"c" : 7
},
{
"_id" : ObjectId("512fa900d99d0adda2a744d2"),
"authorName" : "author name6",
"email" : "email6#email.com",
"c" : 6
}
],
"total" : 25
}
],
"ok" : 1
}
By the end, I think that for big collection 2 query (first for data, second for count) works faster. For example, you can count total for collection like this:
db.so.count()
or like this:
db.so.find({},{_id:1}).sort({_id:-1}).count()
I don't fully sure in first example, but in second example we use only cursor, which means higher speed:
db.so.find({},{_id:1}).sort({_id:-1}).explain()
{
"cursor" : "BtreeCursor _id_ reverse",
"isMultiKey" : false,
"n" : 25,
"nscannedObjects" : 25,
"nscanned" : 25,
"nscannedObjectsAllPlans" : 25,
"nscannedAllPlans" : 25,
"scanAndOrder" : false,
!!!!!>>> "indexOnly" : true, <<<!!!!!
"nYields" : 0,
"nChunkSkips" : 0,
"millis" : 0,
...
}
For completeness (full discussion was on the MongoDB Google Groups) here is the aggregation you want:
db.collection.aggregate(db.docs.aggregate( [
{
"$match" : {
"year" : "2012"
}
},
{
"$group" : {
"_id" : null,
"my_documents" : {
"$push" : {
"_id" : "$_id",
"year" : "$year",
"author" : "$author"
}
},
"reviewsCount" : {
"$sum" : 1
}
}
},
{
"$project" : {
"_id" : 0,
"my_documents" : 1,
"total" : "$reviewsCount"
}
}
] )
By the way, you don't need aggregation framework here - you can just use a regular find. You can get count() from a cursor without having to re-query.

Categories

Resources