I have 3 collections in MongoDB
- regions
- each document corresponds to one geographical region, has a field for the region name, and another field which is an array of farms within the broader region
- details
- this collection contains documents, each one relating to a particular farm, and with various fields relating to details of that farm, e.g. number of cows
- yield
- this collection again contains documents where each one relates to a particular farm, and the fields in this instance are for farm output per day
I am trying to write a function that will start with the regions collection, for the first region it then takes each individual farm ID, and uses this to query the other two collections getting the total yield and total number of cows for that farm, then summing for each farm to get a total for the region.
I first tried using straightforward mongodb calls for just a single region
var db = client.connect('mongodb://localhost:27017/mydb', function(err,db) {
if (err) throw err;
var regions = db.collection('Regions');
var details = db.collection('Details');
var yield = db.collection('Yield');
regions.find({"region" : "Gotham"}).toArray(function(err, docs) {
for (var k = 0; k < docs.length; k++) {
var regionYield = 0;
for (var j = 0; j < docs[k].farms.length; j++) {
var farmYield = 0;
var farmID = docs[k].farms[j]
yield.find({Farm_ID: farmID}).toArray(function(err, docs) {
for (var i = 0; i < docs.length; i++) {
farmYield += +docs[i].Yield;
}
console.log('finished inner loop');
regionYield += farmYield;
});
}
console.log('finished middle loop');
}
console.log('finished outer loop');
});
After the completion of the outer loop I want to do something with the final regionYield
value, but as it is structured now, the outer loop finishes before the necessary computation is finished in the inner loop, due to the async mongo call. I just can't figure out to get around this. I have looked at a lot of questions/answers here explaining callbacks but I just can't figure out how to apply it to my case.
Answer
I think you are approaching this the wrong way in your design, but more on that later. First on to basically changing your listing.
The simple and no extra dependency approach would be to use the node stream interface that is directly supported by the driver. This allows you to process each document in turn and also does not load all the content into memory as with .toArray()
.
Also as a "stream" there is an "end" event that fires, and natuaral flow control that can wrap the queries issued:
var mongodb = require('mongodb'),
MongoClient = mongodb.MongoClient;
MongoClient.connect('mongodb://localhost/mydb',function(err,db) {
if (err) throw err;
var regions = db.collection('Regions'),
yield = db.collection('Yield');
var resultHash = {};
var rstream = regions.find({ "region": "Gotham" });
rstream.on('err',function(err) {
throw err;
});
rstream.on('end',function() {
console.log( 'Complete' );
console.log( JSON.stringify( resultHash, undefined, 2 ) );
db.close();
});
rstream.on('data',function(region) {
rstream.pause(); // pause outer stream
resultHash[region.region] = 0;
var fstream = yield.find({ "Farm_ID": { "$in": region.farms } });
fstream.on('err',function(err) {
throw err;
});
fstream.on('end',function() {
console.log('finished farms');
rstream.resume();i // resumes outer stream
});
fstream.on('data',function(farm) {
fstream.pause(); // pause inner stream
resultHash[region.region] += farm.Yield;
fstream.resume(); // resume inner stream
});
});
});
And that is basically going to "sum up" all of your "Yield" values for the matched "region" of the other document. Also note the very simple use of $in
to pass in all "farms" which is already in an array rather than processing another loop.
But you really should not be doing this anyway. Databases are "smart" and you design needs to be smarter.
You can avoid all of the trouble here by basically adding "region" data to your "yield" data. Then this is just a matter of running .aggregate()
:
So with data like this in "yield":
{ "region": "Gotham", "Yield": 123 }
Then run just this code:
yield.aggregate(
[
{ "$group": {
"_id": "$region",
"yield": { "$sum": "$Yield" }
}}
],
function(err,results) {
}
);
And it is all done, without loops or calculations in code.
So if you simply add the "related data" such as "region" to the "yield" data that you want to work with, then MongoDB already has the tools that make accumulating on that key a breeze.
This is what it means to move away from relational design. Things work differently, so you need to work differently with them. And smarter.
No comments:
Post a Comment