Using multiple aggregates (SUM, COUNT, AVG) in a RethinkDB query

With a RethinkDB query, you can’t do “count” and “sum” quite the same as you’d do in SQL.

Since it uses a map-reduce style engine underneath, you set count to “1” on each of your rows and then add these up later:

r.db('performance')
 .table('query_timings')
 .pluck('DbId')
 .group('DbId')
 .map(function(row) {
  return {
    DbId: row('DbId'),
    Duration: row('Duration'),
    count: 1
  }
  })
  .reduce(function(left, right) {
    var dur = left('Duration').add(right('Duration'));
    var cnt = left('count').add(right('count'));    
        
    var avg = cnt > 0 ? dur / avg : 0;
    
    return {
      Duration: dur,
      count: cnt,
      avg: avg
    }
  })

Note however, that with this implementation you’re computing the average over and over, which is not ideal.

If you want to do “count distinct” you’d want to create a hashmap to track the distinct values:

r.db('performance')
 .table('query_timings')
 .group('DbId')
 .map(function(row) {
    var IP = row('IP');
    
    var o = r.object(IP, 1);
   
    return {
      DbId: row('DbId'),
      IP: IP,
      o
    }
  })
  .reduce(function(left, right) {
    var qL = left('o');
    var qR = right('o');
  
    var m = qL.merge(qR);
      
    return {
      DbId: left('DbId'),
      IPs: m.keys(),
      found: m.keys().count(),
      o: m
    }
  })
  .ungroup()
  .map(function(group) {
    return {
        'DbId': group('group'),
        'found': group('reduction')('found')
    }
  })

Leave a Reply

Your email address will not be published. Required fields are marked *