MongoDB使用mapreduce完成数据迭代
MongoDB使用mapreduce完成数据迭代
文档结构:
01
Spcode
02
Spname
03
Consignid
04
Consname
05
Region
06
Regionname
07
Serviceid
08
Servicename
09
Srctermid
10
Logtime
>每天要做两次分组
-- SQL: daily grouping.
-- Returns one row per group of the GROUP BY columns for log rows whose
-- Logtime lies strictly between the two bounds.
-- NOTE(review): Srctermid is selected but is not listed in GROUP BY; confirm
-- whether it is meant to be part of the grouping key.
SELECT
    Spcode,
    Spname,
    Consignid,
    Consname,
    Region,
    Regionname,
    Serviceid,
    Servicename,
    Srctermid
FROM mo_log_201208
WHERE Logtime > "20120823"
  AND Logtime < "20120824"
GROUP BY
    Spcode,
    Spname,
    Consignid,
    Consname,
    Region,
    Regionname,
    Serviceid,
    Servicename
// MongoDB: daily grouping.
// Counts the documents of mo_log_201208 that match the Logtime window, grouped
// by the nine fields emitted as the key, and writes the result to
// tmp_mo_spcode_consignid_region_serviceid_201208_1.
// NOTE(review): the SQL version uses strict > / < bounds while this query uses
// $gte / $lte; confirm which boundary handling is intended.
res = db.runCommand({
    mapreduce: 'mo_log_201208',
    query: {Logtime: {$gte: '20120823', $lte: '20120824'}},
    // Emit one {count: 1} per document, keyed by the grouping fields.
    map: function() {
        emit({
            Spcode: this.Spcode, Spname: this.Spname,
            Consignid: this.Consignid, Consname: this.Consname,
            Region: this.Region, Regionname: this.Regionname,
            Serviceid: this.Serviceid,
            Servicename: this.Servicename,
            Srctermid: this.Srctermid
        }, {count: 1});
    },
    // Sum the partial counts for one key. MongoDB may feed the output of an
    // earlier reduce call back in as an element of `values`, so the returned
    // object has the same {count: n} shape as the emitted value and the counts
    // are added rather than incremented by one.
    reduce: function(key, values) {
        var total = 0;
        for (var i = 0; i < values.length; i++) {
            total += values[i].count;
        }
        return {count: total};
    },
    out: 'tmp_mo_spcode_consignid_region_serviceid_201208_1',
    verbose: true
})
-- SQL: monthly grouping.
-- Returns one row per group of the GROUP BY columns over the whole
-- mo_log_201208 table.
-- NOTE(review): Srctermid is selected but is not listed in GROUP BY; confirm
-- whether it is meant to be part of the grouping key.
SELECT
    Spcode,
    Spname,
    Consignid,
    Consname,
    Region,
    Regionname,
    Serviceid,
    Servicename,
    Srctermid
FROM mo_log_201208
GROUP BY
    Spcode,
    Spname,
    Consignid,
    Consname,
    Region,
    Regionname,
    Serviceid,
    Servicename
// MongoDB: monthly grouping.
// Counts every document of mo_log_201208, grouped by the nine fields emitted
// as the key, and writes the result to
// tmp_mo_spcode_consignid_region_serviceid_201208.
res = db.runCommand({
    mapreduce: 'mo_log_201208',
    // Emit one {count: 1} per document, keyed by the grouping fields.
    map: function() {
        emit({
            Spcode: this.Spcode, Spname: this.Spname,
            Consignid: this.Consignid, Consname: this.Consname,
            Region: this.Region, Regionname: this.Regionname,
            Serviceid: this.Serviceid, Servicename: this.Servicename,
            Srctermid: this.Srctermid
        }, {count: 1});
    },
    // Sum the partial counts for one key. MongoDB may feed the output of an
    // earlier reduce call back in as an element of `values`, so the returned
    // object has the same {count: n} shape as the emitted value and the counts
    // are added rather than incremented by one.
    reduce: function(key, values) {
        var total = 0;
        for (var i = 0; i < values.length; i++) {
            total += values[i].count;
        }
        return {count: total};
    },
    out: 'tmp_mo_spcode_consignid_region_serviceid_201208',
    verbose: true
})
>随着每天数据量的不断增长, 月分组的执行时间会不断的增加~~
>为了减少重复的分组操作, 降低分组时间, 用天分组的数据来迭代月分组的数据
// MongoDB: fold one day's grouping into the monthly grouping.
//
// Step 1 groups the documents of the Logtime window into
// tmp_mo_spcode_consignid_region_serviceid_201208_1 (the daily result).
// Step 2 merges that daily result into
// tmp_mo_spcode_consignid_region_serviceid_201208 (the monthly result) with
// the `reduce` output mode, so the month does not have to be re-grouped from
// the raw log.
//
// The merge is done by a second command instead of an insert inside
// `finalize`: the MongoDB manual states that map/reduce/finalize functions
// should not access the database, and an insert would only add keys that are
// not present yet instead of adding up the counts of existing keys.
//
// NOTE: step 2 adds the daily counts to the monthly counts, so run it exactly
// once per daily window; running it again for the same day counts that day
// twice.

// Step 1: daily grouping.
res = db.runCommand({
    mapreduce: 'mo_log_201208',
    query: {Logtime: {$gte: '20120823', $lte: '20120824'}},
    // Emit one {count: 1} per document, keyed by the grouping fields.
    map: function() {
        emit({
            Spcode: this.Spcode, Spname: this.Spname,
            Consignid: this.Consignid, Consname: this.Consname,
            Region: this.Region, Regionname: this.Regionname,
            Serviceid: this.Serviceid, Servicename: this.Servicename,
            Srctermid: this.Srctermid
        }, {count: 1});
    },
    // Sum the partial counts for one key; the result keeps the {count: n}
    // shape of the emitted value because MongoDB may re-reduce earlier output.
    reduce: function(key, values) {
        var total = 0;
        for (var i = 0; i < values.length; i++) {
            total += values[i].count;
        }
        return {count: total};
    },
    out: 'tmp_mo_spcode_consignid_region_serviceid_201208_1',
    verbose: true
})

// Step 2: merge the daily result into the monthly result.
mergeRes = db.runCommand({
    mapreduce: 'tmp_mo_spcode_consignid_region_serviceid_201208_1',
    // Each daily result document already has the form {_id: key, value: {count: n}}.
    map: function() {
        emit(this._id, this.value);
    },
    // Same summing reduce as in step 1; with the `reduce` output mode it is
    // also applied to a key's existing monthly value and its new daily value.
    reduce: function(key, values) {
        var total = 0;
        for (var i = 0; i < values.length; i++) {
            total += values[i].count;
        }
        return {count: total};
    },
    out: {reduce: 'tmp_mo_spcode_consignid_region_serviceid_201208'},
    verbose: true
})
>MongoDB缺省的主键是_id, 并且_id上有唯一索引, 因此无论使用insert还是save, 都不会产生_id重复的文档
>insert和save的区别是:
>>insert:当主键重复的时候, 操作失败(返回duplicate key错误), 已有文档保持不变
>>save:当主键重复的时候, 用新文档整体替换已有文档(即执行更新操作)