当前位置:数据库 > mongodb >>

MongoDB使用mapreduce完成数据迭代


MongoDB使用mapreduce完成数据迭代
 
文档结构:
 
Spcode
Spname
Consignid
Consname
Region
Regionname
Serviceid
Servicename
Srctermid
Logtime
>每天要做两次分组: 一次按天分组, 一次按月分组
# SQL
# Daily grouping: number of log records per key combination for 2012-08-23.
# Every non-aggregated column in the SELECT list (including Srctermid) must
# also appear in GROUP BY, otherwise the query is rejected under
# ONLY_FULL_GROUP_BY or returns an arbitrary Srctermid per group.
# The range is half-open [20120823, 20120824) so that a record sitting exactly
# on a day boundary belongs to exactly one day.
# NOTE(review): assumes Logtime is a string that starts with yyyymmdd - confirm.
SELECT Spcode, Spname, Consignid, Consname, Region,
       Regionname, Serviceid, Servicename, Srctermid,
       COUNT(*) AS record_count
FROM mo_log_201208
WHERE Logtime >= "20120823" AND Logtime < "20120824"
GROUP BY Spcode, Spname, Consignid, Consname, Region,
         Regionname, Serviceid, Servicename, Srctermid
// MongoDB
// Daily grouping: count the log records of one day per
// (Spcode, Spname, Consignid, Consname, Region, Regionname,
//  Serviceid, Servicename, Srctermid) combination and write the result to
// tmp_mo_spcode_consignid_region_serviceid_201208_1 (replaced on every run).
res = db.runCommand({
    mapreduce: 'mo_log_201208',
    // Half-open range [20120823, 20120824): a record that falls exactly on the
    // upper bound must not be counted again by the next day's run.
    // NOTE(review): assumes Logtime is a string starting with yyyymmdd - confirm.
    query: {Logtime: {$gte: '20120823', $lt: '20120824'}},
    map: function() {
        // The whole grouping tuple is the key; each record contributes 1.
        emit({Spcode: this.Spcode, Spname: this.Spname,
              Consignid: this.Consignid, Consname: this.Consname,
              Region: this.Region, Regionname: this.Regionname,
              Serviceid: this.Serviceid,
              Servicename: this.Servicename,
              Srctermid: this.Srctermid}, {count: 1});
    },
    reduce: function(key, values) {
        // `values` holds every {count: n} emitted for `key`, and may also hold
        // results of earlier reduce calls for the same key, so the counts have
        // to be summed rather than counted or reset.
        var ret = {count: 0};
        for (var i = 0; i < values.length; i++) {
            ret.count += values[i].count;
        }
        return ret;
    },
    out: 'tmp_mo_spcode_consignid_region_serviceid_201208_1',
    verbose: true
})
# SQL
# Monthly grouping: number of log records per key combination over the whole
# monthly table. Every non-aggregated column in the SELECT list (including
# Srctermid) must also appear in GROUP BY, otherwise the query is rejected
# under ONLY_FULL_GROUP_BY or returns an arbitrary Srctermid per group.
SELECT Spcode, Spname, Consignid, Consname, Region,
       Regionname, Serviceid, Servicename, Srctermid,
       COUNT(*) AS record_count
FROM mo_log_201208
GROUP BY Spcode, Spname, Consignid, Consname, Region,
         Regionname, Serviceid, Servicename, Srctermid
 
// MongoDB
// Monthly grouping: count all log records of mo_log_201208 per
// (Spcode, Spname, Consignid, Consname, Region, Regionname,
//  Serviceid, Servicename, Srctermid) combination and write the result to
// tmp_mo_spcode_consignid_region_serviceid_201208 (replaced on every run).
res = db.runCommand({
    mapreduce: 'mo_log_201208',
    map: function() {
        // The whole grouping tuple is the key; each record contributes 1.
        emit({Spcode: this.Spcode, Spname: this.Spname,
              Consignid: this.Consignid, Consname: this.Consname,
              Region: this.Region, Regionname: this.Regionname,
              Serviceid: this.Serviceid, Servicename: this.Servicename,
              Srctermid: this.Srctermid}, {count: 1});
    },
    reduce: function(key, values) {
        // `values` holds every {count: n} emitted for `key`, and may also hold
        // results of earlier reduce calls for the same key, so the counts have
        // to be summed rather than counted or reset.
        var ret = {count: 0};
        for (var i = 0; i < values.length; i++) {
            ret.count += values[i].count;
        }
        return ret;
    },
    out: 'tmp_mo_spcode_consignid_region_serviceid_201208',
    verbose: true
})
 >随着每天数据量的不断增长, 月分组的执行时间会不断地增加。
 
 >为了减少重复的分组操作, 降低分组时间, 用天分组的数据来迭代月分组的数据
 
// MongoDB
// Incremental monthly grouping.
//
// Step 1: group one day of log records and store the per-key counts in
// tmp_mo_spcode_consignid_region_serviceid_201208_1 (replaced on every run).
res = db.runCommand({
    mapreduce: 'mo_log_201208',
    // Half-open range [20120823, 20120824): a record that falls exactly on the
    // upper bound must not be counted again by the next day's run.
    // NOTE(review): assumes Logtime is a string starting with yyyymmdd - confirm.
    query: {Logtime: {$gte: '20120823', $lt: '20120824'}},
    map: function() {
        // The whole grouping tuple is the key; each record contributes 1.
        emit({Spcode: this.Spcode, Spname: this.Spname,
              Consignid: this.Consignid, Consname: this.Consname,
              Region: this.Region, Regionname: this.Regionname,
              Serviceid: this.Serviceid, Servicename: this.Servicename,
              Srctermid: this.Srctermid}, {count: 1});
    },
    reduce: function(key, values) {
        // Sum the partial counts; `values` may contain results of earlier
        // reduce calls for the same key.
        var ret = {count: 0};
        for (var i = 0; i < values.length; i++) {
            ret.count += values[i].count;
        }
        return ret;
    },
    out: 'tmp_mo_spcode_consignid_region_serviceid_201208_1',
    verbose: true
})

// Step 2: fold the daily counts into the monthly collection.
// map/reduce/finalize functions must not access the database, and a plain
// insert would leave the count of an already existing key unchanged. The
// {reduce: ...} output action instead re-applies `reduce` to the existing
// monthly value and the new daily value whenever the key already exists, and
// inserts the daily value when it does not.
// Run this step exactly once per day: merging the same day twice would add
// its counts twice.
mergeRes = db.runCommand({
    mapreduce: 'tmp_mo_spcode_consignid_region_serviceid_201208_1',
    map: function() {
        // Daily output documents have the shape {_id: <key>, value: {count: n}}.
        emit(this._id, this.value);
    },
    reduce: function(key, values) {
        var ret = {count: 0};
        for (var i = 0; i < values.length; i++) {
            ret.count += values[i].count;
        }
        return ret;
    },
    out: {reduce: 'tmp_mo_spcode_consignid_region_serviceid_201208'},
    verbose: true
})
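>MongoDB 缺省的主键是 _id, 使用 insert 或者 save 时, 同一个 _id 不会产生重复文档
>insert 和 save 的区别是
    >>insert: 当主键重复的时候, 报 duplicate key 错误, 原文档保持不变
    >>save: 当主键重复的时候, 用新文档整体替换原文档
>注意: insert 和 save 都不会把新的 count 累加到已有的 count 上; 要把天分组的结果累加进月分组, 应使用 mapreduce 的 out:{reduce:'集合名'} 输出方式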
 
CopyRight © 2022 站长资源库 编程知识问答 zzzyk.com All Rights Reserved
部分文章来自网络,