处理按第一列聚合第二列的代码示例
[python]
#coding=gbk
import os
import sys
import argparse
import commands
import logging
# Configure the root logger: every record is prefixed with the name of the
# user running the script and appended to a hidden per-script log file.
status, username = commands.getstatusoutput('whoami')
if status != 0:
    # On failure the captured output is an error message, not a user name.
    username = 'unknown'
logging.basicConfig(
    level=logging.DEBUG,
    format=username + ' %(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    # Use only the script's base name: __file__ may carry a directory part
    # (e.g. "./tool.py"), which would turn '.log' + __file__ into a path
    # inside a directory that does not exist.
    filename='.log' + os.path.basename(__file__),
    filemode='a')
def perror_and_exit(message, status = -1):
    """Report a fatal error and terminate the process.

    The message is written to stderr followed by a newline, recorded in the
    log with a trailing "sys exit" marker, and then the interpreter exits.

    Args:
        message: Text describing why the script is stopping.
        status: Exit status handed to ``sys.exit`` (defaults to -1).

    Raises:
        SystemExit: Always, carrying ``status``.
    """
    sys.stderr.write(message + '\n')
    # Keep a separator between the message and the marker so the log line
    # stays readable (previously the two were concatenated directly).
    logging.info('%s sys exit', message)
    sys.exit(status)
# Number of times handle_keywords() has been called, i.e. groups processed.
count = 0


def handle_keywords(userid, keywords, fw):
    """Process one group of keywords collected for a single user id.

    This is currently a placeholder: it only increments the module-level
    ``count``. The arguments are accepted but not used yet.

    Args:
        userid: Value of the first column shared by the whole group.
        keywords: List of second-column values gathered for ``userid``.
        fw: Output file object opened by the caller.
    """
    global count
    count += 1
if __name__ == "__main__":
    # Script entry point. Exactly one argument selects the mode:
    #   keyword - group the second column of the show-word file by its first
    #             column and hand each group to handle_keywords()
    #   idea    - read the title/description file line by line (no processing
    #             is implemented for it yet)
    directory = "/home/junfeng"
    usage = "Usage:python %s keyword or idea" % __file__
    if len(sys.argv) != 2:
        perror_and_exit(usage)
    mode = sys.argv[1]
    if mode == "keyword":
        filename = os.path.join(directory, "userid_showword.active.tradeid5401.20130329.txt")
        # Single-argument print(...) behaves the same as the print statement.
        print(filename)
        last_userid = None
        keywords = []
        # Context managers close both files even if processing fails.
        with open("userid_showwords_brand.txt", "w") as fw, open(filename) as fr:
            for line in fr:
                fields = line.split()
                if len(fields) < 2:
                    # Blank or one-column lines cannot be grouped; skip them
                    # instead of failing with IndexError.
                    logging.warning("skip malformed line: %r", line)
                    continue
                userid, keyword = fields[0], fields[1]
                if userid == last_userid:
                    keywords.append(keyword)
                else:
                    # A new user id starts: flush the previous group. Only
                    # adjacent lines are merged, so the input is presumably
                    # already sorted by user id.
                    if keywords:
                        handle_keywords(last_userid, keywords, fw)
                    last_userid = userid
                    keywords = [keyword]
            # Flush the final group; nothing to flush for an empty input.
            if keywords:
                handle_keywords(last_userid, keywords, fw)
        print(keywords)
        print(count)
    elif mode == "idea":
        filename = os.path.join(directory, "userid_title_desc1_desc2.active.tradeid5401.20130329.txt")
        with open(filename) as fr:
            for line in fr:
                # Placeholder: fields are split but not processed yet.
                fields = line.split()
    else:
        # Unknown mode: report usage instead of silently doing nothing.
        perror_and_exit(usage)
#coding=gbk
import os
import sys
import argparse
import commands
import logging
# Configure the root logger: every record is prefixed with the name of the
# user running the script and appended to a hidden per-script log file.
status, username = commands.getstatusoutput('whoami')
if status != 0:
    # On failure the captured output is an error message, not a user name.
    username = 'unknown'
logging.basicConfig(
    level=logging.DEBUG,
    format=username + ' %(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    # Use only the script's base name: __file__ may carry a directory part
    # (e.g. "./tool.py"), which would turn '.log' + __file__ into a path
    # inside a directory that does not exist.
    filename='.log' + os.path.basename(__file__),
    filemode='a')
def perror_and_exit(message, status = -1):
    """Report a fatal error and terminate the process.

    The message is written to stderr followed by a newline, recorded in the
    log with a trailing "sys exit" marker, and then the interpreter exits.

    Args:
        message: Text describing why the script is stopping.
        status: Exit status handed to ``sys.exit`` (defaults to -1).

    Raises:
        SystemExit: Always, carrying ``status``.
    """
    sys.stderr.write(message + '\n')
    # Keep a separator between the message and the marker so the log line
    # stays readable (previously the two were concatenated directly).
    logging.info('%s sys exit', message)
    sys.exit(status)
# Number of times handle_keywords() has been called, i.e. groups processed.
count = 0


def handle_keywords(userid, keywords, fw):
    """Process one group of keywords collected for a single user id.

    This is currently a placeholder: it only increments the module-level
    ``count``. The arguments are accepted but not used yet.

    Args:
        userid: Value of the first column shared by the whole group.
        keywords: List of second-column values gathered for ``userid``.
        fw: Output file object opened by the caller.
    """
    global count
    count += 1
if __name__ == "__main__":
directory = "/home/junfeng"
if len(sys.argv) != 2:
perror_and_exit("Usage:python %s keyword or idea" % (__file__ + ""))
if sys.argv[1] == "keyword":
filename = os.path.join(directory, "userid_showword.active.tradeid5401.20130329.txt")
print filename
fw = file("userid_showwords_brand.txt", "w")
last_userid = None
keywords = []
for line in file(filename):
line = line.strip().split()
userid = line[0]
keyword = line[1]
if userid == last_userid:
keywords.append(keyword)
else:
if len(keywords) > 0:
handle_keywords(last_userid, keywords, fw)
pass
last_userid = userid
keywords = [keyword]
handle_keywords(last_userid, keywords, fw)
print keywords
global count
print count
pass
elif sys.argv[1] == "idea":
filename = os.path.join(directory, "userid_title_desc1_des
补充:Web开发、Python