当前位置:软件学习 > Word >>

Hadoop平台运行WordCount程序

 1. 经典的WordCount程序(WordCount.java)
[java] view plaincopyprint?
import java.io.IOException; 
import java.util.ArrayList; 
import java.util.Iterator; 
import java.util.List; 
import java.util.StringTokenizer; 
 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.conf.Configured; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapred.FileInputFormat; 
import org.apache.hadoop.mapred.FileOutputFormat; 
import org.apache.hadoop.mapred.JobClient; 
import org.apache.hadoop.mapred.JobConf; 
import org.apache.hadoop.mapred.MapReduceBase; 
import org.apache.hadoop.mapred.Mapper; 
import org.apache.hadoop.mapred.OutputCollector; 
import org.apache.hadoop.mapred.Reducer; 
import org.apache.hadoop.mapred.Reporter; 
import org.apache.hadoop.util.Tool; 
import org.apache.hadoop.util.ToolRunner; 
 
public class WordCount extends Configured implements Tool { 
 
    public static class MapClass extends MapReduceBase implements 
            Mapper<LongWritable, Text, Text, IntWritable> { 
 
        private final static IntWritable one = new IntWritable(1); 
        private Text word = new Text(); 
 
        public void map(LongWritable key, Text value, 
                OutputCollector<Text, IntWritable> output, Reporter reporter) 
                throws IOException { 
            String line = value.toString(); 
            StringTokenizer itr = new StringTokenizer(line); 
            while (itr.hasMoreTokens()) { 
                word.set(itr.nextToken()); 
                output.collect(word, one); 
            } 
        } 
    } 
 
    /**
     * A reducer class that just emits the sum of the input values.
     */ 
    public static class Reduce extends MapReduceBase implements 
            Reducer<Text, IntWritable, Text, IntWritable> { 
 
        public void reduce(Text key, Iterator<IntWritable> values, 
                OutputCollector<Text, IntWritable> output, Reporter reporter) 
                throws IOException { 
            int sum = 0; 
            while (values.hasNext()) { 
                sum += values.next().get(); 
            } 
            output.collect(key, new IntWritable(sum)); 
        } 
    } 
 
    static int printUsage() { 
        System.out.println("wordcount [-m <maps>] [-r <reduces>] <input> <output>"); 
        ToolRunner.printGenericCommandUsage(System.out); 
        return -1; 
    } 
 
    /**
     * The main driver for word count map/reduce program. Invoke this method to
     * submit the map/reduce job.
     *
     * @throws IOException
     *             When there is communication problems with the job tracker.
     */ 
    public int run(String[] args) throws Exception { 
        JobConf conf = new JobConf(getConf(), WordCount.class); 
        conf.setJobName("wordcount"); 
 
        // the keys are words (strings) 
        conf.setOutputKeyClass(Text.class); 
        // the values are counts (ints) 
        conf.setOutputValueClass(IntWritable.class); 
 
        conf.setMapperClass(MapClass.class); 
        conf.setCombinerClass(Reduce.class); 
        conf.setReducerClass(Reduce.class); 
 
        List<String> other_args = new ArrayList<String>(); 
        for (int i = 0; i < args.length; ++i) { 
            try { 
                if ("-m".equals(args[i])) { 
                    conf.setNumMapTasks(Integer.parseInt(args[++i])); 
                } else if ("-r".equals(args[i])) { 
                    conf.setNumReduceTasks(Integer.parseInt(args[++i])); 
                } else { 
                    other_args.add(args[i]); 
                } 
            } catch (NumberFormatException except) { 
                System.out.println("ERROR: Integer expected instead of " 
              &nbs

补充:软件开发 , Java ,
CopyRight © 2022 站长资源库 编程知识问答 zzzyk.com All Rights Reserved
部分文章来自网络,