7.3 MapReduce实验1 去重 · Hadoop

输入样例

file1.txt

```
2012-3-1 a
2012-3-2 b
2012-3-4 d
2012-3-3 c
2012-3-5 a
2012-3-6 b
2012-3-7 c
2012-3-3 e
```

file2.txt

```
2012-3-1 a
2012-3-5 b
2012-3-2 d
2012-3-3 c
2012-3-5 a
2012-3-6 b
2012-3-4 c
2012-3-3 e
```

代码实现：

```java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce job that removes duplicate records from its input files.
 *
 * <p>The mapper emits every input record as a key with an empty value; the
 * shuffle groups identical keys together, and the reducer writes each distinct
 * key exactly once.
 *
 * <p>Usage: {@code getunique [<input path>... <output path>]}. With no
 * arguments the job reads {@code D:\input\file1.txt} and
 * {@code D:\input\file2.txt} and writes to {@code D:\topoutput}.
 */
public class getunique {

    /** Input files used when no paths are given on the command line. */
    private static final String[] DEFAULT_INPUTS = {
        "D:\\input\\file1.txt",
        "D:\\input\\file2.txt"
    };

    /** Output directory used when no paths are given on the command line. */
    private static final String DEFAULT_OUTPUT = "D:\\topoutput";

    /**
     * Emits each input record unchanged as the output key, paired with
     * {@link NullWritable}, so that identical records end up in the same
     * reduce group.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

        /**
         * Forwards one input record as a key.
         *
         * @param key     key supplied by the input format (unused)
         * @param value   the record text; with the default text input format
         *                this is one line of the input file
         * @param context sink for the emitted key/value pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(value, NullWritable.get());
            // Debug trace of every record seen by the mapper.
            System.out.println("value=" + value.toString());
        }
    }

    /**
     * Writes each distinct key once and ignores the grouped values. Because
     * its input and output types are identical and its output does not depend
     * on how many values a key has, it is also used as the combiner.
     */
    public static class MyReducer extends Reducer<Text, NullWritable, Text, NullWritable> {

        /**
         * Emits the key of one reduce group.
         *
         * @param key     a distinct record
         * @param values  the empty values grouped under {@code key} (unused)
         * @param context sink for the emitted key/value pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted
         */
        @Override
        protected void reduce(Text key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    /**
     * Configures and runs the de-duplication job.
     *
     * @param args either empty (use the built-in default paths), or one or
     *             more input paths followed by the output path
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        // A single argument cannot be split into "inputs + output".
        if (args.length == 1) {
            System.err.println("Usage: getunique [<input path>... <output path>]");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        // Job.getInstance replaces the deprecated Job(Configuration) constructor.
        Job job = Job.getInstance(conf);
        job.setJarByClass(getunique.class);

        job.setMapperClass(MyMapper.class);
        // Collapse duplicates on the map side to shrink the shuffle.
        job.setCombinerClass(MyReducer.class);
        job.setReducerClass(MyReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        if (args.length >= 2) {
            int outputIndex = args.length - 1;
            for (int i = 0; i < outputIndex; i++) {
                FileInputFormat.addInputPath(job, new Path(args[i]));
            }
            FileOutputFormat.setOutputPath(job, new Path(args[outputIndex]));
        } else {
            for (String input : DEFAULT_INPUTS) {
                FileInputFormat.addInputPath(job, new Path(input));
            }
            FileOutputFormat.setOutputPath(job, new Path(DEFAULT_OUTPUT));
        }

        // Report failure through the exit status instead of always printing "ok".
        boolean succeeded = job.waitForCompletion(true);
        if (!succeeded) {
            System.exit(1);
        }
        System.out.println("ok");
    }
}
```