输入样例
file1.txt
```
2012-3-1 a
2012-3-2 b
2012-3-4 d
2012-3-3 c
2012-3-5 a
2012-3-6 b
2012-3-7 c
2012-3-3 e
```
file2.txt
```
2012-3-1 a
2012-3-5 b
2012-3-2 d
2012-3-3 c
2012-3-5 a
2012-3-6 b
2012-3-4 c
2012-3-3 e
```
代码实现:
```java
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class getunique {
public static class MyMapper extends Mapper<LongWritable, Text, Text, NullWritable>{
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
context.write(value, NullWritable.get());
System.out.println("value="+value.toString());
}
}
public static class MyReducer extends Reducer<Text,NullWritable,Text,NullWritable>{
protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
public static void main(String[] args) throws Exception{
Configuration conf = new Configuration();
Job job = new Job(conf);
job.setJarByClass(getunique.class);
job.setMapperClass(MyMapper.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
job.setReducerClass(MyReducer.class);
FileInputFormat.addInputPath(job, new Path("D:\\input\\file1.txt"));
FileInputFormat.addInputPath(job, new Path("D:\\input\\file2.txt"));
FileOutputFormat.setOutputPath(job, new Path("D:\\topoutput"));
job.waitForCompletion(true);
System.out.println("ok");
}
}
```
- 空白目录
- 第一章 Linux虚拟机安装
- 第二章 SSH配置
- 第三章 jdk配置
- 第四章 Hadoop配置-单机
- 第五章 Hadoop配置-集群
- 第六章 HDFS
- 第七章 MapReduce
- 7.1 MapReduce(上)
- 7.2 MapReduce(下)
- 7.3 MapReduce实验1 去重
- 7.4 MapReduce实验2 单例排序
- 7.5 MapReduce实验3 TopK
- 7.6 MapReduce实验4 倒排索引
- 第八章 Hive
- Hive安装
- 数据定义
- 数据操作
- 第九章 HBase
- 第十章 SaCa RealRec数据科学平台
- 第十一章 Spark Core
- 第十二章 Spark Streaming
- 第十三章 Spark测试题