<>前言

<>MapReduce排序

* 默认排序按照key的字典序进行，排序的实现方法是快速排序
<>MapReduce排序分类

1、部分排序

MapReduce根据输入记录的键对数据集排序，保证输出的每个文件内部数据有序（并不保证多个输出文件之间整体有序）

2、全排序

3、辅助排序

4、二次排序

<>自定义排序案例

1、自定义一个Bean对象，实现WritableComparable 接口

import org.apache.hadoop.io.WritableComparable; import java.io.DataInput;
import java.io.DataOutput; import java.io.IOException; public class
PhoneSortBean implements WritableComparable<PhoneSortBean> { //峰值流量 private
long upFlow; //低谷流量 private long downFlow; //总流量 private long sumFlow;
@Override public int compareTo(PhoneSortBean o) { if (this.sumFlow > o.sumFlow)
{ return -1; }else if(this.sumFlow < o.sumFlow){ return 1; }else { return 0; }
} //提供无参构造 public PhoneSortBean() { } //提供三个参数的getter和setter方法 public long
getUpFlow() { return upFlow; } public void setUpFlow(long upFlow) { this.upFlow
= upFlow; } public long getDownFlow() { return downFlow; } public void
setDownFlow(long downFlow) { this.downFlow = downFlow; } public long
getSumFlow() { return sumFlow; } public void setSumFlow(long sumFlow) {
this.sumFlow = sumFlow; } public void setSumFlow() { this.sumFlow = this.upFlow
+ this.downFlow; } //实现序列化和反序列化方法,注意顺序一定要保持一致 @Override public void
write(DataOutput dataOutput) throws IOException { dataOutput.writeLong(upFlow);
dataOutput.writeLong(downFlow); dataOutput.writeLong(sumFlow); } @Override
public void readFields(DataInput dataInput) throws IOException { this.upFlow =
dataInput.readLong(); } //重写ToString方法 @Override public String toString() {
return upFlow + "\t" + downFlow + "\t" + sumFlow; } }
2、自定义Mapper

import org.apache.commons.lang3.StringUtils; import
org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; import
java.util.LinkedList; public class SortPhoneMapper extends Mapper<LongWritable,
Text, PhoneSortBean,Text> { private Text outV = new Text(); private
PhoneSortBean outK = new PhoneSortBean(); @Override protected void
map(LongWritable key, Text value, Context context) throws IOException,
InterruptedException { String line = value.toString(); //分割数据 String[] splits =
for(String str:splits){ if(StringUtils.isNotEmpty(str)){
linkedList.add(str.trim()); } } //抓取需要的数据:手机号,上行流量,下行流量 String phone =
linkedList.get(0); String max = linkedList.get(1); String mine =
linkedList.get(2); //封装outK outV outV.set(phone);
outK.setUpFlow(Long.parseLong(max)); outK.setDownFlow(Long.parseLong(mine));
outK.setSumFlow(); //写出outK outV context.write(outK, outV); } }
3、自定义Reducer

Reduce阶段的输出结果仍然以手机号为key，而value为排序后的自定义的bean
import java.io.IOException; public class SortPhoneReducer extends
Reducer<PhoneSortBean,Text , Text, PhoneSortBean> { @Override protected void
reduce(PhoneSortBean key, Iterable<Text> values, Context context) throws
IOException, InterruptedException { for (Text value : values) {
context.write(value,key); } } }
4、自定义Driver类
SortPhoneJob { public static void main(String[] args) throws Exception { //1

Job.getInstance(conf); //2 关联本Driver类 job.setJarByClass(SortPhoneJob.class);
//3 设置Map端输出KV类型 job.setReducerClass(SortPhoneReducer.class);
job.setMapperClass(SortPhoneMapper.class); //4 关联Mapper和Reducer
job.setMapOutputKeyClass(PhoneSortBean.class);
job.setMapOutputValueClass(Text.class); //5 设置程序最终输出的KV类型
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(PhoneSortBean.class); //6 设置程序的输入输出路径 String inPath =
"F:\\网盘\\csv\\phone_out_bean.txt"; String outPath =
"F:\\网盘\\csv\\phone_out_sort"; FileInputFormat.setInputPaths(job, new
Path(inPath)); FileOutputFormat.setOutputPath(job, new Path(outPath)); //7

/**
 * Orders beans by total traffic, largest first; beans with equal totals are
 * ordered by peak traffic, largest first.
 *
 * @param o the bean to compare against
 * @return -1, 0 or 1 as this bean sorts before, equal to, or after {@code o}
 */
public int compareTo(PhoneSortBean o) {
    // Arguments are swapped so that the larger value sorts first.
    int byTotal = Long.compare(o.sumFlow, this.sumFlow);
    if (byTotal != 0) {
        return byTotal;
    }
    // Totals are equal, so fall back to ordering by peak traffic.
    return Long.compare(o.upFlow, this.upFlow);
}
<>分区内排序案例

* 添加一个自定义分区器，按照业务规则指定分区号
1、添加自定义分区
/**
 * Routes records to partitions by the leading digits of the phone number held
 * in the value: "135" -> 0, "136" -> 1, "137" -> 2, anything else -> 3.
 */
public class MyPartioner extends Partitioner<MyPhoneBean, Text> {

    /**
     * Picks the partition from the prefix of {@code text}.
     *
     * @param myPhoneBean the record key (not consulted)
     * @param text        the record value, read as the phone number
     * @param partion     the partition count passed in by the framework (not consulted)
     * @return a partition index between 0 and 3
     */
    @Override
    public int getPartition(MyPhoneBean myPhoneBean, Text text, int partion) {
        // The position of a prefix in this table is the partition it maps to.
        final String[] prefixes = {"135", "136", "137"};
        final String number = text.toString();
        for (int idx = 0; idx < prefixes.length; idx++) {
            if (number.startsWith(prefixes[idx])) {
                return idx;
            }
        }
        // No known prefix matched: use the catch-all partition (3).
        return prefixes.length;
    }
}
2、改造Driver类

public class MyDriver { public static void main(String[] args) throws
Exception { //1 获取job对象 Configuration conf = new Configuration(); Job job =
Job.getInstance(conf); //2 关联本Driver类 job.setJarByClass(MyDriver.class); //3

job.setReducerClass(MyReducer.class); //4 关联Mapper和Reducer
job.setMapOutputKeyClass(MyPhoneBean.class);
job.setMapOutputValueClass(Text.class); //5 设置程序最终输出的KV类型
job.setOutputKeyClass(Text.class); job.setOutputValueClass(MyPhoneBean.class);