// NOTE(review): removed document-viewer banner ("Você está na página 1 de 1") — extraction artifact, not source code.

package wcmr.hadoop.training.itbec; // NOTE(review): rejoined from split "it." + "bec;" — confirm exact package name

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public class WCMR {


public static class WordCountMapper extends
Mapper<LongWritable, Text, Text, IntWritable> {
protected String delimeters = " , .;:'\"&!?-_\n\t12345678910[]{}
<>\\`~|=^()@#$%^*/+-";
protected static boolean caseSensitive = false;
public void map(LongWritable recadd, Text rec, Context con)
throws IOException, InterruptedException {
String line = (caseSensitive) ? rec.toString() : rec.toS
tring().toLowerCase();
StringTokenizer tokenizer = new StringTokenizer(line, de
limeters);
while (tokenizer.hasMoreTokens()) {
con.write(new Text(tokenizer.nextToken()), new I
ntWritable(1));
}
}
}
public static class WordCountReducer extends
Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterable<IntWritable> values, Conte
xt con)
throws IOException, InterruptedException {
int sum = 0;
for (IntWritable el : values) {
sum = sum + el.get();
}
con.write(key, new IntWritable(sum));
}
}
}
