/**
* Definition of Input:
* template<class T>
* class Input {
* public:
* bool done();
* // Returns true if the iteration has elements or false.
* void next();
* // Move to the next element in the iteration
* // Runtime error if the iteration has no more elements
* T value();
* // Get the current element, Runtime error if
* // the iteration has no more elements
* }
* Definition of Document:
* class Document {
* public:
* int id; // document id
* string content; // document content
* }
*/
class TopKFrequentWordsMapper: public Mapper {
public:
void Map(Input<Document>* input) {
// Write your code here
// Please directly use func 'output' to output
// the results into output buffer.
// void output(string &key, int value);
}
};
class TopKFrequentWordsReducer: public Reducer {
public:
void setUp(int k) {
// initialize your data structure here
}
void Reduce(string &key, Input<int>* input) {
// Write your code here
}
void cleanUp() {
// Please directly use func 'output' to output
// the top k pairs <word, times> into output buffer.
// void output(string &key, int &value);
}
};
/**
* 本代码由九章算法编辑提供。版权所有,转发请注明出处。
* - 九章算法致力于帮助更多中国人找到好的工作,教师团队均来自硅谷和国内的一线大公司在职工程师。
* - 现有的面试培训课程包括:九章算法班,系统设计班,算法强化班,Java入门与基础算法班,Android 项目实战班,Big Data 项目实战班,
* - 更多详情请见官方网站:http://www.jiuzhang.com/?source=code
*/
/**
* Definition of OutputCollector:
* class OutputCollector<K, V> {
* public void collect(K key, V value);
* // Adds a key/value pair to the output buffer
* }
* Definition of Document:
* class Document {
* public int id;
* public String content;
* }
*/
class Pair {
String key;
int value;
Pair(String key, int value) {
this.key = key;
this.value = value;
}
}
public class TopKFrequentWords {
public static class Map {
public void map(String _, Document value,
OutputCollector<String, Integer> output) {
// Write your code here
// Output the results into output buffer.
// Ps. output.collect(String key, int value);
int id = value.id;
StringBuffer temp = new StringBuffer("");
String content = value.content;
String[] words = content.split(" ");
for (String word : words)
if (word.length() > 0) {
output.collect(word, 1);
}
}
}
public static class Reduce {
private PriorityQueue<Pair> Q = null;
private int k;
private Comparator<Pair> pairComparator = new Comparator<Pair>() {
public int compare(Pair left, Pair right) {
if (left.value != right.value) {
return left.value - right.value;
}
return right.key.compareTo(left.key);
}
};
public void setup(int k) {
// initialize your data structure here
this.k = k;
Q = new PriorityQueue<Pair>(k, pairComparator);
}
public void reduce(String key, Iterator<Integer> values) {
// Write your code here
int sum = 0;
while (values.hasNext()) {
sum += values.next();
}
Pair pair = new Pair(key, sum);
if (Q.size() < k) {
Q.add(pair);
} else {
Pair peak = Q.peek();
if (pairComparator.compare(pair, peak) > 0) {
Q.poll();
Q.add(pair);
}
}
}
public void cleanup(OutputCollector<String, Integer> output) {
// Output the top k pairs <word, times> into output buffer.
// Ps. output.collect(String key, Integer value);
List<Pair> pairs = new ArrayList<Pair>();
while (!Q.isEmpty()) {
pairs.add(Q.poll());
}
// reverse result
int n = pairs.size();
for (int i = n - 1; i >= 0; --i) {
Pair pair = pairs.get(i);
output.collect(pair.key, pair.value);
}
}
}
}