/**
* 本代码由九章算法编辑提供。版权所有,转发请注明出处。
* - 九章算法致力于帮助更多中国人找到好的工作,教师团队均来自硅谷和国内的一线大公司在职工程师。
* - 现有的面试培训课程包括:九章算法班,系统设计班,算法强化班,Java入门与基础算法班,Android 项目实战班,Big Data 项目实战班,
* - 更多详情请见官方网站:http://www.jiuzhang.com/?source=code
*/
/**
* Definition of Input:
* template<class T>
* class Input {
* public:
* bool done();
* // Returns true if the iteration has elements or false.
* void next();
* // Move to the next element in the iteration
* // Runtime error if the iteration has no more elements
* T value();
* // Get the current element, Runtime error if
* // the iteration has no more elements
* }
* Definition of Document:
* class Document {
* public:
* int id; // document id
* string content; // document content
* }
*/
class Pair {
public:
string key;
int value;
Pair(string word, int times) {
key = word;
value = times;
};
bool operator<(const Pair& obj) const {
return value > obj.value || value == obj.value && key < obj.key;
}
};
class TopKFrequentWordsMapper: public Mapper {
public:
void Map(Input<Document>* input) {
// Write your code here
// Please directly use func 'output' to output
// the results into output buffer.
// void output(string &key, int value);
while (!input->done()) {
vector<string> words = split(input->value().content, " ");
for (string word : words)
if (word.size() > 0) {
output(word, 1);
}
input->next();
}
}
vector<string> split(const string &str, string delim) {
vector<string> results;
int lastIndex = 0, index;
while ((index = str.find(delim, lastIndex)) != string::npos) {
results.push_back(str.substr(lastIndex, index - lastIndex));
lastIndex = index + delim.length();
}
if (lastIndex != str.length()) {
results.push_back(str.substr(lastIndex, str.length() - lastIndex));
}
return results;
}
};
class TopKFrequentWordsReducer: public Reducer {
private:
int k;
priority_queue<Pair> q;
public:
void setUp(int k) {
// initialize your data structure here
this->k = k;
}
void Reduce(string &key, Input<int>* input) {
// Write your code here
int sum = 0;
while (!input->done()) {
sum += input->value();
input->next();
}
Pair pair(key, sum);
if (q.size() < k)
q.push(pair);
else {
q.push(pair);
q.pop();
}
}
void cleanUp() {
// Please directly use func 'output' to output
// the top k pairs <word, times> into output buffer.
// void output(string &key, int &value);
vector<Pair> list;
while (!q.empty()) {
list.push_back(q.top());
q.pop();
}
// reverse
int n = list.size();
for (int i = n - 1; i >=0; --i)
output(list[i].key, list[i].value);
}
};