/**
  * 本代码由九章算法编辑提供。版权所有,转发请注明出处。
  * - 九章算法致力于帮助更多中国人找到好的工作,教师团队均来自硅谷和国内的一线大公司在职工程师。
  * - 现有的面试培训课程包括:九章算法班,系统设计班,算法强化班,Java入门与基础算法班,Android 项目实战班,Big Data 项目实战班,
  * - 更多详情请见官方网站:http://www.jiuzhang.com/?source=code
  */ 

/**
 * Definition of Input:
 * template<class T>
 * class Input {
 * public:
 *     bool done(); 
 *         // Returns true if the iteration has elements or false.
 *     void next();
 *         // Move to the next element in the iteration
 *         // Runtime error if the iteration has no more elements
 *     T value();
 *        // Get the current element, Runtime error if
 *        // the iteration has no more elements
 * }
 * Definition of Document:
 * class Document {
 * public:
 *     int id; // document id
 *     string content; // document content
 * }
 */
class Pair {
public:
    string key;
    int value;
    Pair(string word, int times) {
        key = word;
        value = times;
    };

    bool operator<(const Pair& obj) const {
        return value > obj.value || value == obj.value && key < obj.key;
    }
};

class TopKFrequentWordsMapper: public Mapper {
public:
    void Map(Input<Document>* input) {
        // Write your code here
        // Please directly use func 'output' to output 
        // the results into output buffer.
        // void output(string &key, int value);
        while (!input->done()) {
            vector<string> words = split(input->value().content, " ");
            for (string word : words) 
            if (word.size() > 0) {
                output(word, 1);
            }
            input->next();
        }
    }

    vector<string> split(const string &str, string delim) {
        vector<string> results;
        int lastIndex = 0, index;
        while ((index = str.find(delim, lastIndex)) != string::npos) {
            results.push_back(str.substr(lastIndex, index - lastIndex));
            lastIndex = index + delim.length();
        }
        if (lastIndex != str.length()) {
            results.push_back(str.substr(lastIndex, str.length() - lastIndex));
        }
        return results;
    }
};


class TopKFrequentWordsReducer: public Reducer {
private:
    int k;
    priority_queue<Pair> q;
public:
    void setUp(int k) {
        // initialize your data structure here
        this->k = k;
    }

    void Reduce(string &key, Input<int>* input) {
        // Write your code here
        int sum = 0;
        while (!input->done()) {
            sum += input->value();
            input->next();
        }

        Pair pair(key, sum);
        if (q.size() < k)
            q.push(pair);
        else {
            q.push(pair);
            q.pop();
        }        
    }

    void cleanUp() {
        // Please directly use func 'output' to output 
        // the top k pairs <word, times> into output buffer.
        // void output(string &key, int &value);
        vector<Pair> list;
        while (!q.empty()) {
            list.push_back(q.top());
            q.pop();
        }

        // reverse
        int n = list.size();
        for (int i = n - 1; i >=0; --i)
            output(list[i].key, list[i].value);
    }
};

results matching ""

    No results matching ""