
LintCode-504: Inverted Index (Map Reduce) (System Design题)


1) Map()中调用output()时,value应传input->value().id,而不是1。因为这里要记录的是每个单词所在文档的id,而不是统计单词出现的次数。

2) Reduce()不需要专门维护一个map

/**
 * Definition of Input:
 * template<class T>
 * class Input {
 * public:
 *     bool done();
 *         // Returns true if the iteration has no more elements,
 *         // false otherwise.
 *     void next();
 *         // Move to the next element in the iteration
 *         // Runtime error if the iteration has no more elements
 *     T value();
 *        // Get the current element, Runtime error if
 *        // the iteration has no more elements
 * }
 * Definition of Document:
 * class Document {
 * public:
 *     int id; // document id
 *     string content; // document content
 * }
 */
class InvertedIndexMapper: public Mapper {
    void Map(Input<Document>* input) {
        // Write your code here
        // Please directly use func 'output' to output 
        // the results into output buffer.
        // void output(string &key, int value);
        while(!input->done()) {
            stringstream ss;
            string word;
            ss << input->value().content;
            while(ss >> word) output(word, input->value().id);

/// Reduce stage of the inverted index: gathers the document ids emitted for
/// one word into a single list.
class InvertedIndexReducer: public Reducer {
public:
    /// Collects the ids supplied for `key` and emits them as one list via the
    /// framework-provided `void output(string &key, vector<int> &value)`.
    ///
    /// An id is skipped when it equals the id appended immediately before
    /// it, so only runs of consecutive equal ids are collapsed.
    /// NOTE(review): this removes all duplicates only if the framework
    /// delivers the values for a key with equal ids adjacent (e.g. sorted) —
    /// confirm against the platform.
    ///
    /// @param key   the word whose posting list is being built.
    /// @param input iterator over the document ids emitted for `key`.
    void Reduce(std::string &key, Input<int>* input) {
        std::vector<int> idList;
        while (!input->done()) {
            const int id = input->value();
            // empty() guards the back() call on the first element.
            if (idList.empty() || id != idList.back()) {
                idList.push_back(id);
            }
            input->next();
        }
        output(key, idList);
    }
};