skip to main |
skip to sidebar
ruby mapreduce
Google MapReduce
# Builds a positional inverted index in two MapReduce-style phases:
#   map:    docID, content            => [wordID, [docID, docPos]] pairs
#   reduce: [wordID, [docID, docPos]] => wordID => [[docID, docPos], ...]

# Word => word ID lookup, obtained by inverting the ID => word table.
WDIC = {101 => 'Page', 102 => 'of', 103 => 'School', 104 => 'Sakura', 105 => 'Kaede'}.invert.freeze

# Document ID => document text.
web = {1 => "Page of Sakura School", 2 => "Page of Kaede School"}

# Map phase: emit one [wordID, [docID, docPos]] pair per word occurrence,
# where docPos is the zero-based index of the word within its document.
# NOTE: a word that is missing from WDIC is emitted with a nil word ID.
mapped = web.flat_map do |doc_id, content|
  content.scan(/\w+/).each_with_index.map { |word, pos| [WDIC[word], [doc_id, pos]] }
end

# Reduce phase: collect the postings of each word ID, keeping emission order.
# The block form of Hash.new gives every word ID its own array, so postings
# can be appended in place instead of copying the array on each step.
index = mapped.each_with_object(Hash.new { |hash, key| hash[key] = [] }) do |(word_id, posting), acc|
  acc[word_id] << posting
end

p index
# => {101=>[[1, 0], [2, 0]], 102=>[[1, 1], [2, 1]], 104=>[[1, 2]], 103=>[[1, 3], [2, 3]], 105=>[[2, 2]]}
No comments:
Post a Comment