"""Skeleton mrjob job for processing coauthorship records.

Each input line is expected to look like ``author, paper, citations``,
where ``citations`` is an integer.
"""
from mrjob.job import MRJob
from mrjob.step import MRStep


class MRCoauthors(MRJob):
    """MapReduce job over ``author, paper, citations`` input lines."""

    # Counter group used to report input lines that could not be parsed.
    COUNTER_GROUP = "MRCoauthors"

    def steps(self):
        """Return the ordered list of MapReduce steps for this job.

        Only the parsing mapper is wired in so that the skeleton runs
        as-is; further steps are meant to be appended here.
        """
        # TODO: add more reducers / mappers as necessary, e.g.
        #   MRStep(mapper=self.mapper_parse, reducer=self.reducer_<name>)
        return [
            MRStep(mapper=self.mapper_parse),
        ]

    def mapper_parse(self, _, line):
        """Parse one input line and emit the record it contains.

        Args:
            _: Input key; ignored.
            line: Raw text line of the form ``author, paper, citations``.

        Yields:
            ``(author, (paper, citations))`` with surrounding whitespace
            stripped from the text fields and ``citations`` as an ``int``.
            Blank lines yield nothing. Lines that do not have exactly
            three comma-separated fields, or whose citation count is not
            an integer, yield nothing and are tallied in a job counter
            instead of aborting the whole task.
        """
        line = line.strip()
        if not line:
            # Blank lines (e.g. a trailing newline) carry no record.
            return

        fields = [field.strip() for field in line.split(',')]
        if len(fields) != 3:
            self.increment_counter(self.COUNTER_GROUP, "malformed_lines", 1)
            return

        author, paper, raw_citations = fields
        try:
            citations = int(raw_citations)
        except ValueError:
            self.increment_counter(self.COUNTER_GROUP, "bad_citation_count", 1)
            return

        # TODO: emit the appropriate key-value pairs for the analysis at
        # hand; keying the parsed record by author is a neutral default.
        yield author, (paper, citations)


if __name__ == "__main__":
    # run() is invoked on the class; it builds the job from sys.argv.
    MRCoauthors.run()