"""Print the channel and item titles of the RSS feeds listed in a file.

Usage: python SCRIPT FEED_FILE

FEED_FILE holds one feed URL per line; blank lines are ignored.
"""

import sys
import xml.sax
import xml.sax.handler

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved urlopen(); keep the module-level name ``urllib2`` so the
    # rest of the file is identical on both interpreters.
    import urllib.request as urllib2


class RssHandler(xml.sax.handler.ContentHandler):
    """SAX handler that groups item titles by the channel they belong to.

    After a parse, ``channels`` maps each channel title to the list of item
    titles found for it, in document order.  Element names are compared
    case-insensitively.  Titles are stripped of surrounding whitespace.

    Items that appear before their channel's title are held back and filed
    under that title once it is seen; items that never get a channel title
    end up under ``UNTITLED_CHANNEL``.
    """

    # Key used for items whose channel never supplied a title.
    UNTITLED_CHANNEL = "(untitled)"

    # A ``title`` directly inside one of these elements describes the logo or
    # the search box, not the channel, so it must not rename the channel.
    _IGNORED_TITLE_PARENTS = frozenset(["image", "textinput"])

    def startDocument(self):
        """Reset all state so one handler instance can be reused per parse."""
        self.inItem = False
        self.inTitle = False
        self.channels = {}
        self.currentChannel = None
        self._openElements = []   # lower-cased names of the enclosing elements
        self._titleParts = []     # text chunks of the title being read
        self._pendingItems = []   # item titles waiting for a channel title

    def startElement(self, name, attrs):
        """Track entry into ``channel``, ``item`` and ``title`` elements."""
        lname = name.lower()
        parent = self._openElements[-1] if self._openElements else None
        self._openElements.append(lname)

        if lname == "channel":
            # A new channel starts: leftovers of a previous, never-titled
            # channel must not leak into this one.
            self._flushPending()
            self.currentChannel = None
        elif lname == "item":
            self.inItem = True
        elif (lname == "title" and not self.inTitle
              and parent not in self._IGNORED_TITLE_PARENTS):
            self.inTitle = True
            self._titleParts = []

    def endElement(self, name):
        """Record a finished title as either an item or a channel name."""
        lname = name.lower()
        if self._openElements:
            self._openElements.pop()

        if lname == "item":
            self.inItem = False
        elif lname == "title" and self.inTitle:
            self.inTitle = False
            title = "".join(self._titleParts).strip()
            if self.inItem:
                self._addItem(title)
            else:
                self._openChannel(title)

    def characters(self, content):
        """Collect title text; the parser may deliver it in several chunks."""
        if self.inTitle:
            self._titleParts.append(content)

    def endDocument(self):
        """File any items that never received a channel title."""
        self._flushPending()

    def _addItem(self, title):
        """Attach an item title to the current channel, or hold it back."""
        if self.currentChannel is None:
            self._pendingItems.append(title)
        else:
            self.channels[self.currentChannel].append(title)

    def _openChannel(self, title):
        """Make ``title`` the current channel and hand it any held items."""
        self.currentChannel = title
        items = self.channels.setdefault(title, [])
        items.extend(self._pendingItems)
        self._pendingItems = []

    def _flushPending(self):
        """Move held items to the ``UNTITLED_CHANNEL`` entry."""
        if self._pendingItems:
            untitled = self.channels.setdefault(self.UNTITLED_CHANNEL, [])
            untitled.extend(self._pendingItems)
            self._pendingItems = []


class Url(object):
    """Context manager that opens ``url`` and closes the stream on exit."""

    def __init__(self, url):
        self.url = url
        self.stream = None

    def __enter__(self):
        self.stream = urllib2.urlopen(self.url)
        return self.stream

    def __exit__(self, excType, excValue, excTraceback):
        # Returns None, so an exception raised inside the block propagates.
        if self.stream is not None:
            self.stream.close()


def generateRsses(feedFile):
    """Yield one ``{channel title: [item titles]}`` dict per feed URL.

    ``feedFile`` is the path of a text file with one URL per line.  Blank
    lines are skipped.  The list is read completely before the first feed
    is fetched, so the file is not kept open while downloading.

    Errors from opening the URL or parsing the document propagate to the
    caller.
    """
    with open(feedFile, "r") as feedList:
        urls = [line.strip() for line in feedList]

    for url in urls:
        if not url:
            # An empty URL would make urlopen() fail; a trailing blank line
            # in the list is common and harmless.
            continue
        # NOTE(review): feeds are remote, untrusted XML parsed with the stock
        # xml.sax parser; consider a hardened parser if that is a concern.
        with Url(url) as rss:
            handler = RssHandler()
            parser = xml.sax.make_parser()
            parser.setContentHandler(handler)
            parser.parse(rss)
        # Yield after the stream is closed so the connection is not held
        # open while the consumer works on the result.
        yield handler.channels


def printFeed(rss, stream=None):
    """Write each channel title followed by its tab-indented item titles.

    ``rss`` is a mapping as produced by ``generateRsses``.  ``stream`` is
    any object with a ``write`` method; it defaults to ``sys.stdout``.
    """
    if stream is None:
        stream = sys.stdout
    for channelName, titles in rss.items():
        stream.write("*** " + channelName + " ***" + "\n")
        for title in titles:
            stream.write("\t" + title + "\n")


def main(argv=None):
    """Run the script; return the process exit status.

    ``argv`` defaults to ``sys.argv`` and must hold the script name plus
    exactly one argument, the feed list path.  Otherwise a usage message is
    written to standard error and 2 is returned.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 2:
        scriptName = argv[0] if argv else "rss"
        sys.stderr.write("usage: %s FEED_FILE\n" % scriptName)
        return 2
    for rss in generateRsses(argv[1]):
        printFeed(rss)
    return 0


if __name__ == "__main__":
    sys.exit(main())