• 技术文章 >后端开发 >XML/RSS教程

    分享一个简单的rss阅读工具

    Y2JY2J2017-05-03 09:32:10原创1380
    #!/usr/bin/env python
    # -*- coding:UTF-8 -*-
     
    import re
    from lxml import etree
    from bs4 import BeautifulSoup as sp
    import requests
    import urllib2
    import StringIO
     
    import sys
    # Python 2 only: reload() restores sys.setdefaultencoding (removed from the
    # module namespace at startup) so implicit str/unicode conversions use
    # UTF-8 instead of ASCII.
    reload(sys)
    sys.setdefaultencoding("utf-8")

    # Request headers passed to both HTTP clients used by urlread().
    headers = {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
    }
     
    def urlread(url):
        """Download ``url`` and return the response body as a byte string.

        ``requests`` is tried first; its body is decoded as UTF-8 and
        re-encoded so the result is UTF-8 bytes.  If that attempt fails for
        any ordinary error, the download is retried once with ``urllib2`` and
        the raw body bytes are returned unchanged.

        Both attempts send the module-level ``headers``.  Errors raised by the
        ``urllib2`` fallback propagate to the caller.
        """
        try:
            req = requests.get(url, headers=headers)
            # Force UTF-8 instead of whatever charset requests guessed.
            req.encoding = "utf-8"
            return req.text.encode("utf-8")
        except Exception:
            # Best-effort fallback.  ``Exception`` (rather than a bare
            # ``except``) keeps Ctrl-C and SystemExit working.
            request = urllib2.Request(url, headers=headers)
            response = urllib2.urlopen(request)
            try:
                # read() already yields bytes; encoding them again would force
                # an implicit decode that can fail on non-UTF-8 bodies.
                return response.read()
            finally:
                response.close()
                 
    class Item:
        """A single feed entry: title, link, publication date and a short,
        tag-free description.

        All four constructor arguments are strings; surrounding whitespace is
        stripped from each.  The cleaned description is stored under both
        ``description`` and the historical misspelling ``decription`` so that
        existing callers keep working.
        """

        # DOTALL lets a tag that spans several lines be removed in one match;
        # without it "." stops at "\n" and the tag would leak into the text.
        _TAG_RE = re.compile(r"<.*?>", re.DOTALL)

        def __init__(self, title, link, date, description):
            self.title = title.strip()
            self.link = link.strip()
            self.pubDate = date.strip()
            self.description = self.filter(description).strip()
            # Backward-compatible alias for the original (misspelled) name.
            self.decription = self.description

        def filter(self, description):
            """Return ``description`` with markup removed and length capped.

            Tags, carriage returns and newlines are deleted, ``&nbsp;`` becomes
            a space, and anything longer than 240 characters is cut to 240
            characters followed by ``'...'``.
            """
            description = self._TAG_RE.sub('', description)
            description = description.replace("\r", '').replace("\n", '')
            description = description.replace("&nbsp;", " ")
            if len(description) > 240:
                description = description[:240] + '...'
            return description

        def __str__(self):
            """Render as title, link, description and ``<date>``, one per line."""
            return "%s\n%s\n%s\n<%s>\n" % (
                self.title,
                self.link,
                self.decription,
                self.pubDate,
            )

        __repr__ = __str__
         
    class BSParser(object):
        """Download a feed and expose its entries as ``Item`` objects.

        The document is parsed with BeautifulSoup's ``"xml"`` parser.  Entries
        are looked up under the tag names ``item`` and then ``entry``; each
        field getter tries its tag names in order and returns an empty string
        when none is present, so one incomplete entry does not abort the run.
        """

        def __init__(self, url):
            xml = urlread(url)
            self.reset(xml)

        def reset(self, xml=None):
            """Replace the parsed document; with no argument use an empty one."""
            if xml is None:
                xml = "<xml> </xml>"
            # Always name the parser so both cases are parsed the same way.
            self.soup = sp(xml, "xml")

        def callback(self, method, obj, tags):
            """Call ``obj.<method>(tag)`` for each tag until a truthy result.

            ``method`` is matched case-insensitively (it is lower-cased before
            lookup).  A call that raises is skipped.  Returns the first truthy
            result, otherwise the last result obtained (``None`` if every call
            raised or ``tags`` is empty).
            """
            rst = None
            attr = method.lower()

            for tag in tags:
                try:
                    rst = getattr(obj, attr)(tag)
                except Exception:
                    # Best-effort lookup: try the next candidate tag.
                    continue
                if rst:
                    break
            return rst

        def _text(self, obj, tags):
            """Return the text of the first matching child of ``obj``, or ''."""
            node = self.callback("FIND", obj, tags)
            if not node:
                return ""
            return node.text or ""

        def getfields(self, tags=("item", "entry")):
            """Return all entry elements of the document (possibly empty)."""
            found = self.callback(method="FIND_ALL",
                                  obj=self.soup,
                                  tags=tags)
            # callback() yields None when every lookup failed; keep run() safe.
            return found or []

        def gettitle(self, obj, tags=("title",)):
            """Return the entry's title text, or '' when it has none."""
            return self._text(obj, tags)

        def getlink(self, obj, tags=("link",)):
            """Return the entry's link, or '' when it has none.

            The element text is preferred; when it is empty the ``href``
            attribute is used instead.
            """
            node = self.callback("FIND", obj, tags)
            if not node:
                return ""
            return node.text or node.get("href") or ""

        def getdate(self, obj, tags=("pubDate", "published")):
            """Return the entry's publication date text, or ''."""
            return self._text(obj, tags)

        def getdescription(self, obj, tags=("description", "content")):
            """Return the entry's raw description/content text, or ''."""
            return self._text(obj, tags)

        def run(self):
            """Yield one ``Item`` per entry found in the document."""
            for item in self.getfields():
                title = self.gettitle(item)
                link = self.getlink(item)
                date = self.getdate(item)
                description = self.getdescription(item)
                yield Item(title, link, date, description)
     
    def test(url=None):
        """Print every entry of the feed at ``url``.

        When ``url`` is omitted it is taken from the first command-line
        argument; if that is missing too, the program exits with a usage
        message.
        """
        if url is None:
            if len(sys.argv) < 2:
                sys.exit("usage: %s <feed-url>" % sys.argv[0])
            url = sys.argv[1]
        parser = BSParser(url)
        for item in parser.run():
            # Parenthesised single argument: valid on Python 2 and 3 alike.
            print(item)
             
    # Run the demo only when executed as a script, not when imported.
    if __name__ == "__main__":
        test()

    以上就是分享一个简单的rss阅读工具的详细内容,更多请关注php中文网其它相关文章!

    声明:本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系admin@php.cn核实处理。
    专题推荐:rss,阅读工具
    上一篇:使用FeedTools解析RSS代码示例 下一篇:自己动手写 PHP MVC 框架(40节精讲/巨细/新人进阶必看)

    相关文章推荐

    • 使用xmlhttp为网站增加域名查询功能的示例代码分享
    • 四种XML解析方式详解
    • XML和Tomcat的入门知识的详细介绍
    • 基于PHP对XML的操作详解
    • XML轻松学习手册(2)XML概念
    1/1

    PHP中文网