Noel O'Blog: RSS feeds for chemistry projects on SourceForge

Wouldn't it be nice to know whether any of your favourite chemistry projects has released a new version? Or to keep abreast of the latest registrations of chemistry projects on SourceForge? No? I guess it's just me then.

Anyway, here are some RSS feeds I've thrown together to allow me to do just that:

latest chemistry releases
registrations of chemistry projects: latest and two months old

Note: the two-months-old RSS feed is better if you want to find some actual code or a working website when you click through.

Want to do the same for another software category? Here's the code (requires BeautifulSoup and PyRSS2Gen):


import datetime
import urllib

from BeautifulSoup import BeautifulSoup
import PyRSS2Gen

def download():
    """Save the two SourceForge search result pages as local HTML files.

    The listing sorted by latest file date is written to ``releases.html``
    and the listing sorted by registration date to ``registrations.html``,
    both relative to the current working directory.
    """
    # Both queries search trove category 384 and request the first 100 hits
    # in descending order; they differ only in the sort key.
    targets = [
        ("http://sourceforge.net/search/index.php?words=trove%3A%28384%29"
         "&sort=latest_file_date&sortdir=desc&offset=0&limit=100&"
         "type_of_search=soft&pmode=0",
         "releases.html"),
        ("http://sourceforge.net/search/index.php?words=trove%3A%28384%29"
         "&sort=registration_date&sortdir=desc&offset=0&limit=100&"
         "type_of_search=soft&pmode=0",
         "registrations.html"),
    ]
    for address, destination in targets:
        urllib.urlretrieve(address, destination)

def converttodate(text):
    """Turn a dash-separated date string into a ``datetime.datetime``.

    text -- either the placeholder "(none)" or integers joined by "-"
            (e.g. "2008-05-12"); the integers are passed positionally to
            the ``datetime.datetime`` constructor.

    Returns None for the "(none)" placeholder, otherwise the datetime.
    Raises ValueError if a field is not an integer or the date is invalid.
    """
    if text != "(none)":
        fields = [int(piece) for piece in text.split("-")]
        return datetime.datetime(*fields)
    return None

def makerss(filename, items, sortby, title):
    """Write an RSS 2.0 feed describing *items* to *filename*.

    filename -- path of the feed file to create (opened in "w" mode, so an
                existing file is overwritten)
    items    -- iterable of dicts holding "title", "link", "description"
                and "lastrelease" keys, plus the key named by *sortby*
    sortby   -- name of the key whose value becomes each entry's pubDate
    title    -- title of the feed itself
    """
    entries = [
        PyRSS2Gen.RSSItem(
            title = item["title"],
            link = item["link"],
            description = item["description"],
            # The guid combines title and last release date, so it changes
            # whenever the recorded last release date changes.
            guid = PyRSS2Gen.Guid("%s %s" % (item["title"], item['lastrelease'])),
            pubDate = item[sortby])
        for item in items]

    rss = PyRSS2Gen.RSS2(
        title = title,
        link = "http://baoilleach.blogspot.com/2008/05/rss-feeds-for-chemistry-projects-on.html",
        description = "baoilleach's RSS feed of "
                      "Chemistry projects on SourceForge",
        lastBuildDate = datetime.datetime.now(),
        items = entries)

    # Close the output file deterministically so the feed is fully flushed
    # to disk as soon as this function returns.
    with open(filename, "w") as feed:
        rss.write_xml(feed)

def analyse(project):
    """Collect the details of one project from its heading tag.

    project -- the parsed heading tag whose first anchor links to the
               project page (presumably a BeautifulSoup Tag, as produced
               by processfile -- confirm against caller)

    Returns a dict with the keys "title", "link", "lastrelease",
    "registered" and "description". The two date entries are whatever
    converttodate() returns for the corresponding table cells.
    """
    anchor = project.a
    info = {
        'title': anchor.string,
        'link': "http://sf.net" + anchor['href'],
    }

    # Two levels above the heading is the element holding the date cells
    # (presumably the table row -- verify against the page markup).
    row = project.parent.parent
    cells = row('td')
    info['lastrelease'] = converttodate(cells[5].string.strip())
    info['registered'] = converttodate(cells[4].string.strip())

    # The description sits in the first cell of the following sibling.
    blurb_cell = row.findNextSibling().td
    blurb = blurb_cell.contents[0].strip()
    if not blurb:
        # First node was blank; fall back to the third node of the cell.
        blurb = blurb_cell.contents[2].strip()
    info['description'] = blurb

    return info

def processfile(filename):
    """Parse a saved search result page into a list of project dicts.

    filename -- path of the HTML file to read

    Returns a list with one analyse() result per ``<h3>`` tag whose first
    anchor has an href starting with "/projects/".
    """
    # Close the file deterministically instead of leaving the handle to
    # the garbage collector.
    with open(filename, "r") as page:
        html = page.read()

    soup = BeautifulSoup(html)
    # Use .get() so a heading whose anchor has no href attribute is simply
    # not matched rather than raising KeyError.
    projects = soup.findAll(lambda tag: tag.name=="h3" and tag.a
                            and tag.a.get('href', "").startswith("/projects/"))

    return [analyse(project) for project in projects]

if __name__=="__main__":
    # Refresh the local copies of both search result pages.
    download()

    # Registration feeds: everything on the page, plus the subset that was
    # registered at least 60 days ago.
    data = processfile("registrations.html")
    sometimeago = datetime.datetime.now() - datetime.timedelta(days=60)
    # converttodate() returns None for "(none)" dates, and ordering None
    # against a datetime raises TypeError, so undated entries are skipped
    # here instead of aborting the whole run.
    olddata = [d for d in data
               if d['registered'] is not None
               and d['registered'] <= sometimeago]
    makerss("oldregistrations.rss", olddata, "registered", "Registrations 60 days ago on SF")
    makerss("newregistrations.rss", data, "registered", "Latest registrations on SF")

    # Release feed: entries dated by their last file release.
    data = processfile("releases.html")
    makerss("latestreleases.rss", data, "lastrelease", "Latest releases on SF")

Noel O'Blog

Monday, 12 May 2008

RSS feeds for chemistry projects on SourceForge

2 comments: