Anyway, here are some RSS feeds I've thrown together to allow me to do just that:
Note: the two-months-old RSS feed is better if you want to find some actual code or a working website when you click through.
Want to do the same for another software category? Here's the code (requires BeautifulSoup and PyRSS2Gen):
import datetime
import urllib
from BeautifulSoup import BeautifulSoup
import PyRSS2Gen
def download():
    """Fetch the two SourceForge search-result pages and save them locally.

    One page is sorted by latest file release, the other by project
    registration date; both filter on trove category 384 (Chemistry).
    """
    pages = [
        ("releases.html",
         "http://sourceforge.net/search/index.php?words=trove%3A%28384%29"
         "&sort=latest_file_date&sortdir=desc&offset=0&limit=100&"
         "type_of_search=soft&pmode=0"),
        ("registrations.html",
         "http://sourceforge.net/search/index.php?words=trove%3A%28384%29"
         "&sort=registration_date&sortdir=desc&offset=0&limit=100&"
         "type_of_search=soft&pmode=0"),
    ]
    for filename, url in pages:
        urllib.urlretrieve(url, filename)
def converttodate(text):
    """Parse a 'YYYY-MM-DD' string into a datetime.

    SourceForge shows "(none)" for projects with no release date; that
    maps to None.
    """
    if text == "(none)":
        return None
    year, month, day = [int(part) for part in text.split("-")]
    return datetime.datetime(year, month, day)
def makerss(filename, items, sortby, title):
    """Write an RSS 2.0 feed of *items* to *filename*.

    items:  list of dicts with 'title', 'link', 'description',
            'lastrelease', and the date key named by *sortby*.
    sortby: which per-item date field becomes the item's pubDate
            ('registered' or 'lastrelease').
    title:  feed title.
    """
    rss = PyRSS2Gen.RSS2(
        title = title,
        link = "http://baoilleach.blogspot.com/2008/05/rss-feeds-for-chemistry-projects-on.html",
        description = "baoilleach's RSS feed of "
        "Chemistry projects on SourceForge",
        lastBuildDate = datetime.datetime.now(),
        items = [
            PyRSS2Gen.RSSItem(
                title = item["title"],
                link = item["link"],
                description = item["description"],
                # GUID combines title and last-release date so a new
                # release of an existing project appears as a new item.
                guid = PyRSS2Gen.Guid("%s %s" % (item["title"], item['lastrelease'])),
                pubDate = item[sortby])
            for item in items]
    )
    # Close the output file deterministically instead of leaking the
    # handle (the original passed an anonymous open() and relied on
    # garbage collection to flush and close it).
    with open(filename, "w") as outfile:
        rss.write_xml(outfile)
def analyse(project):
    """Extract one project's metadata from a SourceForge result page.

    *project* is the <h3> tag wrapping the project link; the dates and
    the description live in the enclosing table row and the row after it.
    """
    anchor = project.a
    info = {
        'title': anchor.string,
        'link': "http://sf.net" + anchor['href'],
    }
    row = project.parent.parent
    cells = row('td')
    info['lastrelease'] = converttodate(cells[5].string.strip())
    info['registered'] = converttodate(cells[4].string.strip())
    # The description sits in the next table row; when its first text
    # node is blank, the real text is the third child node.
    nextrow = project.parent.parent.findNextSibling()
    description = nextrow.td.contents[0].strip()
    if not description:
        description = nextrow.td.contents[2].strip()
    info['description'] = description
    return info
def processfile(filename):
    """Parse a saved SourceForge search-result page.

    Returns a list of per-project dicts as produced by analyse().
    """
    # Close the file promptly instead of leaking the handle
    # (the original opened it anonymously and never closed it).
    with open(filename, "r") as handle:
        html = handle.read()
    soup = BeautifulSoup(html)
    # Each result's project link is an <h3><a href="/projects/...">;
    # anchor on those to locate every result row.
    projects = soup.findAll(lambda tag: tag.name == "h3" and tag.a
                            and tag.a['href'].startswith("/projects/"))
    return [analyse(project) for project in projects]
if __name__ == "__main__":
    download()

    # Registrations: one feed of everything, plus one delayed by 60
    # days so projects have had time to publish actual code.
    registrations = processfile("registrations.html")
    cutoff = datetime.datetime.now() - datetime.timedelta(days=60)
    mature = [entry for entry in registrations
              if entry['registered'] <= cutoff]
    makerss("oldregistrations.rss", mature, "registered",
            "Registrations 60 days ago on SF")
    makerss("newregistrations.rss", registrations, "registered",
            "Latest registrations on SF")

    # Releases: a single feed of the most recent file releases.
    releases = processfile("releases.html")
    makerss("latestreleases.rss", releases, "lastrelease",
            "Latest releases on SF")
This is a great idea, thanks! Now we know how you stay on top of new chemistry projects.
ReplyDelete
PyRSS2Gen? I'm feeling the love. :)
ReplyDelete