#! /usr/bin/python3
"""Fetch the BibleGateway "votd" Atom feed and print its first entry.

The entry's title and its content (with HTML character references decoded)
are written to standard output.
"""

import re
import urllib.request
from xml.dom.minidom import parseString
from html.entities import name2codepoint

# Feed requested by the script; the response is parsed as XML.
VOTD_URL = 'http://www.biblegateway.com/votd/get/?format=atom'

# Matches "&name;", "&#123;" and "&#x7B;" style references.
_ENTITY_RE = re.compile(r"&#?\w+;")


def unescape(text):
    """Return *text* with HTML character references replaced by characters.

    Handles decimal (``&#65;``), hexadecimal (``&#x41;``) and named
    (``&amp;``) references.  Anything that cannot be decoded -- an unknown
    entity name, malformed digits, or a code point outside the Unicode
    range -- is left in the output exactly as it appeared in the input.

    Args:
        text: The string to decode.

    Returns:
        The decoded string.
    """
    def fixup(m):
        ref = m.group(0)
        if ref[:2] == "&#":
            # Numeric character reference.
            try:
                if ref[:3] == "&#x":
                    return chr(int(ref[3:-1], 16))
                return chr(int(ref[2:-1]))
            except (ValueError, OverflowError):
                # int() rejects malformed digits; chr() rejects code points
                # outside the valid range.  Fall through and keep the text.
                pass
        else:
            # Named entity.
            try:
                return chr(name2codepoint[ref[1:-1]])
            except KeyError:
                pass
        return ref  # leave as is

    return _ENTITY_RE.sub(fixup, text)


def fetch_verse(url=VOTD_URL):
    """Download the feed at *url* and return ``(title, content)``.

    Both values come from the first text node of the corresponding element
    inside the feed's first ``<entry>``; the content is passed through
    :func:`unescape`.

    Raises:
        urllib.error.URLError: If the feed cannot be retrieved.
        IndexError: If the feed has no ``<entry>``, ``<title>`` or
            ``<content>`` element.
    """
    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(url) as response:
        atom = response.read()

    entry = parseString(atom).getElementsByTagName('entry')[0]
    title = entry.getElementsByTagName('title')[0].firstChild.data
    content = entry.getElementsByTagName('content')[0].firstChild.data
    return title, unescape(str(content))


def main():
    """Print the entry's title and content, followed by a smiley."""
    title, content = fetch_verse()

    # Output
    print(title, '\n')
    print(content, '\n')
    print(':)', '\n')


if __name__ == "__main__":
    main()