#!/usr/bin/python
# http://www.pythonchallenge.com/pc/def/ocr.html
"""Find the rare characters hidden in the Python Challenge "ocr" page.

The script downloads the challenge page, takes the last block matched by
``COMMENT_RE`` and prints, in order of appearance, every non-newline
character whose frequency is below the average frequency per distinct
character.

NOTE(review): this file had been collapsed onto a single line (which made
the whole script one big comment) and was cut off in the middle of the final
expression.  The two reconstructed pieces are flagged below with
NOTE(review) -- please confirm them against the original script.
"""

import re
import string
import urllib
from collections import Counter
from contextlib import closing

CHALLENGE_URL = 'http://www.pythonchallenge.com/pc/def/ocr.html'

# NOTE(review): the pattern literal was empty in the mangled source, which
# makes findall() return only empty strings.  An HTML-comment pattern is
# assumed here (re.S so that '.' also spans newlines) -- confirm.
COMMENT_RE = re.compile(r'<!--(.*?)-->', re.S)


def _open_url(url):
    """Open *url* with whichever urlopen this interpreter provides."""
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        urlopen = urllib.urlopen  # Python 2
    return urlopen(url)


def extract_payload(page, pattern=COMMENT_RE):
    """Return the last group matched by *pattern* in *page*.

    Raises:
        IndexError: if *pattern* does not match anywhere in *page* (the same
            exception type the original ``findall(...)[-1]`` produced).
    """
    matches = pattern.findall(page)
    if not matches:
        raise IndexError('no match for %r in page' % pattern.pattern)
    return matches[-1]


def get_challenge(url=CHALLENGE_URL, pattern=COMMENT_RE):
    """Download *url* and return the last block matched by *pattern*.

    Both parameters default to the values the script originally hard-coded,
    so ``get_challenge()`` keeps working unchanged.
    """
    # closing() guarantees the connection is released even if read() fails.
    with closing(_open_url(url)) as response:
        raw = response.read()
    # Python 3 hands back bytes; on Python 2 ``bytes is str`` so this is
    # skipped and the data is used as-is.
    if not isinstance(raw, str):
        raw = raw.decode('utf-8', 'replace')
    return extract_payload(raw, pattern)


def count_chars(text):
    """Return a mapping of each character in *text* to its occurrence count."""
    return Counter(text)


def rare_chars(text, counts=None):
    """Return the non-newline characters of *text* that occur less often
    than average, in their original order.

    *counts* may be supplied to reuse an existing frequency table; it is
    computed from *text* when omitted.  An empty *text* yields ``''``
    instead of dividing by zero.

    NOTE(review): the source was truncated right after ``counts[c]``; the
    ``< avg`` comparison is reconstructed from the otherwise unused ``avg``
    computed on the preceding statement -- confirm.
    """
    if counts is None:
        counts = count_chars(text)
    if not counts:
        return ''
    # Floor division keeps the integer average the Python 2 original had.
    avg = len(text) // len(counts)
    return ''.join([c for c in text if c != '\n' and counts[c] < avg])


if __name__ == '__main__':
    text = get_challenge()
    counts = count_chars(text)
    # {'\n': 1221, '!': 6079, '#': 6115, '%': 6104, '$': 6046, '&': 6043,
    #  ')': 6186, '(': 6154, '+': 6066, '*': 6034, '@': 6157, '[': 6108,
    #  ']': 6152, '_': 6112, '^': 6030, 'a': 1, 'e': 1, 'i': 1, 'l': 1,
    #  'q': 1, 'u': 1, 't': 1, 'y': 1, '{': 6046, '}': 6105}

    # Method I
    print(rare_chars(text, counts))