#!/usr/bin/python
import urllib
import re
# Base address of the JudgeOnline user-status page; the user id is appended.
_USER_STATUS_URL = 'http://acm.pku.edu.cn/JudgeOnline/userstatus?user_id='

# Captures the number shown as the text of the `status?result=0&user_id=...`
# link inside its table cell.
#   * `[^>]*` keeps the user-id part inside the anchor's own tag; a greedy
#     `.*` could run on to a later `>digits</a></td>` on the same line and
#     capture the wrong number.
#   * `\s*` tolerates whitespace the server may emit between the tags or
#     around the number, which an exact-text pattern would reject.
_RESULT_COUNT_RE = re.compile(
    r'<td align=center width=25%>\s*'
    r'<a href=status\?result=0&user_id=[^>]*>\s*(\d+)\s*</a>\s*</td>')


def getss(id):
    """Look up a user on the JudgeOnline status page and print the result.

    Downloads the user-status page for ``id`` and searches it for the
    ``status?result=0&user_id=...`` link. When the link is found, prints
    ``"<id> : <number>"`` where ``<number>`` is the link text; otherwise
    prints the "user not found" message.
    NOTE(review): result=0 presumably selects accepted submissions, making
    the number the user's solved count - confirm against the site.

    Args:
        id: User id as a string. It is percent-encoded before being placed
            in the query string.

    Returns:
        None. The outcome is written to standard output.

    Errors raised by ``urllib.urlopen`` (for example when the host is
    unreachable) are not caught here and propagate to the caller.
    """
    # Escape the id so characters such as '&', '#' or spaces cannot alter
    # or truncate the query string.
    url = _USER_STATUS_URL + urllib.quote(id, safe='')
    html = urllib.urlopen(url)
    try:
        page = html.read()
    finally:
        # Release the connection even if reading the body fails.
        html.close()

    rs = _RESULT_COUNT_RE.search(page)
    if rs:
        print(id + " : " + rs.group(1))
    else:
        print("查无此人!")
if __name__ == '__main__':
    # Script entry point: query a fixed set of sample accounts, one request
    # per account, in the order listed.
    for user_id in ('test', 'linux', 'abc'):
        getss(user_id)
一开始的想法是去解析 HTML 文件,看来简单地用 RegExp 就能解决了。但是只要服务器返回的页面有一点改变(比如在 <td align=center width=25%><a href=status\?result=0&user_id=.*>(\d+)</a></td> 中加入一个空格),就会匹配失败。