import re

from bs4 import BeautifulSoup


def main() -> None:
    """Parse a small inline document and print the results of several queries.

    Each ``print`` below demonstrates one way of locating elements with
    BeautifulSoup: attribute-style navigation, ``find_all`` with a regular
    expression, an attribute filter, a predicate function and a CSS class,
    and finally a CSS selector via ``select``.
    """
    # NOTE(review): the sample text below contains no markup tags as it
    # appears in this file, while the queries expect title/h1/p/img/a
    # elements -- confirm against the original fixture.
    html = """ 首页

Hello, world!

Good!!!


这是一个例子程序

静夜思

床前明月光

疑似地上霜

举头望明月

低头思故乡

腾讯网 美女 凯蒂猫 美女 Hello, Goup! """
    # To run the same queries against a live page instead, fetch it (for
    # example http://sports.sohu.com/nba_a.shtml) and decode the response
    # body as GBK before handing it to BeautifulSoup.
    soup = BeautifulSoup(html, 'lxml')  # the 'lxml' parser must be installed

    print(soup.title)
    # Attribute-style navigation, comparable to JavaScript's
    # document.body.h1 / document.forms[0].
    print(soup.body.h1)

    # A compiled pattern is matched against tag names: every tag whose name
    # ends in "p".
    print(soup.find_all(re.compile(r'p$')))

    # <img> tags whose src points at a PNG under ./img/. The dot before
    # "png" is escaped so it matches a literal '.' rather than any character.
    print(soup.find_all('img', {'src': re.compile(r'\./img/\w+\.png')}))

    # A callable filter: keep tags that carry exactly two attributes.
    print(soup.find_all(lambda x: len(x.attrs) == 2))

    # <p> tags with the CSS class "foo".
    print(soup.find_all('p', {'class': 'foo'}))

    # The selector guarantees an href attribute, so direct indexing is safe.
    for elem in soup.select('a[href]'):
        print(elem.attrs['href'])


if __name__ == '__main__':
    main()