用 Python + bs4 爬取了手机归属地数据:
import urllib.request

from bs4 import BeautifulSoup

# Site root; the index page's links are joined onto this to form detail URLs.
BASE_URL = r'http://guisd.com'


def _fetch_soup(opener, url):
    """Download *url* with *opener* and parse the body as GBK-encoded HTML."""
    # Close the HTTP response deterministically instead of leaking it.
    with opener.open(url) as response:
        source_code = response.read()
    return BeautifulSoup(source_code, "html.parser", from_encoding="gbk")


def spider1(url, out=None):
    """Scrape the index page at *url* and dump each linked table as text.

    For every ``<dd>`` element on the index page that contains a link, the
    page at ``BASE_URL + href + 'all/'`` is fetched. Every ``<tr>`` on that
    page except the first (presumably the table header -- confirm against
    the site) is written as one line made of the text of its first six
    ``<td>`` cells, each followed by a comma (so lines end with a trailing
    comma, as in the original output format).

    Args:
        url: Address of the index page.
        out: Writable text file object receiving the rows. When omitted,
            the module-level ``f`` is used, which is how the original
            version of this function located its output file.

    Raises:
        urllib.error.URLError: If a page cannot be downloaded.
    """
    if out is None:
        # Backward compatibility: fall back to the module-level file handle.
        out = f

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
        'Accept': 'text/html;q=0.9,*/*;q=0.8',
    }
    opener = urllib.request.build_opener()
    # addheaders must be a list of (name, value) tuples. Assigning
    # ``[headers]`` made urllib unpack the dict's two keys as a single
    # pair and send "User-agent: Accept" instead of the intended headers.
    opener.addheaders = list(headers.items())

    index_page = _fetch_soup(opener, url)
    for link in index_page.find_all('dd'):
        anchor = link.a
        # Skip <dd> entries without a usable link instead of crashing.
        if anchor is None or not anchor.get('href'):
            continue
        baseurl = BASE_URL + anchor['href'] + r'all/'
        haoduan = anchor.text
        print(haoduan)  # link text, printed as a progress indicator

        detail_page = _fetch_soup(opener, baseurl)
        for tabb in detail_page.find_all('tr')[1:]:
            cells = tabb.find_all('td')[0:6]
            # One write per row; every cell (including the last) is
            # followed by a comma to keep the original file format.
            out.write(''.join(tdd.get_text() + ',' for tdd in cells) + '\n')


# The context manager guarantees the file is closed even if scraping fails.
# The explicit encoding keeps the Chinese text writable regardless of the
# platform's default locale encoding.
with open('text.txt', 'w+', encoding='utf-8') as f:
    spider1('http://guisd.com/lb/', f)
最终效果如下: