Python 做爬虫的人真多,我就练练手。
# Python 3.5.1
# coding: utf-8
# Collect search-engine keyword suggestions from the 360 (so.com) suggest
# endpoint: for every seed keyword, request the suggestion list and print
# each suggestion on its own line.
import re
import sys
import urllib.request

# Seed keywords whose suggestions are collected.
SEED_KEYWORDS = ["北京", "上海", "青岛"]

# Suggest endpoint; the percent-encoded keyword is appended to this prefix.
SUGGEST_URL = (
    "http://sug.so.360.cn/suggest?callback=suggest_so&encodein=utf-8"
    "&encodeout=utf-8&format=json&fields=word,obdata&word="
)

# Only real request headers belong here.  The request line ("GET <url>") is
# not a header, and urllib derives "Host" from the URL on its own; a
# hard-coded Host would also be forwarded unchanged if the server redirected
# to a different host.
REQUEST_HEADERS = {
    "Referer": "http://www.so.com/",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36"
    ),
}

# Seconds to wait on the network before giving up on one keyword.
REQUEST_TIMEOUT = 10

# A double-quoted string whose first character is a CJK ideograph in the
# range U+4E00..U+9FA5; the capture group is the text between the quotes.
SUGGESTION_RE = re.compile(r'"([\u4e00-\u9fa5].*?)"')


def build_request(keyword):
    """Return the urllib Request that asks for suggestions for *keyword*.

    The keyword is percent-encoded and appended to SUGGEST_URL; the headers
    in REQUEST_HEADERS are attached.
    """
    url = SUGGEST_URL + urllib.request.quote(keyword)
    return urllib.request.Request(url, headers=REQUEST_HEADERS)


def fetch_suggestions(keyword, timeout=REQUEST_TIMEOUT):
    """Download the raw suggestion response for *keyword* as text.

    Raises OSError (urllib.error.URLError and socket timeouts are
    subclasses) on network failure, and UnicodeDecodeError if the body is
    not valid UTF-8.
    """
    request = build_request(keyword)
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read().decode("utf-8")


def extract_suggestions(text):
    """Return every quoted string in *text* that starts with a CJK character.

    The strings are returned in order of appearance; an input without any
    match yields an empty list.
    """
    return SUGGESTION_RE.findall(text)


def main():
    """Print the suggestions for each seed keyword, then wait for Enter."""
    for keyword in SEED_KEYWORDS:
        try:
            body = fetch_suggestions(keyword)
        except OSError as err:
            # One unreachable request should not abort the remaining keywords.
            print(
                "Request for {!r} failed: {}".format(keyword, err),
                file=sys.stderr,
            )
            continue
        for suggestion in extract_suggestions(body):
            print(suggestion)

    input("Press Enter key to continue……")


if __name__ == "__main__":
    main()
复制代码
|