python笔记:用Python实现简单的爬虫

##示例

做了一个简单的爬虫。使用python3。
涉及到代理的使用。关闭ssl验证。返回json的处理。
功能:用来查火车票。

```python
import urllib.request
import json
import codecs
import time, datetime
import ssl

# 12306 used a certificate chain that fails default verification;
# globally disable HTTPS certificate checks (insecure -- acceptable only
# for a throwaway personal script like this one).
ssl._create_default_https_context = ssl._create_unverified_context


def GetInfo():
    """Poll the 12306 left-ticket API until a second-class ("二等") seat
    shows up, printing each train's availability on every pass.

    Returns as soon as any train reports a ze_num other than "无"/"-";
    otherwise sleeps 8 seconds and polls again, forever.
    """
    # Build and install the proxy opener once, outside the polling loop --
    # the original re-installed it on every iteration for no benefit.
    proxy_handler = urllib.request.ProxyHandler(
        {'https': 'http://y003460:password@172.18.32.221:8080'})
    opener = urllib.request.build_opener(proxy_handler)
    urllib.request.install_opener(opener)

    while True:
        try:
            resp = urllib.request.urlopen(
                'https://kyfw.12306.cn/otn/leftTicket/queryT?'
                'leftTicketDTO.train_date=2016-10-01'
                '&leftTicketDTO.from_station=SZQ&leftTicketDTO.to_station=LDQ&'
                'purpose_codes=ADULT', timeout=8)
            # The API answers JSON encoded as UTF-8; decode the byte stream
            # incrementally instead of reading it all into memory first.
            reader = codecs.getreader("utf-8")
            train_result = json.load(reader(resp))
            # print(train_result)
            train_datas = train_result['data']
            for item in train_datas:
                train_single_data = item['queryLeftNewDTO']
                print(train_single_data['station_train_code'], "二等", train_single_data['ze_num'])
                # "无" (none) and "-" (not on sale) both mean unavailable;
                # anything else is a real seat count -> stop polling.
                if train_single_data['ze_num'] != "无" and train_single_data['ze_num'] != "-":
                    return
            nowtime = datetime.datetime.now()
            print(nowtime.strftime("%Y-%m-%d %H:%M:%S-%f"))
        except Exception as errors:
            # BUGFIX: the original called GetInfo() recursively here while
            # already inside `while True`, so every network error added a
            # stack frame and a flaky connection would eventually raise
            # RecursionError. Report the error and let the loop retry.
            print("一个错误", errors)
        time.sleep(8)


GetInfo()
print("找到了")
```

##技术

###获取网页

py2

```python
proxy_handler = urllib2.ProxyHandler({})
opener = urllib2.build_opener(proxy_handler)
urllib2.install_opener(opener)
# download text
req = URL.format(args[1])
res_data = urllib2.urlopen(req)
res = res_data.read()
res = res.decode("utf-8")
```

py3

```python
proxy_handler = urllib.request.ProxyHandler({})
opener = urllib.request.build_opener(proxy_handler)
urllib.request.install_opener(opener)
# download text
resp = urllib.request.urlopen(URL.format(args[1]))
reader = codecs.getreader("utf-8")
res = reader(resp).read()
```

来自 大脸猪 写于 2016-09-25 00:32 -- 更新于2020-10-19 13:06 -- 0 条评论