2019年的第一篇博客,恩,好久没写过博客了,恩,忘了,哈哈,实在是太懒了
今天写一个爬取百度推广数据的爬虫,当然我写的肯定不是那么的完美,但是能用,大哭
注意:有时用 get 或 post 方法获取数据时会报 SSL 等错误(网站要做证书验证)。网上搜到的解决办法都是设置 verify=False,这里不展开分析,有兴趣可以自己了解一下。这样设置之后,发送 GET 或 POST 请求时会出现 warning 提示,可以用下面的代码禁用:
# 禁用 SSL 发出的警告
requests.packages.urllib3.disable_warnings()

下面是代码:
def main():
    """Create and return a requests Session shared by all API calls."""
    main_session = requests.session()
    return main_session


def get_cost_info(main_session, endtime, flag1, flag2):
    """Fetch Baidu Fengchao account report data split by province or city.

    Posts to the dashboard report endpoint scraped from the browser's
    F12 / Network tab, then flattens the nested rows into plain lists.

    Parameters:
        main_session: requests.Session created by main().
        endtime: report end date string, e.g. "2019-03-31"
                 (start date is fixed at "2019-01-01").
        flag1: 'province' to split by province; any other value splits
               by province-city.
        flag2: 'month' to aggregate monthly; any other value is daily.

    Returns:
        list of rows, one per record:
        [date, account, province, impressions, clicks, cost, avg CPC,
         CTR, web conversions, phone conversions, bridge conversions].
        NOTE(review): `city` is parsed but not included in the row —
        presumably intentional in the original; confirm with callers.
    """
    # Report split dimension and time granularity for the API payload.
    splitDimension = "provinceName" if flag1 == 'province' else "provinceCityName"
    unitOfTime = 3 if flag2 == 'month' else 5
    # Headers copied from the browser session; cookie/ids come from `setting`.
    # BUG FIX: the original hard-coded 'Content-Length': '763' — requests
    # computes the correct length itself, and a wrong value breaks the request.
    headers = {
        'Accept': 'application/json',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'cookie': setting.cookie,
        'DNT': '1',
        'Host': 'fengchao.baidu.com',
        'Origin': 'https://fengchao.baidu.com',
        'Referer': 'https://fengchao.baidu.com/fc/report/dashboard/user/%s/account' % setting.userid,
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 BIDUBrowser/8.7 Safari/537.36',
        'X-DevTools-Emulate-Network-Conditions-Client-Id': '87F3C66D-3166-46F6-8B46-141057354EBC',
    }
    params = {
        "userId": setting.userid,
        "ids": [setting.userid],
        "idType": 2,
        "splitDimension": splitDimension,
        "limit": [0, 1000],
        "sortRules": [],
        "levelOfDetails": 211,
        "startDate": "2019-01-01",
        "endDate": endtime,
        "predicateConditions": [],
        "unitOfTime": unitOfTime,
        "columns": ["userId", "date", "accountName", "impression", "click",
                    "cost", "cpc", "ctr", "conversion", "phoneConversion",
                    "bridgeConversion"],
    }
    formdata = {
        'reqid': setting.reqid,
        'eventId': setting.eventId,
        'userid': setting.userid,
        'token': setting.token,
        'path': 'mars/GET/AccountReportDataService/getAccountDataCenterReportData',
        'params': json.dumps(params),
    }
    # URL obtained from F12 -> Network in the browser.
    url_1 = 'https://fengchao.baidu.com/hairuo/request.ajax?path=mars/GET/AccountReportDataService/getAccountDataCenterReportData&reqid=%s' % formdata['reqid']
    # verify=False to skip SSL certificate validation (see blog note above).
    cont_1 = main_session.post(url_1, headers=headers, data=formdata, verify=False)
    datas = cont_1.json()
    cont_list_1 = datas['data']['ACCOUNT']['rows']
    # BUG FIX: accumulate across all top-level rows and return the result.
    # The original re-created cont_list inside the loop (dropping earlier
    # rows) and never returned it, so the collected data was lost.
    cont_list = []
    for i in cont_list_1:
        for j in i['subRows']:
            if flag2 == 'month':
                # j['date'] looks like "2019-01-..."; keep "YYYY-MM" and
                # render it as "YYYY年M月份" (strip the leading zero).
                time_list1 = j['date'][0:7].split('-')
                date = '%s年%s月份' % (time_list1[0], str(int(time_list1[1])))
            else:
                date = j['date']
            zhanghu = j['accountName']
            province, city = '', ''
            if flag1 == 'province':
                province = j['provinceName']
            else:
                # provinceCityName is "province-city".
                city_list = j['provinceCityName'].split('-')
                province, city = city_list[0], city_list[1]
            zhanxian = str(j['impression'])
            dianji = str(j['click'])
            xiaofei = str(j['cost'])
            pinjunjiage = '%.2f' % j['cpc']
            dianjilv = '{:.2%}'.format(j['ctr'])
            wangyezhuanhua = str(j['conversion'])
            dianhuazhuanhua = str(j['phoneConversion'])
            shangqiaozhuanhua = str(j['bridgeConversion'])
            cont_list.append([date, zhanghu, province, zhanxian, dianji,
                              xiaofei, pinjunjiage, dianjilv, wangyezhuanhua,
                              dianhuazhuanhua, shangqiaozhuanhua])
    return cont_list