Python爬虫做好了,怎么用pandas保存为Excel文件?
import requests import random import time from pandas import Series, DataFrame import pandas as pd d...
import random
import time

import pandas as pd
import requests
from pandas import Series, DataFrame

# Column headers for the exported sheet; they mirror the JSON keys read
# from each position record.
COLUMNS = ['positionName', 'companyShortName', 'salary']

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 10


def updata_cookies(url, my_hearders):
    """Visit *url* in a fresh session and return the cookies it received.

    Args:
        url: Page to request (the driver passes the ``Referer`` URL).
        my_hearders: Request headers to send. The misspelled parameter name
            is kept so existing keyword callers keep working.

    Returns:
        The session's cookie jar after the request.
    """
    # The original body read a global ``my_headers`` and ignored both
    # parameters; use the arguments so the function works on its own.
    with requests.Session() as session:
        session.get(url, headers=my_hearders, timeout=REQUEST_TIMEOUT)
        return session.cookies


def crawler(req_url, my_headers, cookies, qs_params, form_data):
    """Fetch one result page and return its rows.

    Args:
        req_url: URL that the search form is POSTed to.
        my_headers: Request headers.
        cookies: Cookies to send with the request.
        qs_params: Query-string parameters.
        form_data: Form body; ``form_data['pn']`` is the page number.

    Returns:
        A list of ``[positionName, companyShortName, salary]`` lists, one per
        position found on the page.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the JSON payload lacks the expected
            ``content -> positionResult -> result`` structure.
    """
    req_result = requests.post(
        req_url,
        headers=my_headers,
        cookies=cookies,
        params=qs_params,
        data=form_data,
        timeout=REQUEST_TIMEOUT,
    )
    req_result.raise_for_status()
    result = req_result.json()
    position_infos = result['content']['positionResult']['result']
    pn = form_data['pn']

    data = []
    for i, info in enumerate(position_infos):
        print("-------------第%s页第%s个电子工程师相关的职位的信息--------------" % (pn, i + 1))
        data.append([info['positionName'], info['companyShortName'], info['salary']])

    print(DataFrame(data, columns=COLUMNS))
    # The rows are returned rather than written here: writing the workbook
    # once per page overwrote the file, leaving only the last page in it.
    return data


def save_to_excel(rows, filepath='filepath.xlsx'):
    """Write all collected *rows* to a single Excel workbook.

    Args:
        rows: List of ``[positionName, companyShortName, salary]`` lists.
        filepath: Destination ``.xlsx`` path.

    Returns:
        The DataFrame that was written.
    """
    df = DataFrame(rows, columns=COLUMNS)
    # ``encoding`` is not passed: pandas 2.x rejects that keyword for
    # ``to_excel``. Writing ``.xlsx`` needs an Excel engine such as openpyxl.
    df.to_excel(filepath, index=False, header=True)
    return df


def main():
    """Crawl pages 1-5 and save every row into one Excel file."""
    req_url = 'ee'
    my_headers = {
        'User-agent': 'xx',
        'Referer': 'sss',
    }
    qs_params = {'needAddtionalResult': 'false'}

    all_rows = []
    for i in range(1, 6):
        # NOTE(review): the first page sends the boolean True while later
        # pages send the string 'false'; values kept as originally written —
        # confirm what the server expects.
        form_data = {'first': True if i == 1 else 'false', 'kd': '电子工程师', 'pn': i}
        # Random pause before every page (the original only slept before
        # page 1 and crashed on ``random.randitn`` afterwards).
        time.sleep(random.randint(10, 30))
        cookies = updata_cookies(my_headers['Referer'], my_headers)
        all_rows.extend(crawler(req_url, my_headers, cookies, qs_params, form_data))

    save_to_excel(all_rows)


if __name__ == "__main__":
    main()
展开
1个回答
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询