用 selenium + phantomjs 抓取异步加载的网页内容,为什么抓不到?
展开全部
这仅仅是一个 POST 提交,为什么要搞得这么复杂?直接用 requests 发送同样的表单请求即可:
# Fetch the announcement listing for one stock with a plain form POST and
# save every referenced attachment into the current working directory.
import json

import requests

# Endpoint that receives the form-encoded query below.
url = 'http://www.cninfo.com.cn/cninfo-new/disclosure/szse/fulltext'
# Pre-encoded form body; `stock`, `pageNum` and `pageSize` are the fields
# that carry non-empty values in this query.
data = 'stock=300027&searchkey=&category=&pageNum=1&pageSize=15&column=szse_gem&tabName=latest&sortName=&sortType=&limit=&seDate='
headers = {
    # `data` is already a URL-encoded string, so the content type is set
    # explicitly instead of being derived by requests from a dict.
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36'
}

r = requests.post(url, data=data, headers=headers)
result = json.loads(r.text)

# Each entry of `classifiedAnnouncements` is indexed with [0] and its
# `adjunctUrl` value is collected as a site-relative path.
files = [group[0]['adjunctUrl'] for group in result['classifiedAnnouncements']]

for file in files:
    file_url = 'http://www.cninfo.com.cn/{0}'.format(file)
    # The third '/'-separated segment of the relative path is used as the
    # local file name (assumes the path has at least three segments).
    file_name = file.split('/')[2]
    # `Response.content` is bytes, so the file must be opened in binary
    # mode; text mode ('w') raises TypeError on Python 3 and can corrupt
    # the download on platforms that translate newlines.
    with open(file_name, 'wb') as f:
        f.write(requests.get(file_url).content)
# Second copy of the answer script: POST the announcement query for one
# stock, then download each attachment listed in the JSON response.
import json

import requests

# Target of the form POST.
url = 'http://www.cninfo.com.cn/cninfo-new/disclosure/szse/fulltext'
# URL-encoded form body sent verbatim as the request payload.
data = 'stock=300027&searchkey=&category=&pageNum=1&pageSize=15&column=szse_gem&tabName=latest&sortName=&sortType=&limit=&seDate='
headers = {
    # Declared manually because the payload is a raw string, not a dict.
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36'
}

r = requests.post(url, data=data, headers=headers)
result = json.loads(r.text)

# Take the `adjunctUrl` of the first item in every entry of
# `classifiedAnnouncements`; these are paths relative to the site root.
files = [entry[0]['adjunctUrl'] for entry in result['classifiedAnnouncements']]

for file in files:
    file_url = 'http://www.cninfo.com.cn/{0}'.format(file)
    # Local name = third path segment of the relative URL (assumes at
    # least three '/'-separated segments are present).
    file_name = file.split('/')[2]
    # Binary mode is required: `.content` returns bytes, and writing bytes
    # to a text-mode file fails on Python 3.
    with open(file_name, 'wb') as f:
        f.write(requests.get(file_url).content)
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询