如何用scrapy提取不在标签内的文字
展开全部
代码如下
def parse(self, response):
    """Print label/value pairs found under the element with id ``info``.

    Two kinds of entries are collected into one dict:

    * Child elements that contain a ``span`` with class ``pl``: the span
      text is the label and the text of the child's direct ``a`` element
      is the value.
    * Other child elements that have text of their own: that text (minus
      its last character) is the label, and the value is the matching
      bare text node that sits directly inside the ``info`` element,
      i.e. text that is not wrapped in any tag.

    Args:
        response: object exposing ``xpath()`` whose results expose
            ``extract()`` (presumably a Scrapy response -- confirm
            against the spider this method belongs to).

    Returns:
        None. Each collected pair is printed as ``label value``.
    """
    states = {}
    labels = []

    for row in response.xpath("//*[@id='info']/*"):
        titles = row.xpath("span[@class='pl']/text()").extract()
        if titles:
            links = row.xpath("a/text()").extract()
            # A labelled row without a direct link has no value to record;
            # skip it instead of failing on an empty result list.
            if links:
                states[titles[0].strip()] = links[0].strip()
            continue

        own_text = row.xpath("text()").extract()
        if own_text:
            # Drop the final character of the label (presumably a trailing
            # colon -- verify against the target page).
            labels.append(own_text[0].strip()[:-1])

    # Text nodes that are direct children of #info, ignoring
    # whitespace-only nodes between tags.
    values = [
        node.strip()
        for node in response.xpath("//*[@id='info']/text()").extract()
        if node.strip()
    ]

    # Pair labels and values by position; zip stops at the shorter list so
    # a count mismatch cannot raise IndexError.
    for label, value in zip(labels, values):
        states[label] = value

    for name in states:
        # Formatted as one string so the output is the same under the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s" % (name, states[name]))
def parse(self, response):
    """Collect and print the label/value pairs under the ``info`` element.

    Entries come from two places:

    * A child element holding a ``span`` of class ``pl`` contributes the
      span text as the label and the text of its direct ``a`` child as
      the value.
    * Any other child element with its own text contributes a label (its
      text without the last character); the value is taken, by position,
      from the non-blank text nodes lying directly inside ``info`` --
      the text that is not enclosed by any tag.

    Args:
        response: object providing ``xpath()``; the selections it returns
            must provide ``extract()`` (presumably a Scrapy response --
            confirm with the owning spider).

    Returns:
        None. Every pair is written to standard output as ``label value``.
    """
    states = {}
    pending_labels = []

    for child in response.xpath("//*[@id='info']/*"):
        label_texts = child.xpath("span[@class='pl']/text()").extract()
        if label_texts:
            link_texts = child.xpath("a/text()").extract()
            # No direct link means no value for this label; skip the row
            # rather than indexing into an empty list.
            if link_texts:
                states[label_texts[0].strip()] = link_texts[0].strip()
            continue

        direct_texts = child.xpath("text()").extract()
        if direct_texts:
            # The last character is removed from the label (presumably a
            # trailing colon -- verify against the target page).
            pending_labels.append(direct_texts[0].strip()[:-1])

    # Bare text nodes directly under #info; whitespace-only nodes are
    # layout noise between tags and are discarded.
    bare_values = [
        text.strip()
        for text in response.xpath("//*[@id='info']/text()").extract()
        if text.strip()
    ]

    # Positional pairing; zip truncates to the shorter sequence, so unequal
    # counts no longer raise IndexError.
    for label, value in zip(pending_labels, bare_values):
        states[label] = value

    for key in states:
        # Single pre-formatted string keeps the output identical for the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s" % (key, states[key]))
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询