python HTML处理
我想提取网页中的表格数据,但不知为何只能输出至2004年的数据http://vip.stock.finance.sina.com.cn/corp/view/vFD_Fin...
我想提取网页中的表格数据,但不知为何只能输出至2004年的数据
http://vip.stock.finance.sina.com.cn/corp/view/vFD_FinancialGuideLineHistory.php?stockid=600159&typecode=financialratios59
代码:
import os
import sys
import time
import urllib.request
import bs4
from pprint import pprint
# Download the Sina financial-ratio history page for stock 600159 and save
# every year-end ("-12-31") row of the table with id "Table1" to ROE.txt.
req = "http://vip.stock.finance.sina.com.cn/corp/view/vFD_FinancialGuideLineHistory.php?stockid=600159&typecode=financialratios59"

# Close the HTTP response deterministically once the body has been read.
with urllib.request.urlopen(req, timeout=30) as response:
    html = response.read()

# The raw bytes are handed to BeautifulSoup, which detects the page encoding.
soup = bs4.BeautifulSoup(html)
table = soup.find(id='Table1')

# Two fixes over the original:
#   * `with` guarantees the file is flushed and closed; the original ended
#     with `roe.close` (no parentheses), which never actually called close().
#   * encoding='utf-8' is explicit, because table cells may contain characters
#     (such as a non-breaking space) that the platform's default codec cannot
#     encode, which would abort the loop part-way with UnicodeEncodeError.
with open('ROE.txt', 'w', encoding='utf-8') as roe:
    for row in table.find_all('tr'):
        text = str(row.find_all('td'))
        # Keep only annual (31 December) reporting periods.
        if "-12-31" in text:
            roe.write(text + '\n')

# NOTE(review): the pasted code lost its indentation, so the intended nesting
# of this delay is unknown; it is kept as a single pause after the request.
time.sleep(2)
http://vip.stock.finance.sina.com.cn/corp/view/vFD_FinancialGuideLineHistory.php?stockid=600159&typecode=financialratios59
代码:
import os
import sys
import time
import urllib.request
import bs4
from pprint import pprint
# Download the Sina financial-ratio history page for stock 600159 and save
# every year-end ("-12-31") row of the table with id "Table1" to ROE.txt.
req = "http://vip.stock.finance.sina.com.cn/corp/view/vFD_FinancialGuideLineHistory.php?stockid=600159&typecode=financialratios59"

# Close the HTTP response deterministically once the body has been read.
with urllib.request.urlopen(req, timeout=30) as response:
    html = response.read()

# The raw bytes are handed to BeautifulSoup, which detects the page encoding.
soup = bs4.BeautifulSoup(html)
table = soup.find(id='Table1')

# Two fixes over the original:
#   * `with` guarantees the file is flushed and closed; the original ended
#     with `roe.close` (no parentheses), which never actually called close().
#   * encoding='utf-8' is explicit, because table cells may contain characters
#     (such as a non-breaking space) that the platform's default codec cannot
#     encode, which would abort the loop part-way with UnicodeEncodeError.
with open('ROE.txt', 'w', encoding='utf-8') as roe:
    for row in table.find_all('tr'):
        text = str(row.find_all('td'))
        # Keep only annual (31 December) reporting periods.
        if "-12-31" in text:
            roe.write(text + '\n')

# NOTE(review): the pasted code lost its indentation, so the intended nesting
# of this delay is unknown; it is kept as a single pause after the request.
time.sleep(2)
1个回答
展开全部
[<td style="text-align:center">2014-12-31</td>, <td style="text-align:center">6.16</td>, <td style="text-align:center"><font style="color:red">↑</font>1.76</td>]
[<td style="text-align:center">2013-12-31</td>, <td style="text-align:center">11.47</td>, <td style="text-align:center"><font style="color:green">↓</font>1.72</td>]
[<td style="text-align:center">2012-12-31</td>, <td style="text-align:center">-5.5</td>, <td style="text-align:center"><font style="color:green">↓</font>4.31</td>]
[<td style="text-align:center">2011-12-31</td>, <td style="text-align:center">14.83</td>, <td style="text-align:center"><font style="color:green">↓</font>0.53</td>]
[<td style="text-align:center">2010-12-31</td>, <td style="text-align:center">1.8</td>, <td style="text-align:center"><font style="color:green">↓</font>0.63</td>]
[<td style="text-align:center">2009-12-31</td>, <td style="text-align:center">21.69</td>, <td style="text-align:center"><font style="color:red">↑</font>5.61</td>]
[<td style="text-align:center">2008-12-31</td>, <td style="text-align:center">0.19</td>, <td style="text-align:center"><font style="color:green">↓</font>0.03</td>]
[<td style="text-align:center">2007-12-31</td>, <td style="text-align:center">2.89</td>, <td style="text-align:center"><font style="color:red">↑</font>0.77</td>]
[<td style="text-align:center">2006-12-31</td>, <td style="text-align:center">7.37</td>, <td style="text-align:center"><font style="color:red">↑</font>2.71</td>]
[<td style="text-align:center">2005-12-31</td>, <td style="text-align:center">3.99</td>, <td style="text-align:center"><font style="color:red">↑</font>28.66</td>]
[<td style="text-align:center">2004-12-31</td>, <td style="text-align:center">-132.62</td>, <td style="text-align:center"><font style="color:green">↓</font>78.84</td>]
[<td style="text-align:center">2003-12-31</td>, <td style="text-align:center">23.79</td>, <td style="text-align:center">&nbsp;</td>]
[<td style="text-align:center">2002-12-31</td>, <td style="text-align:center">-759.2139</td>, <td style="text-align:center"><font style="color:green">↓</font>745.9739</td>]
[<td style="text-align:center">2001-12-31</td>, <td style="text-align:center">-5.47</td>, <td style="text-align:center"><font style="color:green">↓</font>6.852</td>]
[<td style="text-align:center">2000-12-31</td>, <td style="text-align:center">6.82</td>, <td style="text-align:center"><font style="color:red">↑</font>3.95</td>]
[<td style="text-align:center">1999-12-31</td>, <td style="text-align:center">10.48</td>, <td style="text-align:center"><font style="color:red">↑</font>6.87</td>]
[<td style="text-align:center">1998-12-31</td>, <td style="text-align:center">12.28</td>, <td style="text-align:center"><font style="color:red">↑</font>7.21</td>]
[<td style="text-align:center">1997-12-31</td>, <td style="text-align:center">54</td>, <td style="text-align:center"><font style="color:green">↓</font>18</td>]
[<td style="text-align:center">1996-12-31</td>, <td style="text-align:center">72</td>, <td style="text-align:center"><font style="color:red">↑</font>7</td>]
[<td style="text-align:center">1995-12-31</td>, <td style="text-align:center">65</td>, <td style="text-align:center">&nbsp;</td>]
不止2004年的数据
我修改了一下你的代码,改用了 requests 库:
import os
import sys
import time
import requests
import bs4
from pprint import pprint
# Download the Sina financial-ratio history page for stock 600159 and save
# every year-end ("-12-31") row of the table with id "Table1" to ROE.txt.
req = "http://vip.stock.finance.sina.com.cn/corp/view/vFD_FinancialGuideLineHistory.php?stockid=600159&typecode=financialratios59"

response = requests.get(req, timeout=30)
# Fail fast on an HTTP error instead of parsing an error page as the table.
response.raise_for_status()

# Pass the raw bytes (`content`) rather than `text`: `text` is decoded with
# the charset requests guesses from the HTTP headers, and a wrong guess turns
# non-ASCII cell content into mojibake. Given bytes, BeautifulSoup detects
# the encoding from the document itself.
soup = bs4.BeautifulSoup(response.content)
table = soup.find(id='Table1')

# `with` guarantees the file is flushed and closed even if a row fails;
# encoding='utf-8' keeps the output independent of the platform's default
# codec, which may be unable to encode some cell characters.
with open('ROE.txt', 'w', encoding='utf-8') as roe:
    for row in table.find_all('tr'):
        text = str(row.find_all('td'))
        # Keep only annual (31 December) reporting periods.
        if "-12-31" in text:
            roe.write(text + '\n')

# NOTE(review): the pasted code lost its indentation, so the intended nesting
# of this delay is unknown; it is kept as a single pause after the request.
time.sleep(2)
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询