Python编程实现csv文件某一列的词频统计
展开全部
import re
import collections
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
# Build one text document per CSV row from the selected columns, fit a
# TF-IDF vectorizer on those documents, and print the shape of the
# resulting document-term matrix.

# To avoid problems, use the full path for the file name.
data = pd.read_csv('XXX.csv')

# NOTE: m and n are placeholders that must be defined before running;
# the slice m:n selects which column (or columns) of each row to use.
# Each selected cell is converted with str() and the cells of a row are
# joined with single spaces.
trainheadlines = [
    ' '.join(str(x) for x in data.iloc[row, m:n])
    for row in range(len(data.index))
]

# max_df is given as the float 1.0 (a proportion of documents): an integer
# max_df is an absolute document count, so the previous max_df=1 discarded
# every term that occurred in more than one row.
# min_df=1 keeps any term that occurs in at least one document, which is
# the same selection integer 0 gave; newer scikit-learn releases reject an
# integer min_df below 1 during parameter validation.
advancedvectorizer = TfidfVectorizer(
    min_df=1, max_df=1.0, max_features=20000, ngram_range=(1, 1))
advancedtrain = advancedvectorizer.fit_transform(trainheadlines)
print(advancedtrain.shape)
import collections
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
# Build one text document per CSV row from the selected columns, fit a
# TF-IDF vectorizer on those documents, and print the shape of the
# resulting document-term matrix.

# To avoid problems, use the full path for the file name.
data = pd.read_csv('XXX.csv')

# NOTE: m and n are placeholders that must be defined before running;
# the slice m:n selects which column (or columns) of each row to use.
# Each selected cell is converted with str() and the cells of a row are
# joined with single spaces.
trainheadlines = [
    ' '.join(str(x) for x in data.iloc[row, m:n])
    for row in range(len(data.index))
]

# max_df is given as the float 1.0 (a proportion of documents): an integer
# max_df is an absolute document count, so the previous max_df=1 discarded
# every term that occurred in more than one row.
# min_df=1 keeps any term that occurs in at least one document, which is
# the same selection integer 0 gave; newer scikit-learn releases reject an
# integer min_df below 1 during parameter validation.
advancedvectorizer = TfidfVectorizer(
    min_df=1, max_df=1.0, max_features=20000, ngram_range=(1, 1))
advancedtrain = advancedvectorizer.fit_transform(trainheadlines)
print(advancedtrain.shape)
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询