一道Java题,求程序。
分析https://github.com/zhutao2015/intern-test/blob/master/test2/src.txt文章中最高权重的10个单词并输出...
分析https://github.com/zhutao2015/intern-test/blob/master/test2/src.txt文章中最高权重的10个单词并输出
说明:
1. 单词总权重=出现次数*单词权重
2. 如下的单词权重为0.2, [am, is, are, was, been, has, have, had, a, an, the, in, at, on, to, or]
3. 2以外的单词权重为1 展开
说明:
1. 单词总权重=出现次数*单词权重
2. 如下的单词权重为0.2, [am, is, are, was, been, has, have, had, a, an, the, in, at, on, to, or]
3. 2以外的单词权重为1 展开
2个回答
展开全部
写了好久啊 望采纳
package com.ytf.ext.aa;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.security.cert.CertificateException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
/**
 * Downloads an article and prints its ten highest-weighted words.
 *
 * <p>The total weight of a word is its number of occurrences multiplied by its
 * per-word weight: {@code 0.2} for the common words registered in {@code set}
 * and {@code 1} for every other word. Words are compared case-insensitively
 * and surrounding punctuation is ignored.
 */
public class A {
    /** Common words whose per-occurrence weight is {@link #LOW_WEIGHT}. */
    private static final Set<String> set = new HashSet<String>();

    /**
     * Page downloaded by {@link #main(String[])}.
     * NOTE(review): this is the HTML "blob" view of the file; the raw-content
     * URL of the same repository would presumably serve plain text - confirm.
     */
    private static final String url = "https://github.com/zhutao2015/intern-test/blob/master/test2/src.txt";

    /** Per-occurrence weight of the words listed in {@code set}. */
    private static final double LOW_WEIGHT = 0.2;

    /** Per-occurrence weight of every other word. */
    private static final double DEFAULT_WEIGHT = 1.0;

    /** Number of words printed by {@link #main(String[])}. */
    private static final int TOP_COUNT = 10;

    /** A word: a run of ASCII letters, optionally with inner apostrophes ("don't"). */
    private static final Pattern WORD_PATTERN = Pattern.compile("[A-Za-z]+(?:'[A-Za-z]+)*");

    /** Any HTML tag. */
    private static final Pattern TAG_PATTERN = Pattern.compile("<[^>]+>");

    /** Marker that precedes the file content in the downloaded page. */
    private static final String CONTENT_START = "data-tab-size=\"8\">";

    /** Marker that follows the file content in the downloaded page. */
    private static final String CONTENT_END = "</table";

    static {
        String[] lowWeightWords = {
            "am", "is", "are", "was", "been", "has", "have", "had",
            "a", "an", "the", "in", "at", "on", "to", "or"
        };
        for (String word : lowWeightWords) {
            set.add(word);
        }
    }

    /**
     * Downloads the article, computes the word weights and prints the
     * {@value #TOP_COUNT} heaviest words, one {@code word:weight} pair per line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String conHtml = testHtml2(url);
        if (conHtml.length() == 0) {
            // Nothing was downloaded; testHtml2 has already reported the cause.
            System.err.println("No content retrieved from " + url);
            return;
        }
        String context = getContext(conHtml);
        Map<String, Double> weights = computeWeights(context);
        for (Map.Entry<String, Double> entry : topWords(weights, TOP_COUNT)) {
            // One decimal is enough: every weight is a multiple of 0.2.
            System.out.println(entry.getKey() + ":"
                    + String.format(java.util.Locale.ROOT, "%.1f", entry.getValue()));
        }
    }

    /**
     * Computes the total weight of every word in {@code text}.
     *
     * <p>Words are lower-cased before counting so that "The" and "the" are the
     * same word. Occurrences are counted as integers and multiplied by the
     * per-word weight once, instead of repeatedly adding {@code 0.2}.
     *
     * @param text plain text to analyse; {@code null} is treated as empty
     * @return a new map from lower-cased word to its total weight
     */
    static Map<String, Double> computeWeights(String text) {
        Map<String, Double> weights = new HashMap<String, Double>();
        if (text == null) {
            return weights;
        }
        Map<String, Integer> counts = new HashMap<String, Integer>();
        Matcher matcher = WORD_PATTERN.matcher(text);
        while (matcher.find()) {
            String word = matcher.group().toLowerCase(java.util.Locale.ROOT);
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            double perWord = set.contains(entry.getKey()) ? LOW_WEIGHT : DEFAULT_WEIGHT;
            weights.put(entry.getKey(), entry.getValue() * perWord);
        }
        return weights;
    }

    /**
     * Returns the {@code limit} heaviest entries of {@code weights}.
     *
     * @param weights word-to-weight map, not modified
     * @param limit   maximum number of entries to return
     * @return entries ordered by descending weight; equal weights are ordered
     *         alphabetically so the result is deterministic
     */
    static java.util.List<Map.Entry<String, Double>> topWords(Map<String, Double> weights, int limit) {
        java.util.List<Map.Entry<String, Double>> entries =
                new java.util.ArrayList<Map.Entry<String, Double>>(weights.entrySet());
        java.util.Collections.sort(entries, new java.util.Comparator<Map.Entry<String, Double>>() {
            @Override
            public int compare(Map.Entry<String, Double> a, Map.Entry<String, Double> b) {
                int byWeight = Double.compare(b.getValue(), a.getValue());
                return byWeight != 0 ? byWeight : a.getKey().compareTo(b.getKey());
            }
        });
        int size = Math.max(0, Math.min(limit, entries.size()));
        return new java.util.ArrayList<Map.Entry<String, Double>>(entries.subList(0, size));
    }

    /**
     * Extracts the article text from the downloaded page.
     *
     * <p>The text between the content markers is taken when they are present;
     * otherwise the whole input is used instead of failing. HTML tags are then
     * removed and a few common character entities are decoded.
     *
     * @param conHtml page source; {@code null} is treated as empty
     * @return the text with tags removed
     */
    public static String getContext(String conHtml) {
        if (conHtml == null) {
            return "";
        }
        int markerAt = conHtml.indexOf(CONTENT_START);
        int from = markerAt < 0 ? 0 : markerAt + CONTENT_START.length();
        int to = conHtml.lastIndexOf(CONTENT_END);
        if (to < from) {
            // End marker missing (or located before the start marker).
            to = conHtml.length();
        }
        String withoutTags = TAG_PATTERN.matcher(conHtml.substring(from, to)).replaceAll("");
        return decodeEntities(withoutTags);
    }

    /** Decodes the handful of HTML entities likely to appear in article text. */
    private static String decodeEntities(String text) {
        return text.replace("&nbsp;", " ")
                .replace("&quot;", "\"")
                .replace("&#39;", "'")
                .replace("&lt;", "<")
                .replace("&gt;", ">")
                // Must be last so that "&amp;lt;" becomes "&lt;", not "<".
                .replace("&amp;", "&");
    }

    /**
     * Sends a request to the given URL and returns the page content.
     *
     * <p>No custom trust manager or host name verifier is installed, so HTTPS
     * connections are validated by the JVM's default TLS configuration. The
     * response is decoded as UTF-8 and the stream is always closed.
     *
     * @param urlstr address of the page to download
     * @return the page content with lines joined by {@code "\r\n"} and
     *         surrounding whitespace trimmed; whatever was read so far
     *         (possibly the empty string) if the download fails
     */
    public static String testHtml2(String urlstr) {
        StringBuilder content = new StringBuilder();
        InputStream in = null;
        try {
            in = new URL(urlstr).openStream();
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append("\r\n");
            }
        } catch (IOException e) {
            // Best effort, as before: report the failure and return what was read.
            System.err.println("Failed to download " + urlstr + ": " + e);
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
        return content.toString().trim();
    }
}
package com.ytf.ext.aa;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.security.cert.CertificateException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
/**
 * Downloads an article and prints its ten highest-weighted words.
 *
 * <p>The total weight of a word is its number of occurrences multiplied by its
 * per-word weight: {@code 0.2} for the common words registered in {@code set}
 * and {@code 1} for every other word. Words are compared case-insensitively
 * and surrounding punctuation is ignored.
 */
public class A {
    /** Common words whose per-occurrence weight is {@link #LOW_WEIGHT}. */
    private static final Set<String> set = new HashSet<String>();

    /**
     * Page downloaded by {@link #main(String[])}.
     * NOTE(review): this is the HTML "blob" view of the file; the raw-content
     * URL of the same repository would presumably serve plain text - confirm.
     */
    private static final String url = "https://github.com/zhutao2015/intern-test/blob/master/test2/src.txt";

    /** Per-occurrence weight of the words listed in {@code set}. */
    private static final double LOW_WEIGHT = 0.2;

    /** Per-occurrence weight of every other word. */
    private static final double DEFAULT_WEIGHT = 1.0;

    /** Number of words printed by {@link #main(String[])}. */
    private static final int TOP_COUNT = 10;

    /** A word: a run of ASCII letters, optionally with inner apostrophes ("don't"). */
    private static final Pattern WORD_PATTERN = Pattern.compile("[A-Za-z]+(?:'[A-Za-z]+)*");

    /** Any HTML tag. */
    private static final Pattern TAG_PATTERN = Pattern.compile("<[^>]+>");

    /** Marker that precedes the file content in the downloaded page. */
    private static final String CONTENT_START = "data-tab-size=\"8\">";

    /** Marker that follows the file content in the downloaded page. */
    private static final String CONTENT_END = "</table";

    static {
        String[] lowWeightWords = {
            "am", "is", "are", "was", "been", "has", "have", "had",
            "a", "an", "the", "in", "at", "on", "to", "or"
        };
        for (String word : lowWeightWords) {
            set.add(word);
        }
    }

    /**
     * Downloads the article, computes the word weights and prints the
     * {@value #TOP_COUNT} heaviest words, one {@code word:weight} pair per line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String conHtml = testHtml2(url);
        if (conHtml.length() == 0) {
            // Nothing was downloaded; testHtml2 has already reported the cause.
            System.err.println("No content retrieved from " + url);
            return;
        }
        String context = getContext(conHtml);
        Map<String, Double> weights = computeWeights(context);
        for (Map.Entry<String, Double> entry : topWords(weights, TOP_COUNT)) {
            // One decimal is enough: every weight is a multiple of 0.2.
            System.out.println(entry.getKey() + ":"
                    + String.format(java.util.Locale.ROOT, "%.1f", entry.getValue()));
        }
    }

    /**
     * Computes the total weight of every word in {@code text}.
     *
     * <p>Words are lower-cased before counting so that "The" and "the" are the
     * same word. Occurrences are counted as integers and multiplied by the
     * per-word weight once, instead of repeatedly adding {@code 0.2}.
     *
     * @param text plain text to analyse; {@code null} is treated as empty
     * @return a new map from lower-cased word to its total weight
     */
    static Map<String, Double> computeWeights(String text) {
        Map<String, Double> weights = new HashMap<String, Double>();
        if (text == null) {
            return weights;
        }
        Map<String, Integer> counts = new HashMap<String, Integer>();
        Matcher matcher = WORD_PATTERN.matcher(text);
        while (matcher.find()) {
            String word = matcher.group().toLowerCase(java.util.Locale.ROOT);
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            double perWord = set.contains(entry.getKey()) ? LOW_WEIGHT : DEFAULT_WEIGHT;
            weights.put(entry.getKey(), entry.getValue() * perWord);
        }
        return weights;
    }

    /**
     * Returns the {@code limit} heaviest entries of {@code weights}.
     *
     * @param weights word-to-weight map, not modified
     * @param limit   maximum number of entries to return
     * @return entries ordered by descending weight; equal weights are ordered
     *         alphabetically so the result is deterministic
     */
    static java.util.List<Map.Entry<String, Double>> topWords(Map<String, Double> weights, int limit) {
        java.util.List<Map.Entry<String, Double>> entries =
                new java.util.ArrayList<Map.Entry<String, Double>>(weights.entrySet());
        java.util.Collections.sort(entries, new java.util.Comparator<Map.Entry<String, Double>>() {
            @Override
            public int compare(Map.Entry<String, Double> a, Map.Entry<String, Double> b) {
                int byWeight = Double.compare(b.getValue(), a.getValue());
                return byWeight != 0 ? byWeight : a.getKey().compareTo(b.getKey());
            }
        });
        int size = Math.max(0, Math.min(limit, entries.size()));
        return new java.util.ArrayList<Map.Entry<String, Double>>(entries.subList(0, size));
    }

    /**
     * Extracts the article text from the downloaded page.
     *
     * <p>The text between the content markers is taken when they are present;
     * otherwise the whole input is used instead of failing. HTML tags are then
     * removed and a few common character entities are decoded.
     *
     * @param conHtml page source; {@code null} is treated as empty
     * @return the text with tags removed
     */
    public static String getContext(String conHtml) {
        if (conHtml == null) {
            return "";
        }
        int markerAt = conHtml.indexOf(CONTENT_START);
        int from = markerAt < 0 ? 0 : markerAt + CONTENT_START.length();
        int to = conHtml.lastIndexOf(CONTENT_END);
        if (to < from) {
            // End marker missing (or located before the start marker).
            to = conHtml.length();
        }
        String withoutTags = TAG_PATTERN.matcher(conHtml.substring(from, to)).replaceAll("");
        return decodeEntities(withoutTags);
    }

    /** Decodes the handful of HTML entities likely to appear in article text. */
    private static String decodeEntities(String text) {
        return text.replace("&nbsp;", " ")
                .replace("&quot;", "\"")
                .replace("&#39;", "'")
                .replace("&lt;", "<")
                .replace("&gt;", ">")
                // Must be last so that "&amp;lt;" becomes "&lt;", not "<".
                .replace("&amp;", "&");
    }

    /**
     * Sends a request to the given URL and returns the page content.
     *
     * <p>No custom trust manager or host name verifier is installed, so HTTPS
     * connections are validated by the JVM's default TLS configuration. The
     * response is decoded as UTF-8 and the stream is always closed.
     *
     * @param urlstr address of the page to download
     * @return the page content with lines joined by {@code "\r\n"} and
     *         surrounding whitespace trimmed; whatever was read so far
     *         (possibly the empty string) if the download fails
     */
    public static String testHtml2(String urlstr) {
        StringBuilder content = new StringBuilder();
        InputStream in = null;
        try {
            in = new URL(urlstr).openStream();
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append("\r\n");
            }
        } catch (IOException e) {
            // Best effort, as before: report the failure and return what was read.
            System.err.println("Failed to download " + urlstr + ": " + e);
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
        return content.toString().trim();
    }
}
本回答被提问者采纳
已赞过
已踩过
评论
收起
你对这个回答的评价是?
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询
广告 您可能关注的内容 |