C# Regex 问题!高手请进
usingSystem;usingSystem.IO;usingSystem.Collections.Generic;usingSystem.Linq;usingSyst...
using System;
using System.IO;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace tokenizer
{
/// <summary>
/// Console entry point of the tokenizer. Reads the input file line by line,
/// drops everything up to and including the first '.' of each line, replaces
/// every '/' with a space, and writes the remaining text to the output file,
/// starting a new output line after every match of <see cref="SplitPattern"/>.
/// </summary>
class Program
{
    // Alternatives are tried left to right, so each two-letter alternative is
    // listed before the one-letter alternative sharing its first letter
    // (e.g. "ad" before "a"). The pattern text is kept on one logical string:
    // a line break inside a regular string literal does not compile.
    private static readonly Regex SplitPattern = new Regex(
        "ad|an|av|a|c|dd|dh|do|dv|dw|d|e|h|i|l|k|md|mk|mm|mo|mr|m|" +
        "nd|nk|nm|nr|nt|nv|nz|n|o|q|ra|rb|re|rk|ro|rs|ry|r|tt|t|v|w|y");

    /// <summary>
    /// Expects exactly two arguments: the input file path and the output file path.
    /// Prints a usage message and returns when the argument count is wrong; prints
    /// the exception message and returns when reading or writing fails.
    /// </summary>
    /// <param name="args">args[0] = input file, args[1] = output file.</param>
    static void Main(string[] args)
    {
        if (args.Length != 2)
        {
            Console.WriteLine("Usage: tokenizer Inputfile Outputfile");
            return;
        }

        try
        {
            // using-statements close (and flush) both files even when an
            // exception is thrown part-way through the input.
            using (StreamReader reader = new StreamReader(args[0]))
            using (StreamWriter writer = new StreamWriter(args[1]))
            {
                string rawLine;
                while ((rawLine = reader.ReadLine()) != null)
                {
                    foreach (string piece in SplitAfterMatches(rawLine))
                    {
                        writer.WriteLine(piece);
                    }
                }
            }
        }
        catch (Exception err)
        {
            Console.WriteLine(err.Message);
            return;
        }

        // Keeps the console window open until a key is pressed.
        Console.ReadKey();
    }

    /// <summary>
    /// Normalizes one input line and cuts it after every pattern match.
    /// </summary>
    /// <param name="rawLine">A line as read from the input file.</param>
    /// <returns>
    /// The pieces of the normalized line. A match at the very end of the line
    /// produces a trailing empty piece, and a literal '*' in the input is also
    /// treated as a cut point (both as in the original implementation).
    /// </returns>
    private static string[] SplitAfterMatches(string rawLine)
    {
        string text = rawLine.Trim();
        // IndexOf returns -1 when there is no '.', which makes this Remove(0, 0), a no-op.
        text = text.Remove(0, text.IndexOf('.') + 1);
        text = text.Replace("/", " ");

        MatchCollection matches = SplitPattern.Matches(text);

        // Single pass: copy the text up to the end of each match, then a '*' marker.
        StringBuilder marked = new StringBuilder(text.Length + matches.Count);
        int copied = 0;
        foreach (Match match in matches)
        {
            int matchEnd = match.Index + match.Length;
            marked.Append(text, copied, matchEnd - copied);
            marked.Append('*');
            copied = matchEnd;
        }
        marked.Append(text, copied, text.Length - copied);

        return marked.ToString().Split(new char[] { '*' });
    }
}
}这个代码我不怎么看懂,大家能帮我解释一下吗?朋友说这是一个读取文件并进行分词的代码,还会把文本里面的数字替换成空格,可是我看不懂哪儿有这个部分? 展开
using System.IO;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace tokenizer
{
/// <summary>
/// Console entry point of the tokenizer. Reads the input file line by line,
/// drops everything up to and including the first '.' of each line, replaces
/// every '/' with a space, and writes the remaining text to the output file,
/// starting a new output line after every match of <see cref="SplitPattern"/>.
/// </summary>
class Program
{
    // Alternatives are tried left to right, so each two-letter alternative is
    // listed before the one-letter alternative sharing its first letter
    // (e.g. "ad" before "a"). The pattern text is kept on one logical string:
    // a line break inside a regular string literal does not compile.
    private static readonly Regex SplitPattern = new Regex(
        "ad|an|av|a|c|dd|dh|do|dv|dw|d|e|h|i|l|k|md|mk|mm|mo|mr|m|" +
        "nd|nk|nm|nr|nt|nv|nz|n|o|q|ra|rb|re|rk|ro|rs|ry|r|tt|t|v|w|y");

    /// <summary>
    /// Expects exactly two arguments: the input file path and the output file path.
    /// Prints a usage message and returns when the argument count is wrong; prints
    /// the exception message and returns when reading or writing fails.
    /// </summary>
    /// <param name="args">args[0] = input file, args[1] = output file.</param>
    static void Main(string[] args)
    {
        if (args.Length != 2)
        {
            Console.WriteLine("Usage: tokenizer Inputfile Outputfile");
            return;
        }

        try
        {
            // using-statements close (and flush) both files even when an
            // exception is thrown part-way through the input.
            using (StreamReader reader = new StreamReader(args[0]))
            using (StreamWriter writer = new StreamWriter(args[1]))
            {
                string rawLine;
                while ((rawLine = reader.ReadLine()) != null)
                {
                    foreach (string piece in SplitAfterMatches(rawLine))
                    {
                        writer.WriteLine(piece);
                    }
                }
            }
        }
        catch (Exception err)
        {
            Console.WriteLine(err.Message);
            return;
        }

        // Keeps the console window open until a key is pressed.
        Console.ReadKey();
    }

    /// <summary>
    /// Normalizes one input line and cuts it after every pattern match.
    /// </summary>
    /// <param name="rawLine">A line as read from the input file.</param>
    /// <returns>
    /// The pieces of the normalized line. A match at the very end of the line
    /// produces a trailing empty piece, and a literal '*' in the input is also
    /// treated as a cut point (both as in the original implementation).
    /// </returns>
    private static string[] SplitAfterMatches(string rawLine)
    {
        string text = rawLine.Trim();
        // IndexOf returns -1 when there is no '.', which makes this Remove(0, 0), a no-op.
        text = text.Remove(0, text.IndexOf('.') + 1);
        text = text.Replace("/", " ");

        MatchCollection matches = SplitPattern.Matches(text);

        // Single pass: copy the text up to the end of each match, then a '*' marker.
        StringBuilder marked = new StringBuilder(text.Length + matches.Count);
        int copied = 0;
        foreach (Match match in matches)
        {
            int matchEnd = match.Index + match.Length;
            marked.Append(text, copied, matchEnd - copied);
            marked.Append('*');
            copied = matchEnd;
        }
        marked.Append(text, copied, text.Length - copied);

        return marked.ToString().Split(new char[] { '*' });
    }
}
}这个代码我不怎么看懂,大家能帮我解释一下吗?朋友说这是一个读取文件并进行分词的代码,还会把文本里面的数字替换成空格,可是我看不懂哪儿有这个部分? 展开
3个回答
展开全部
mc = r.Matches(MyLine);//假设MyLine是 sad1235sdttsa,正则会依次匹配到 ad、d、tt、a 共4处
for (int i = 0; i < mc.Count; i++)
MyLine = MyLine.Insert(mc[i].Index + mc[i].Value.Length + i, "*");
在每个匹配项的后面插入一个*,MyLine变为sad*1235sd*tt*sa*
string[] split = MyLine.Split(new char[] { '*' });
split 共5个string,为 sad,1235sd,tt,sa 和一个空字符串(因为最后一个*后面没有内容)
for (int i = 0; i < split.Length; i++)
MyWriter.WriteLine(split[i]);
输出sad
1235sd
tt
sa
(最后还有一个空行)
for (int i = 0; i < mc.Count; i++)
MyLine = MyLine.Insert(mc[i].Index + mc[i].Value.Length + i, "*");
在每个匹配项的后面插入一个*,MyLine变为sad*1235sd*tt*sa*
string[] split = MyLine.Split(new char[] { '*' });
split 共5个string,为 sad,1235sd,tt,sa 和一个空字符串(因为最后一个*后面没有内容)
for (int i = 0; i < split.Length; i++)
MyWriter.WriteLine(split[i]);
输出sad
1235sd
tt
sa
(最后还有一个空行)
更多追问追答
追问
这个是console程序,我执行了以后窗口一闪就没了,不知道怎么导入文件进行分词。问朋友,他说让我自己写路径,可是窗口一闪就消失,该怎么写呀?
这个是console程序,我执行了以后窗口一闪就没了,不知道怎么导入文件进行分词。问朋友,他说让我自己写路径,可是窗口一闪就消失,该怎么写呀?
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询