Access数据库中怎么批量删除html标记
我在网上找到一个access数据库,可是里面每一个记录里都有大量的HTML标记,而且记录很多,用空白批量替换也只能替换一小部分.请问怎么能方便快捷的删除这些html标记....
我在网上找到一个 Access 数据库,可是里面每一条记录里都有大量的 HTML 标记,而且记录很多,用空白批量替换也只能替换掉一小部分。请问怎样才能方便快捷地删除这些 HTML 标记?
另外,这个数据库为什么有的内容不显示,好像隐藏了.有什么办法可以查看完整的数据记录么?
追加50分,能不能说得具体一点,比如怎么用正则替换掉</B>或者更长的代码?如果真的能实现,再追加一百分。
另外,这个数据库为什么有的内容不显示,好像隐藏了.有什么办法可以查看完整的数据记录么?
追加50分,能不能说得具体一点,比如怎么用正则替换掉</B>或者更长的代码?如果真的能实现,再追加一百分。
6个回答
展开全部
读取后用正则表达式全部替换
例如:
HTML相关的正则表达式 2007-07-15 00:04
#region 相关正则表达式
/// <summary>
/// Matches every fragment that should be removed when reducing markup to plain text:
/// BBCode-style <c>[tag]...[/tag]</c> pairs, anchor elements with at least two
/// characters of link text, whole style/select/script/li elements, HTML comments,
/// any remaining tag, named and numeric character entities, <c>#</c> followed by six
/// letters or digits, and runs of whitespace.
/// </summary>
/// <remarks>
/// The BBCode tag name is captured in the named group <c>bb</c> and the closing tag is
/// matched with <c>\k&lt;bb&gt;</c>. The previous <c>\1</c> could never succeed: with
/// <see cref="RegexOptions.ExplicitCapture"/> unnamed groups are not captured, so
/// <c>\1</c> referred to the unrelated named group <c>lj</c>, which is always unset
/// while the BBCode branch is being tried. Callers that read groups by number should
/// note that <c>bb</c> is now the first named group; access by name is unaffected.
/// </remarks>
private static readonly Regex FilterAll = new Regex(
    @"(\[(?<bb>[^=\]]*)(=[^\]]*)?\][\s\S]*?\[/\k<bb>\])|(?<lj>(?=[^\u4E00-\u9FA5\uFE30-\uFFA0,."");])<a\s+[^>]*>[^<]{2,}</a>(?=[^\u4E00-\u9FA5\uFE30-\uFFA0,."");]))|(?<Style><style[\s\S]+?/style>)|(?<select><select[\s\S]+?/select>)|(?<Script><script[\s\S]*?/script>)|(?<Explein><\!\-\-[\s\S]*?\-\->)|(?<li><li(\s+[^>]+)?>[\s\S]*?/li>)|(?<Html></?\s*[^> ]+(\s*[^=>]+?=['""]?[^""']+?['""]?)*?[^\[<]*>)|(?<Other>&[a-zA-Z]+;)|(?<Other2>\#[a-z0-9]{6})|(?<Space>\s+)|(\&\#\d+\;)",
    RegexOptions.ExplicitCapture
    | RegexOptions.Multiline
    | RegexOptions.IgnoreCase);
/// <summary>
/// Matches a bare opening or closing <c>&lt;title&gt;</c> tag (no attributes),
/// tolerating whitespace inside the angle brackets and ignoring case.
/// </summary>
private static readonly Regex FindTitle =
    new Regex(
        @"<\s*/?title\s*>",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Captures the text between two bare title tags into the group <c>Content</c>.
/// The slash is optional on both the leading and the trailing tag, and the
/// content is matched lazily, so the capture ends at the first following title tag.
/// </summary>
private static readonly Regex FindTitleContent =
    new Regex(
        @"<\s*/?title\s*>(?<Content>[\s\S]*?)<\s*/?title\s*>",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Matches bare opening or closing heading tags (<c>&lt;h1&gt;</c> to <c>&lt;h6&gt;</c>)
/// and <c>&lt;strong&gt;</c> tags, ignoring case.
/// </summary>
/// <remarks>
/// The heading level digit is optional so that the previously accepted form
/// <c>&lt;h&gt;</c> still matches; the earlier pattern accepted only that form and
/// therefore missed every real HTML heading.
/// </remarks>
private static readonly Regex FindHStrong = new Regex(
    @"<\s*/?h[1-6]?\s*>|<\s*/?strong\s*>",
    RegexOptions.ExplicitCapture
    | RegexOptions.Multiline
    | RegexOptions.IgnoreCase);
/// <summary>
/// Matches bare paragraph tags (<c>&lt;p&gt;</c>, <c>&lt;/p&gt;</c>), line breaks
/// (<c>&lt;br&gt;</c>, <c>&lt;br/&gt;</c>) and table-row tags (<c>&lt;tr&gt;</c>,
/// <c>&lt;/tr&gt;</c>), ignoring case. Tags carrying attributes are not matched.
/// </summary>
private static readonly Regex FindPB =
    new Regex(
        @"<\s*/?p\s*>|<\s*br\s*/?>|<\s*/?tr\s*>",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Matches a non-breaking space, either as the HTML entity <c>&amp;nbsp;</c>
/// (case-insensitive) or as the already decoded character U+00A0.
/// </summary>
/// <remarks>
/// The pattern previously consisted of a single blank character, which is what the
/// entity turns into once the surrounding text has been HTML-decoded; the entity
/// itself was therefore never found.
/// </remarks>
private static readonly Regex FindNbsp = new Regex(
    @"&nbsp;|\u00A0",
    RegexOptions.ExplicitCapture
    | RegexOptions.Multiline
    | RegexOptions.IgnoreCase);
/// <summary>
/// Lazily captures everything up to the next literal <c>$</c> into the group
/// <c>Content</c>. The original note calls the <c>$</c> an "end tag"
/// (presumably a terminator written by the crawler; confirm against the producer).
/// </summary>
private static readonly Regex FindS =
    new Regex(
        @"(?<Content>[\s\S]*?)\$",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Matches a single sentence-punctuation character from a fixed set (commas,
/// full stops, exclamation and question marks, semicolons, colons, ellipsis,
/// book-title marks and quotation marks). Used to decide whether a piece of text
/// looks like a regular sentence.
/// </summary>
private static readonly Regex IsSen =
    new Regex(
        @"[,.,。!!;;::……??《》“”""]",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Matches the literal text <c>[(h)]</c>, ignoring case.
/// </summary>
/// <remarks>
/// NOTE(review): the original note says this detects junk sentences that carry too
/// many [strong]/[h] markers, yet the pattern only covers the literal <c>[(h)]</c>;
/// confirm which marker format the caller actually produces.
/// </remarks>
private static readonly Regex IsWs =
    new Regex(
        @"\[\(h\)\]",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Matches a single middle dot, hyphen or colon. Counting the matches lets the
/// caller flag junk sentences that contain too many of these separators.
/// </summary>
/// <remarks>
/// The first alternative used to be <c>\[·]</c>, which matches the three-character
/// literal <c>[·]</c> instead of the middle dot itself; the stray backslash is removed
/// so all three alternatives are single-character classes.
/// </remarks>
private static readonly Regex IsWsM = new Regex(
    @"[·]|[-]|[::]",
    RegexOptions.ExplicitCapture
    | RegexOptions.Multiline
    | RegexOptions.IgnoreCase);
/// <summary>
/// Matches typical forum (BBS) fingerprints: a floor marker of the form
/// <c>第…楼</c> with 1 to 50 characters in between, or a "Powered by" footer that
/// is followed by <c>Dvbbs</c> or <c>Discuz</c>. Case is ignored.
/// </summary>
private static readonly Regex IsBbsInfo =
    new Regex(
        @"第[^楼]{1,50}楼|Powered\s*/?by[\s\S]*?Dvbbs|Powered\s*/?by[\s\S]*?Discuz",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Extracts the <c>content</c> value of a <c>keywords</c> meta tag into the group
/// <c>KeyWords</c>. Both attribute orders (name then content, content then name)
/// are accepted, with optional single or double quotes.
/// </summary>
/// <remarks>
/// The verbatim pattern used to end with a line break before the closing quote. That
/// line break was part of the second alternative, so a content-then-name tag matched
/// only when it was immediately followed by a newline.
/// </remarks>
private static readonly Regex mKeyWord = new Regex(
    @"<meta\s*name\s*=\s*['""]?keywords['""]?\s*content\s*=\s*['""]?(?<KeyWords>[^'"">]*)['""]?[^>]*>|<meta\s*content\s*=\s*['""]?(?<KeyWords>[^'"">]*)['""]?\s*name\s*=\s*['""]?keywords['""]?\s*[^>]*>",
    RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
/// <summary>
/// Extracts the <c>content</c> value of a <c>description</c> meta tag into the group
/// <c>description</c>. Both attribute orders (name then content, content then name)
/// are accepted, with optional single or double quotes.
/// </summary>
/// <remarks>
/// The verbatim pattern used to end with a line break before the closing quote. That
/// line break was part of the second alternative, so a content-then-name tag matched
/// only when it was immediately followed by a newline.
/// </remarks>
private static readonly Regex mDescription = new Regex(
    @"<meta\s*name\s*=\s*['""]?description['""]?\s*content\s*=\s*['""]?(?<description>[^'"">]*)['""]?[^>]*>|<meta\s*content\s*=\s*['""]?(?<description>[^'"">]*)['""]?\s*name\s*=\s*['""]?description['""]?\s*[^>]*>",
    RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
/// <summary>
/// Extracts the <c>content</c> value of a <c>tagwords</c> meta tag into the group
/// <c>tagwords</c>. Both attribute orders (name then content, content then name)
/// are accepted, with optional single or double quotes.
/// </summary>
/// <remarks>
/// The verbatim pattern used to end with a line break before the closing quote. That
/// line break was part of the second alternative, so a content-then-name tag matched
/// only when it was immediately followed by a newline.
/// </remarks>
private static readonly Regex mTag = new Regex(
    @"<meta\s*name\s*=\s*['""]?tagwords['""]?\s*content\s*=\s*['""]?(?<tagwords>[^'"">]*)['""]?[^>]*>|<meta\s*content\s*=\s*['""]?(?<tagwords>[^'"">]*)['""]?\s*name\s*=\s*['""]?tagwords['""]?\s*[^>]*>",
    RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
/// <summary>
/// Matches a whole line that looks like a short "label: value" fragment: at most
/// eight leading characters that are neither whitespace nor <c>说</c>, a colon,
/// and then at most ten further characters up to the end of the line.
/// </summary>
/// <remarks>
/// NOTE(review): the original note also mentions that no "关于" may follow the colon;
/// the pattern itself does not test for that.
/// </remarks>
private static readonly Regex IsWsMM =
    new Regex(
        @"^[^说\s]{0,8}?[::].{0,10}$",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Finds the URL marker line written by the spider: the literal prefix
/// <c>当前URL为:http://</c> followed by the rest of the line, which is captured
/// (without the scheme) in the group <c>URL</c>.
/// </summary>
private static readonly Regex txtUrl =
    new Regex(
        @"当前URL为:http://(?<URL>.*)",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Finds the anchor-description marker line written by the spider: the literal
/// prefix <c>当前链接描述为:</c> followed by the rest of the line, which is captured
/// in the group <c>Describe</c>.
/// </summary>
private static readonly Regex txtDescription =
    new Regex(
        @"当前链接描述为:(?<Describe>.*)",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
///// <summary>
///// 取需要a标签
///// </summary>
//private static readonly Regex cleanFirst = new Regex(
// @"([\u4E00-\u9FA5]|[\uFE30-\uFFA0]|[,."");])(?<Robbish1><a\s+[^>]*>)[^<]{1,6}(?<Robbish2></a>)([\u4E00-\u9FA5]|[\uFE30-\uFFA0]|[,."");])", RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
#endregion
例如:
HTML相关的正则表达式 2007-07-15 00:04
#region 相关正则表达式
/// <summary>
/// Matches every fragment that should be removed when reducing markup to plain text:
/// BBCode-style <c>[tag]...[/tag]</c> pairs, anchor elements with at least two
/// characters of link text, whole style/select/script/li elements, HTML comments,
/// any remaining tag, named and numeric character entities, <c>#</c> followed by six
/// letters or digits, and runs of whitespace.
/// </summary>
/// <remarks>
/// The BBCode tag name is captured in the named group <c>bb</c> and the closing tag is
/// matched with <c>\k&lt;bb&gt;</c>. The previous <c>\1</c> could never succeed: with
/// <see cref="RegexOptions.ExplicitCapture"/> unnamed groups are not captured, so
/// <c>\1</c> referred to the unrelated named group <c>lj</c>, which is always unset
/// while the BBCode branch is being tried. Callers that read groups by number should
/// note that <c>bb</c> is now the first named group; access by name is unaffected.
/// </remarks>
private static readonly Regex FilterAll = new Regex(
    @"(\[(?<bb>[^=\]]*)(=[^\]]*)?\][\s\S]*?\[/\k<bb>\])|(?<lj>(?=[^\u4E00-\u9FA5\uFE30-\uFFA0,."");])<a\s+[^>]*>[^<]{2,}</a>(?=[^\u4E00-\u9FA5\uFE30-\uFFA0,."");]))|(?<Style><style[\s\S]+?/style>)|(?<select><select[\s\S]+?/select>)|(?<Script><script[\s\S]*?/script>)|(?<Explein><\!\-\-[\s\S]*?\-\->)|(?<li><li(\s+[^>]+)?>[\s\S]*?/li>)|(?<Html></?\s*[^> ]+(\s*[^=>]+?=['""]?[^""']+?['""]?)*?[^\[<]*>)|(?<Other>&[a-zA-Z]+;)|(?<Other2>\#[a-z0-9]{6})|(?<Space>\s+)|(\&\#\d+\;)",
    RegexOptions.ExplicitCapture
    | RegexOptions.Multiline
    | RegexOptions.IgnoreCase);
/// <summary>
/// Matches a bare opening or closing <c>&lt;title&gt;</c> tag (no attributes),
/// tolerating whitespace inside the angle brackets and ignoring case.
/// </summary>
private static readonly Regex FindTitle =
    new Regex(
        @"<\s*/?title\s*>",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Captures the text between two bare title tags into the group <c>Content</c>.
/// The slash is optional on both the leading and the trailing tag, and the
/// content is matched lazily, so the capture ends at the first following title tag.
/// </summary>
private static readonly Regex FindTitleContent =
    new Regex(
        @"<\s*/?title\s*>(?<Content>[\s\S]*?)<\s*/?title\s*>",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Matches bare opening or closing heading tags (<c>&lt;h1&gt;</c> to <c>&lt;h6&gt;</c>)
/// and <c>&lt;strong&gt;</c> tags, ignoring case.
/// </summary>
/// <remarks>
/// The heading level digit is optional so that the previously accepted form
/// <c>&lt;h&gt;</c> still matches; the earlier pattern accepted only that form and
/// therefore missed every real HTML heading.
/// </remarks>
private static readonly Regex FindHStrong = new Regex(
    @"<\s*/?h[1-6]?\s*>|<\s*/?strong\s*>",
    RegexOptions.ExplicitCapture
    | RegexOptions.Multiline
    | RegexOptions.IgnoreCase);
/// <summary>
/// Matches bare paragraph tags (<c>&lt;p&gt;</c>, <c>&lt;/p&gt;</c>), line breaks
/// (<c>&lt;br&gt;</c>, <c>&lt;br/&gt;</c>) and table-row tags (<c>&lt;tr&gt;</c>,
/// <c>&lt;/tr&gt;</c>), ignoring case. Tags carrying attributes are not matched.
/// </summary>
private static readonly Regex FindPB =
    new Regex(
        @"<\s*/?p\s*>|<\s*br\s*/?>|<\s*/?tr\s*>",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Matches a non-breaking space, either as the HTML entity <c>&amp;nbsp;</c>
/// (case-insensitive) or as the already decoded character U+00A0.
/// </summary>
/// <remarks>
/// The pattern previously consisted of a single blank character, which is what the
/// entity turns into once the surrounding text has been HTML-decoded; the entity
/// itself was therefore never found.
/// </remarks>
private static readonly Regex FindNbsp = new Regex(
    @"&nbsp;|\u00A0",
    RegexOptions.ExplicitCapture
    | RegexOptions.Multiline
    | RegexOptions.IgnoreCase);
/// <summary>
/// Lazily captures everything up to the next literal <c>$</c> into the group
/// <c>Content</c>. The original note calls the <c>$</c> an "end tag"
/// (presumably a terminator written by the crawler; confirm against the producer).
/// </summary>
private static readonly Regex FindS =
    new Regex(
        @"(?<Content>[\s\S]*?)\$",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Matches a single sentence-punctuation character from a fixed set (commas,
/// full stops, exclamation and question marks, semicolons, colons, ellipsis,
/// book-title marks and quotation marks). Used to decide whether a piece of text
/// looks like a regular sentence.
/// </summary>
private static readonly Regex IsSen =
    new Regex(
        @"[,.,。!!;;::……??《》“”""]",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Matches the literal text <c>[(h)]</c>, ignoring case.
/// </summary>
/// <remarks>
/// NOTE(review): the original note says this detects junk sentences that carry too
/// many [strong]/[h] markers, yet the pattern only covers the literal <c>[(h)]</c>;
/// confirm which marker format the caller actually produces.
/// </remarks>
private static readonly Regex IsWs =
    new Regex(
        @"\[\(h\)\]",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Matches a single middle dot, hyphen or colon. Counting the matches lets the
/// caller flag junk sentences that contain too many of these separators.
/// </summary>
/// <remarks>
/// The first alternative used to be <c>\[·]</c>, which matches the three-character
/// literal <c>[·]</c> instead of the middle dot itself; the stray backslash is removed
/// so all three alternatives are single-character classes.
/// </remarks>
private static readonly Regex IsWsM = new Regex(
    @"[·]|[-]|[::]",
    RegexOptions.ExplicitCapture
    | RegexOptions.Multiline
    | RegexOptions.IgnoreCase);
/// <summary>
/// Matches typical forum (BBS) fingerprints: a floor marker of the form
/// <c>第…楼</c> with 1 to 50 characters in between, or a "Powered by" footer that
/// is followed by <c>Dvbbs</c> or <c>Discuz</c>. Case is ignored.
/// </summary>
private static readonly Regex IsBbsInfo =
    new Regex(
        @"第[^楼]{1,50}楼|Powered\s*/?by[\s\S]*?Dvbbs|Powered\s*/?by[\s\S]*?Discuz",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Extracts the <c>content</c> value of a <c>keywords</c> meta tag into the group
/// <c>KeyWords</c>. Both attribute orders (name then content, content then name)
/// are accepted, with optional single or double quotes.
/// </summary>
/// <remarks>
/// The verbatim pattern used to end with a line break before the closing quote. That
/// line break was part of the second alternative, so a content-then-name tag matched
/// only when it was immediately followed by a newline.
/// </remarks>
private static readonly Regex mKeyWord = new Regex(
    @"<meta\s*name\s*=\s*['""]?keywords['""]?\s*content\s*=\s*['""]?(?<KeyWords>[^'"">]*)['""]?[^>]*>|<meta\s*content\s*=\s*['""]?(?<KeyWords>[^'"">]*)['""]?\s*name\s*=\s*['""]?keywords['""]?\s*[^>]*>",
    RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
/// <summary>
/// Extracts the <c>content</c> value of a <c>description</c> meta tag into the group
/// <c>description</c>. Both attribute orders (name then content, content then name)
/// are accepted, with optional single or double quotes.
/// </summary>
/// <remarks>
/// The verbatim pattern used to end with a line break before the closing quote. That
/// line break was part of the second alternative, so a content-then-name tag matched
/// only when it was immediately followed by a newline.
/// </remarks>
private static readonly Regex mDescription = new Regex(
    @"<meta\s*name\s*=\s*['""]?description['""]?\s*content\s*=\s*['""]?(?<description>[^'"">]*)['""]?[^>]*>|<meta\s*content\s*=\s*['""]?(?<description>[^'"">]*)['""]?\s*name\s*=\s*['""]?description['""]?\s*[^>]*>",
    RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
/// <summary>
/// Extracts the <c>content</c> value of a <c>tagwords</c> meta tag into the group
/// <c>tagwords</c>. Both attribute orders (name then content, content then name)
/// are accepted, with optional single or double quotes.
/// </summary>
/// <remarks>
/// The verbatim pattern used to end with a line break before the closing quote. That
/// line break was part of the second alternative, so a content-then-name tag matched
/// only when it was immediately followed by a newline.
/// </remarks>
private static readonly Regex mTag = new Regex(
    @"<meta\s*name\s*=\s*['""]?tagwords['""]?\s*content\s*=\s*['""]?(?<tagwords>[^'"">]*)['""]?[^>]*>|<meta\s*content\s*=\s*['""]?(?<tagwords>[^'"">]*)['""]?\s*name\s*=\s*['""]?tagwords['""]?\s*[^>]*>",
    RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
/// <summary>
/// Matches a whole line that looks like a short "label: value" fragment: at most
/// eight leading characters that are neither whitespace nor <c>说</c>, a colon,
/// and then at most ten further characters up to the end of the line.
/// </summary>
/// <remarks>
/// NOTE(review): the original note also mentions that no "关于" may follow the colon;
/// the pattern itself does not test for that.
/// </remarks>
private static readonly Regex IsWsMM =
    new Regex(
        @"^[^说\s]{0,8}?[::].{0,10}$",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Finds the URL marker line written by the spider: the literal prefix
/// <c>当前URL为:http://</c> followed by the rest of the line, which is captured
/// (without the scheme) in the group <c>URL</c>.
/// </summary>
private static readonly Regex txtUrl =
    new Regex(
        @"当前URL为:http://(?<URL>.*)",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
/// <summary>
/// Finds the anchor-description marker line written by the spider: the literal
/// prefix <c>当前链接描述为:</c> followed by the rest of the line, which is captured
/// in the group <c>Describe</c>.
/// </summary>
private static readonly Regex txtDescription =
    new Regex(
        @"当前链接描述为:(?<Describe>.*)",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.ExplicitCapture);
///// <summary>
///// 取需要a标签
///// </summary>
//private static readonly Regex cleanFirst = new Regex(
// @"([\u4E00-\u9FA5]|[\uFE30-\uFFA0]|[,."");])(?<Robbish1><a\s+[^>]*>)[^<]{1,6}(?<Robbish2></a>)([\u4E00-\u9FA5]|[\uFE30-\uFFA0]|[,."");])", RegexOptions.ExplicitCapture | RegexOptions.Multiline | RegexOptions.IgnoreCase);
#endregion
参考资料: http://hi.baidu.com/ganyu21/blog/item/75000a23d75c354c9258073a.html
展开全部
打开“我的电脑”在文件夹选项里点显示所有文件夹
已赞过
已踩过
评论
收起
你对这个回答的评价是?
展开全部
用ASP之类语言 读出来或许可以
去掉HTML语言也可以在ASP读取时或读取后用正则表达式给过滤掉,
去掉HTML语言也可以在ASP读取时或读取后用正则表达式给过滤掉,
已赞过
已踩过
评论
收起
你对这个回答的评价是?
展开全部
最好就是用正则来替换掉
已赞过
已踩过
评论
收起
你对这个回答的评价是?
展开全部
你会java?C++?C#?
写程序把数据库内容取出来,然后处理之后再插入到数据库中。
对Access不太了解。
写程序把数据库内容取出来,然后处理之后再插入到数据库中。
对Access不太了解。
已赞过
已踩过
评论
收起
你对这个回答的评价是?
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询