匹配单层 HTML 的小 demo,应该能匹配大多数 HTML 字符串。多层(嵌套)的 HTML 标签解析不出来。可能有小 bug,我抛砖引玉一下,哈哈。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace ResolveHtmlText
{
    /// <summary>
    /// Small demo that extracts the inner text of single-level (non-nested)
    /// HTML elements and concatenates it. Elements that contain other
    /// elements are not matched, and text outside any element is ignored.
    /// </summary>
    internal class Program
    {
        // First alternative: an element with exactly one attribute whose quoted
        // value is "name:X" with a single-character X, optionally followed by ';'.
        // NOTE: inside "[^<^>]" the second '^' is a literal, so the class also
        // excludes the caret character; this is kept as in the original pattern.
        private const string SingleShortAttributeElement =
            @"<\s*[a-zA-Z0-9]+\s*[a-zA-Z]+\s*=\s*'\s*[a-zA-Z]+\s*:\s*[^<^>];?'\s*>" +
            @"(?<text>[^<^>]*)" +
            @"<\s*/\s*[a-zA-Z0-9]+\s*>";

        // Second alternative: an element with zero or more attributes, each a
        // single-quoted list of "name: value" pairs separated by ';' (the
        // trailing ';' is optional).
        private const string GeneralElement =
            @"<\s*[a-zA-Z0-9]+\s*" +
            @"(\s*[a-zA-Z-]+\s*=\s*'" +
            @"(\s*[a-zA-Z-]+\s*:\s*[^:^;^<^>]+\s*;\s*)*" +
            @"(\s*[a-zA-Z-]+\s*:\s*[^:^;^<^>]+\s*)\s*;?\s*'\s*)*" +
            @"\s*>" +
            @"(?<text>[^<^>]*)" +
            @"<\s*/\s*[a-zA-Z0-9]+\s*>";

        // Built once and reused. The opening and closing tag names are matched
        // independently, so they are not required to be equal.
        private static readonly Regex SingleLevelElement =
            new Regex(SingleShortAttributeElement + "|" + GeneralElement);

        /// <summary>
        /// Runs the demo on a fixed sample string and prints the original
        /// string, every matched element and the concatenated text.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        private static void Main(string[] args)
        {
            string text =
                " <span style='color:#1F497D'><span>y<span></span>" +
                " <span style='color:#1F497D;'>1</span>" +
                " <span style='color:#1F497D;background-color:#123456'>2</span>" +
                "<span style='color:#1F497D;background-color:#123456;text-align:center'>3</span>" +
                " <span style='color:#1F497D;background-color:#123456;text-align:center;'>4</span>" +
                " <span style='color:#1F497D;background-color:#123456;text-align:center;tt-l: 134;'>5</span>" +
                "ggjf<a>123456</a>";

            Console.WriteLine("原字符串:" + text);

            string extracted = ExtractText(text, Console.WriteLine);

            Console.WriteLine("解析出的结果:" + extracted);
            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }

        /// <summary>
        /// Normalizes quotes and a few HTML entities in <paramref name="html"/>,
        /// then concatenates the inner text of every single-level element found.
        /// </summary>
        /// <param name="html">HTML fragment to scan; may be null or empty.</param>
        /// <param name="trace">
        /// Optional callback that receives one line per matched element
        /// (the matched markup prefixed with "临时值:"); pass null to disable.
        /// </param>
        /// <returns>
        /// The concatenated inner text, or an empty string when the input is
        /// null/empty or nothing matches.
        /// </returns>
        internal static string ExtractText(string html, Action<string> trace = null)
        {
            if (string.IsNullOrEmpty(html))
            {
                return string.Empty;
            }

            // Attribute values are matched with single quotes only, so fold
            // both literal and entity-encoded double quotes into "'".
            string text = html.Replace("\"", "'");
            text = text.Replace("&quot;", "'");
            // NOTE(review): the published listing had its entities decoded; the
            // "&nbsp;" below is reconstructed from the neighbouring replacements.
            text = text.Replace("&nbsp;", "");
            text = text.Replace("&lt;", "<"); // turn the escape code of '<' back into '<'
            text = text.Replace("&gt;", ">"); // turn the escape code of '>' back into '>'

            StringBuilder sb = new StringBuilder();
            foreach (Match m in SingleLevelElement.Matches(text))
            {
                if (trace != null)
                {
                    trace("临时值:" + m.Value);
                }

                // The named group holds exactly the text between the tags, so
                // no second regex or Substring trimming is needed, and text
                // that spans several lines is kept.
                sb.Append(m.Groups["text"].Value);
            }

            return sb.ToString();
        }
    }
}
以上就是 C# 解析单层 HTML 中的文本并拼接起来的详细内容,更多关于 C# 解析单层 HTML 中的文本并拼接起来的资料,请关注九品源码其它相关文章!