如何用正则方法自动补上不完整的HTML标签？

给公司做了一个新闻系统。老板要求新闻摘要那里取200字左右。但为了美观，不能去掉HTML标签，结果出现的问题是，如提取的200字里有不完整的HTML标签，就整个页面都乱了。
比如下面的一段内容显示没问题了：
"aa   bb   cc........"

可是如加入了可视编辑器的自动标签后，问题就出现了，如：
" <font   color=red >aa   bbb...hhhh..lll. </font >"
如这一整段内容在200个字符内，就显示正常，如这段内容超过了200个字符，就只剩下没完成的HTML了，比如可能截取后为：
" <font   color=red >aa   bbb...hhhh"
这样就造成下面的字体出现颜色错误了。
同样的情况发生在前200个字有表格、图片\div\<b><font><Strong><a>……几乎一切的HTML标签。
我研究了下PJHOME的方法，可是看不懂。PJHOME的页面好像使用了这么久没发现什么问题。
以下是PJHOME的方法，不知道我贴对了没。
看它似乎用的是这个正则，对吗？
re.Pattern="\[""([^\r]*?)"";([^\r]*?);\(([^\r]*?)\)\]"

*******完整代码******
public function outHTML(loadType,outType,title)
    dim re, strMatchs, strMatch,i,j,id,aRight,hiddenC
    Set cacheStream = Server.CreateObject("ADODB.Stream")
    Set re=new RegExp
re.IgnoreCase =True
re.Global=True
re.Pattern="\[""([^\r]*?)"";([^\r]*?);\(([^\r]*?)\)\]"
Set strMatchs=re.Execute(cacheList)
For Each strMatch in strMatchs
    if loadType=strMatch.SubMatches(0) then
    dim aList,pageSize
    pageSize = blogPerPage
    if outType="list" then pageSize=pageSize*4
    aList=split(strMatch.SubMatches(2),",")
    hiddenC=strMatch.SubMatches(1)
    if stat_Admin Or stat_ShowHiddenCate then hiddenC=0
    if (ubound(aList)+1-hiddenC)>0 then
    %>
      <div class="pageContent" style="text-align:Right;overflow:hidden;height:18px;line-height:140%"><span style="float:left"><%=title%></span><%=MultiPage(ubound(aList)+1-hiddenC,pageSize,CurPage,Url_Add,"","float:Left")%> 预览模式: <a href="<%=Url_Add%>distype=normal" accesskey="1">普通</a> | <a href="<%=Url_Add%>distype=list" accesskey="2">列表</a></div>
    <%
    if outType="list" then response.write "<div class=""Content-body"" style=""text-align:Left""><table cellpadding=""2"" cellspacing=""2"" width=""100%"">"
    i=0
    Do Until i >= pageSize
      j = i + (CurPage-1)*pageSize
      if j<=ubound(aList) then
          id=split(aList(j),"|")(1)
  aRight=split(aList(j),"|")(0)
      LoadIntro id,aRight,outType
      i=i+1
       else
   if outType="list" then response.write "</table></div>"
    %>
<div class="pageContent"><%=MultiPage(ubound(aList)+1-hiddenC,pageSize,CurPage,Url_Add,"","float:Left")%></div>
    <%
     exit for
      end if
    Loop
    if outType="list" then response.write "</table></div>"
    %>
<div class="pageContent"><%=MultiPage(ubound(aList)+1-hiddenC,pageSize,CurPage,Url_Add,"","float:Left")%></div>
    <%
    else
     response.write "<b>抱歉，没有找到任何日志！</b>"
    end if
    set re=nothing
    exit function
    end if
Next
    set re = nothing
    Set cacheStream = nothing
  end function

31 个解决方案

#1

Mark

#2

Mark

#3

去除html再截取吧
或用正则截取第一段<p></p>

#4

要求不去掉HTML标签来实现这个功能是一个SB要求。

#5

这个需求，貌似去年偶回了一个。。。
并且还写了个双语的。。。
等我翻翻箱子。。。

#6



<script type="text/javascript">

var Left = {

	Max : 0, //控制查找最大数

	o : true, //控制是否继续查找

	n : 0, c : 0, x : 0, //n记录字数，c记录未结束标记数, x无效未结束标记数

	d : new Array, //d记录所有没有结尾的标记

	a : new Array, //a记录所有匹配出的内容

	sd : function (a) {

	//set d

		Left.d[Left.d.length] = a;

		return "\xff";

	},

	ss : function () {

	//scan string

		var s = Left.a.join(""), a = s.split("\xff"), n = new Array;

		for (var i = 0 ; i < a.length ; i ++) {

			n[n.length] = a[i];

			if (i < a.length - 1) n[n.length] = Left.d[i];

		}

		return n.join("");

	},

	ex : function (b, c) {

	//exec

		var a = b || c;

		if (c) {

			if (Left.n < Left.Max) {

				if (a != "\xff") Left.n ++;

				Left.a[Left.a.length] = a;

			}

		} else {

			if (/^<\//.test(a)) {

				if (Left.n < Left.Max) {

					Left.a[Left.a.length] = a;

				} else if (Left.x == 0 && Left.c > 0) {

					Left.a[Left.a.length] = a;

					if (Left.c == 1) Left.o = false;

				} else	Left.x --;

				Left.c --;

			} else {

				if (Left.n < Left.Max)

					Left.a[Left.a.length] = a;

				else

					Left.x ++;

				Left.c ++;

			}

		}

	},

	parse : function (s, n) {

	//select

		var p = /(<[^>]+>)|([\S\s])/g;

		Left.n = Left.c = Left.x = Left.d.length = Left.a.length = 0, Left.o = true;

		Left.Max = n;

		s = s.replace(/<[^>]+\/>/g, Left.sd);

		while(Left.o && p.exec(s))

			Left.ex(RegExp.$1, RegExp.$2);

		//alert(Left.a);

		return Left.ss();

	}

};


function show(s, n) {

	alert(s + " \n\n\n" + Left.parse(s, n));

}

show('<font color="red" size="2"><strong><img src="****" \/>String<\/strong><b><img src="****" \/>String</b></font><div><\/div>', 1);

show('<font color="red" size="2"><strong><img src="****" \/>String<\/strong><b><img src="****" \/>String</b></font><div><\/div>', 6);

show('<font color="red" size="2"><strong><img src="****" \/>String<\/strong><b><img src="****" \/>String</b></font><div><\/div>', 7);

</script>

#7




<%Class TLeft

	Private c_Max, c_o, c_n, c_c, c_x, c_s

	Private c_d, c_a, c_r


	Private Sub Class_Initialize()

	'c_Max 控制查找最大数

	'c_o 控制是否继续查找

	'c_n 记录字数

	'c_c 记录未结束标记数

	'c_x 无效未结束标记数

	'c_s 预处理的String

	'c_d 记录所有没有结尾的标记

	'c_a 记录所有匹配出的内容

	'c_r 公用正则对象

		c_Max = 0

		Set c_d = Server.CreateObject("Scripting.Dictionary")

		Set c_a = Server.CreateObject("Scripting.Dictionary")

		Set c_r = new RegExp

	End Sub


	Private Sub Class_Terminate

		c_d.RemoveAll : Set c_d = Nothing

		c_a.RemoveAll : Set c_a = Nothing

		Set c_r = Nothing

	End Sub


	Private Sub Sd()

	'set d

		Dim m, i

		c_r.Pattern = "<[^>]+\/>"

		c_r.Global = True

		Set m = c_r.Execute(c_s)

		For i = 0 To m.Count - 1

			c_d.Add i, m(i).Value

		Next

		c_s = c_r.Replace(c_s, "を")

	End Sub


	Private Function Ss()

	'scan string

		Dim a, i, s

		s = toString(c_a) : a = Split(s, "を") : s = ""

		For i = 0 To UBound(a)

			s = s & a(i)

			If i < UBound(a) - 1 Then s = s & c_d.Item(i)

		Next

		Ss = s

	End Function


	Private Function toString(o)

	'dic toString

		Dim a, i, s

		a = o.Keys

		For i = 0 To o.Count - 1

			s = s & o.Item(a(i))

		Next

		toString = s

	End Function


	Private Sub Exec(a, b, i)

		If a <> "" Then

			If c_n < c_Max Then

				If a <> "を" Then c_n = c_n + 1

				c_a.Add i, a

			End If

		Else

			If Instr(b, "</") = 1 Then

				If c_n < c_Max Then

					c_a.Add i, b

				ElseIf c_x = 0 And c_c > 0 Then

					c_a.Add i, b

					If c_c = 1 Then c_o = False

				Else

					c_x = c_x - 1

				End If

				c_c = c_c - 1

			Else

				If c_n < c_Max Then

					c_a.Add i, b

				Else

					c_x = c_x + 1

				End If

				c_c = c_c + 1

			End If

		End If

	End Sub


	Private Sub Start()

		Dim m, i

		Call Sd


		c_r.Pattern = "(<[^>]+>)|([\S\s])"

		c_r.Global = True

		Set m = c_r.Execute(c_s)

		For i = 0 To m.Count - 1

			If c_o = False Then Exit For

			Exec m(i).SubMatches(1), m(i).SubMatches(0), i

		Next


	End Sub


	Public Property Get Parse(s, n)

	'return String

		c_o = True : c_Max = n : c_n = 0 : c_c = 0 : c_x = 0 : c_s = s

		c_a.RemoveAll : c_d.RemoveAll

		Call Start

		Parse = Ss

	End Property

End Class


Dim wc, strng : strng = "<font color=""red"" size=""2""><strong><img src=""****"" />String</strong>" _

	& "<b><img src=""****"" />String</b></font><div></div>"

Set wc = new TLeft

With Response

	.Write Server.HTMLEncode(wc.Parse(strng, 1))

	.Write "<hr />"

	.Write Server.HTMLEncode(wc.Parse(strng, 6))

	.Write "<hr />"

	.Write Server.HTMLEncode(wc.Parse(strng, 7))

End With

Set wc = Nothing

%>

#8

刚看了下。貌似有个小BUG。。。就是关于无结束标记的。。。
嘿嘿。。。如果不希望出现那重的话，自行修改吧。。。。:D
飘走。。

#9

JS版的那个貌似没有那个BUG。。

#10

muxrwc，谢谢你的回复。我测试下先。我比较喜欢ASP版的。因为用JS版的不知道会不会造成搜索引擎搜索不到内容？能修改那个ASP版的吗？

#11



<%Class TLeft

	Private c_Max, c_o, c_n, c_c, c_x, c_s

	Private c_d, c_a, c_r


	Private Sub Class_Initialize()

	'c_Max 控制查找最大数

	'c_o 控制是否继续查找

	'c_n 记录字数

	'c_c 记录未结束标记数

	'c_x 无效未结束标记数

	'c_s 预处理的String

	'c_d 记录所有没有结尾的标记

	'c_a 记录所有匹配出的内容

	'c_r 公用正则对象

		c_Max = 0

		Set c_d = Server.CreateObject("Scripting.Dictionary")

		Set c_a = Server.CreateObject("Scripting.Dictionary")

		Set c_r = new RegExp

	End Sub


	Private Sub Class_Terminate

		c_d.RemoveAll : Set c_d = Nothing

		c_a.RemoveAll : Set c_a = Nothing

		Set c_r = Nothing

	End Sub


	Private Sub Sd()

	'set d

		Dim m, i

		c_r.Pattern = "<[^>]+\/>"

		c_r.Global = True

		Set m = c_r.Execute(c_s)


		For i = 0 To m.Count - 1

			c_d.Add i, m(i).Value

		Next

		c_s = c_r.Replace(c_s, "を")

	End Sub


	Private Function Ss()

	'scan string

		Dim a, i, s

		s = toString(c_a) : a = Split(s, "を") : s = ""

		For i = 0 To UBound(a)

			s = s & a(i)

			If i < UBound(a) Then s = s & c_d.Item(i)

		Next

		Ss = s

	End Function


	Private Function toString(o)

	'dic toString

		Dim a, i, s

		a = o.Keys

		For i = 0 To o.Count - 1

			s = s & o.Item(a(i))

		Next

		toString = s

	End Function


	Private Sub Exec(a, b, i)

		If a <> "" Then

			If c_n < c_Max Then

				If a <> "を" Then c_n = c_n + 1

				c_a.Add i, a

			End If

		Else

			If Instr(b, "</") = 1 Then

				If c_n < c_Max Then

					c_a.Add i, b

				ElseIf c_x = 0 And c_c > 0 Then

					c_a.Add i, b

					If c_c = 1 Then c_o = False

				Else

					c_x = c_x - 1

				End If

				c_c = c_c - 1

			Else

				If c_n < c_Max Then

					c_a.Add i, b

				Else

					c_x = c_x + 1

				End If

				c_c = c_c + 1

			End If

		End If

	End Sub


	Private Sub Start()

		Dim m, i

		Call Sd


		c_r.Pattern = "(<[^>]+>)|([\S\s])"

		c_r.Global = True

		Set m = c_r.Execute(c_s)

		For i = 0 To m.Count - 1

			If c_o = False Then Exit For

			Exec m(i).SubMatches(1), m(i).SubMatches(0), i

		Next


	End Sub


	Public Property Get Parse(s, n)

	'return String

		c_o = True : c_Max = n : c_n = 0 : c_c = 0 : c_x = 0 : c_s = s

		c_a.RemoveAll : c_d.RemoveAll

		Call Start

		Parse = Ss

	End Property

End Class


Dim wc, strng : strng = "<font color=""red"" size=""2""><strong><img src=""****"" />String</strong>" _

	& "<b><img src=""****"" />String</b></font><div></div>"

Set wc = new TLeft

With Response

	.Write Server.HTMLEncode(wc.Parse(strng, 1))

	.Write "<hr />"

	.Write Server.HTMLEncode(wc.Parse(strng, 6))

	.Write "<hr />"

	.Write Server.HTMLEncode(wc.Parse(strng, 7))

End With

Set wc = Nothing

%>

#12

谢谢。。我测试了下，发现很多不能封好，比如：
strng = "<div><font color=""red"" size=""2""><strong><img src=""****"" />String</strong>" _
& "<b><img src=""****"" />String</b></font><div></div>"
没有把DIV解决好。如碰到不完整的标签应该去掉或者补上才对。

#13

比如什么？哪有问题？

#14

学习！

#15

比如上面这个例子里，有一个DIV没闭合，输出后，还是没闭合。

#16

当然。。我这个例子是匹配里面所有的标记和内容。。
如果内容够数量了，则不继续匹配。。。
但是它还会继续匹配未结尾的标记。直到找全位置。。。

而不是
匹配标记和内容。。内容匹配完毕，自动加入为结尾标记（虽然这个方法简单，但是我用的是上面的方法）

#17

所以说。。。你的标记必须是完整的才可以用此函数操作。。。

#18

假如新闻内容有600个字，取出200字的话，那么就有可能已经是HTML标签不完整了。再用上面方法处理，就不会自己自动补充HTML标签而造成整个页面混乱了。

#19

- -，你取的时候数据不是完整的么？
干什么非要Left(数据,200)
然后在用这个函数呢？

并且这种需求，我相信不会有大批量的使用吧。所以说，效率的问题。。。应该是可以忽略的。。。

不过修改成补标记的策略也很简单。。。。
你可以自己写下。。。按照我那个例子里的处理方法重新写个就行了:D

偶闪了。。。

#20

不一定非要200个字，但新闻的摘要是不能太长吧？因为有的上千字的内容啊。。。我看PJHOME的代码好简单，可是居然看不懂。。惭愧

#21

upup

#22

upup

#23

直接调用PJHOME的代码不就行了吗?我也在用PJBLOG,很好用

#24

- -
补标记嘛。。等我周一上班，如果有空的话给你写一个好了。。。

#25

我知道PJHOME的方法了。它用FCKEDITOR的时候就截取行数的方法截取文章。所以就一定不会出错误了。
如用UBB的，就截取字符的方法。因为UBB没这个HTML的问题
靠。。解决了。真聪明的方法。

#26

# muxrwc，我还是需要那个补HTML的方法的。

#27

我还想咨询一下。。。。
你取内容的一段的话，怎么个取法？
难道不是拿所有内容出来仍给函数么？

如果补标记的话，需要考虑到嵌套啊，如果用递归加字典树的话，那效率一定不高啊。。
如果要用字符串，那处理起来相当的麻烦，效率也不能高到哪去。。。

#28

# muxrwc, 似乎我前面都解释得很清楚了哦。。。就是无论新闻有多长多短都好，我们都必须只可以截取前面的200-300个字符。然后被截取后的内容里就有可能存在不完整标签了。然后我要做的就是要做个程序将这个不完整的标签都补上。否则会导致全页面混乱。当然用普通常规的方法查询和补的效率是肯定低的，因为HTML有很多标签。这就是我要问的有没个正则的处理方法提高效率。

#29

我也遇到这种,没人办法
我的办法是另加一字段,输入简介......限文本

#30

楼主这个问题你采用模板的解决方案，采用模板的形式，输入文本是可以控制长度的，然后从预定的文件或数据库里读出相关的模板代码，再把文本信息替换进去就行了。

#31

标记匹配的，倒是用php写了一个
http://www.hua2r.com/blog/20c3dfecb972ccb0421cd0402bfa1106.html

用着也可以，不过不会改asp的。
如果哪位大哥改个asp出来的告诉我下下O.o

#1

Mark

#2

Mark

#3

去除html再截取吧
或用正则截取第一段<p></p>

#4

要求不去掉HTML标签来实现这个功能是一个SB要求。

#5

这个需求，貌似去年偶回了一个。。。
并且还写了个双语的。。。
等我翻翻箱子。。。

#6



<script type="text/javascript">

var Left = {

	Max : 0, //控制查找最大数

	o : true, //控制是否继续查找

	n : 0, c : 0, x : 0, //n记录字数，c记录未结束标记数, x无效未结束标记数

	d : new Array, //d记录所有没有结尾的标记

	a : new Array, //a记录所有匹配出的内容

	sd : function (a) {

	//set d

		Left.d[Left.d.length] = a;

		return "\xff";

	},

	ss : function () {

	//scan string

		var s = Left.a.join(""), a = s.split("\xff"), n = new Array;

		for (var i = 0 ; i < a.length ; i ++) {

			n[n.length] = a[i];

			if (i < a.length - 1) n[n.length] = Left.d[i];

		}

		return n.join("");

	},

	ex : function (b, c) {

	//exec

		var a = b || c;

		if (c) {

			if (Left.n < Left.Max) {

				if (a != "\xff") Left.n ++;

				Left.a[Left.a.length] = a;

			}

		} else {

			if (/^<\//.test(a)) {

				if (Left.n < Left.Max) {

					Left.a[Left.a.length] = a;

				} else if (Left.x == 0 && Left.c > 0) {

					Left.a[Left.a.length] = a;

					if (Left.c == 1) Left.o = false;

				} else	Left.x --;

				Left.c --;

			} else {

				if (Left.n < Left.Max)

					Left.a[Left.a.length] = a;

				else

					Left.x ++;

				Left.c ++;

			}

		}

	},

	parse : function (s, n) {

	//select

		var p = /(<[^>]+>)|([\S\s])/g;

		Left.n = Left.c = Left.x = Left.d.length = Left.a.length = 0, Left.o = true;

		Left.Max = n;

		s = s.replace(/<[^>]+\/>/g, Left.sd);

		while(Left.o && p.exec(s))

			Left.ex(RegExp.$1, RegExp.$2);

		//alert(Left.a);

		return Left.ss();

	}

};


function show(s, n) {

	alert(s + " \n\n\n" + Left.parse(s, n));

}

show('<font color="red" size="2"><strong><img src="****" \/>String<\/strong><b><img src="****" \/>String</b></font><div><\/div>', 1);

show('<font color="red" size="2"><strong><img src="****" \/>String<\/strong><b><img src="****" \/>String</b></font><div><\/div>', 6);

show('<font color="red" size="2"><strong><img src="****" \/>String<\/strong><b><img src="****" \/>String</b></font><div><\/div>', 7);

</script>

#7




<%Class TLeft

	Private c_Max, c_o, c_n, c_c, c_x, c_s

	Private c_d, c_a, c_r


	Private Sub Class_Initialize()

	'c_Max 控制查找最大数

	'c_o 控制是否继续查找

	'c_n 记录字数

	'c_c 记录未结束标记数

	'c_x 无效未结束标记数

	'c_s 预处理的String

	'c_d 记录所有没有结尾的标记

	'c_a 记录所有匹配出的内容

	'c_r 公用正则对象

		c_Max = 0

		Set c_d = Server.CreateObject("Scripting.Dictionary")

		Set c_a = Server.CreateObject("Scripting.Dictionary")

		Set c_r = new RegExp

	End Sub


	Private Sub Class_Terminate

		c_d.RemoveAll : Set c_d = Nothing

		c_a.RemoveAll : Set c_a = Nothing

		Set c_r = Nothing

	End Sub


	Private Sub Sd()

	'set d

		Dim m, i

		c_r.Pattern = "<[^>]+\/>"

		c_r.Global = True

		Set m = c_r.Execute(c_s)

		For i = 0 To m.Count - 1

			c_d.Add i, m(i).Value

		Next

		c_s = c_r.Replace(c_s, "を")

	End Sub


	Private Function Ss()

	'scan string

		Dim a, i, s

		s = toString(c_a) : a = Split(s, "を") : s = ""

		For i = 0 To UBound(a)

			s = s & a(i)

			If i < UBound(a) - 1 Then s = s & c_d.Item(i)

		Next

		Ss = s

	End Function


	Private Function toString(o)

	'dic toString

		Dim a, i, s

		a = o.Keys

		For i = 0 To o.Count - 1

			s = s & o.Item(a(i))

		Next

		toString = s

	End Function


	Private Sub Exec(a, b, i)

		If a <> "" Then

			If c_n < c_Max Then

				If a <> "を" Then c_n = c_n + 1

				c_a.Add i, a

			End If

		Else

			If Instr(b, "</") = 1 Then

				If c_n < c_Max Then

					c_a.Add i, b

				ElseIf c_x = 0 And c_c > 0 Then

					c_a.Add i, b

					If c_c = 1 Then c_o = False

				Else

					c_x = c_x - 1

				End If

				c_c = c_c - 1

			Else

				If c_n < c_Max Then

					c_a.Add i, b

				Else

					c_x = c_x + 1

				End If

				c_c = c_c + 1

			End If

		End If

	End Sub


	Private Sub Start()

		Dim m, i

		Call Sd


		c_r.Pattern = "(<[^>]+>)|([\S\s])"

		c_r.Global = True

		Set m = c_r.Execute(c_s)

		For i = 0 To m.Count - 1

			If c_o = False Then Exit For

			Exec m(i).SubMatches(1), m(i).SubMatches(0), i

		Next


	End Sub


	Public Property Get Parse(s, n)

	'return String

		c_o = True : c_Max = n : c_n = 0 : c_c = 0 : c_x = 0 : c_s = s

		c_a.RemoveAll : c_d.RemoveAll

		Call Start

		Parse = Ss

	End Property

End Class


Dim wc, strng : strng = "<font color=""red"" size=""2""><strong><img src=""****"" />String</strong>" _

	& "<b><img src=""****"" />String</b></font><div></div>"

Set wc = new TLeft

With Response

	.Write Server.HTMLEncode(wc.Parse(strng, 1))

	.Write "<hr />"

	.Write Server.HTMLEncode(wc.Parse(strng, 6))

	.Write "<hr />"

	.Write Server.HTMLEncode(wc.Parse(strng, 7))

End With

Set wc = Nothing

%>

#8

刚看了下。貌似有个小BUG。。。就是关于无结束标记的。。。
嘿嘿。。。如果不希望出现那重的话，自行修改吧。。。。:D
飘走。。

#9

JS版的那个貌似没有那个BUG。。

#10

muxrwc，谢谢你的回复。我测试下先。我比较喜欢ASP版的。因为用JS版的不知道会不会造成搜索引擎搜索不到内容？能修改那个ASP版的吗？

#11



<%Class TLeft

	Private c_Max, c_o, c_n, c_c, c_x, c_s

	Private c_d, c_a, c_r


	Private Sub Class_Initialize()

	'c_Max 控制查找最大数

	'c_o 控制是否继续查找

	'c_n 记录字数

	'c_c 记录未结束标记数

	'c_x 无效未结束标记数

	'c_s 预处理的String

	'c_d 记录所有没有结尾的标记

	'c_a 记录所有匹配出的内容

	'c_r 公用正则对象

		c_Max = 0

		Set c_d = Server.CreateObject("Scripting.Dictionary")

		Set c_a = Server.CreateObject("Scripting.Dictionary")

		Set c_r = new RegExp

	End Sub


	Private Sub Class_Terminate

		c_d.RemoveAll : Set c_d = Nothing

		c_a.RemoveAll : Set c_a = Nothing

		Set c_r = Nothing

	End Sub


	Private Sub Sd()

	'set d

		Dim m, i

		c_r.Pattern = "<[^>]+\/>"

		c_r.Global = True

		Set m = c_r.Execute(c_s)


		For i = 0 To m.Count - 1

			c_d.Add i, m(i).Value

		Next

		c_s = c_r.Replace(c_s, "を")

	End Sub


	Private Function Ss()

	'scan string

		Dim a, i, s

		s = toString(c_a) : a = Split(s, "を") : s = ""

		For i = 0 To UBound(a)

			s = s & a(i)

			If i < UBound(a) Then s = s & c_d.Item(i)

		Next

		Ss = s

	End Function


	Private Function toString(o)

	'dic toString

		Dim a, i, s

		a = o.Keys

		For i = 0 To o.Count - 1

			s = s & o.Item(a(i))

		Next

		toString = s

	End Function


	Private Sub Exec(a, b, i)

		If a <> "" Then

			If c_n < c_Max Then

				If a <> "を" Then c_n = c_n + 1

				c_a.Add i, a

			End If

		Else

			If Instr(b, "</") = 1 Then

				If c_n < c_Max Then

					c_a.Add i, b

				ElseIf c_x = 0 And c_c > 0 Then

					c_a.Add i, b

					If c_c = 1 Then c_o = False

				Else

					c_x = c_x - 1

				End If

				c_c = c_c - 1

			Else

				If c_n < c_Max Then

					c_a.Add i, b

				Else

					c_x = c_x + 1

				End If

				c_c = c_c + 1

			End If

		End If

	End Sub


	Private Sub Start()

		Dim m, i

		Call Sd


		c_r.Pattern = "(<[^>]+>)|([\S\s])"

		c_r.Global = True

		Set m = c_r.Execute(c_s)

		For i = 0 To m.Count - 1

			If c_o = False Then Exit For

			Exec m(i).SubMatches(1), m(i).SubMatches(0), i

		Next


	End Sub


	Public Property Get Parse(s, n)

	'return String

		c_o = True : c_Max = n : c_n = 0 : c_c = 0 : c_x = 0 : c_s = s

		c_a.RemoveAll : c_d.RemoveAll

		Call Start

		Parse = Ss

	End Property

End Class


Dim wc, strng : strng = "<font color=""red"" size=""2""><strong><img src=""****"" />String</strong>" _

	& "<b><img src=""****"" />String</b></font><div></div>"

Set wc = new TLeft

With Response

	.Write Server.HTMLEncode(wc.Parse(strng, 1))

	.Write "<hr />"

	.Write Server.HTMLEncode(wc.Parse(strng, 6))

	.Write "<hr />"

	.Write Server.HTMLEncode(wc.Parse(strng, 7))

End With

Set wc = Nothing

%>

#12

#13

比如什么？哪有问题？

#14

学习！

#15

比如上面这个例子里，有一个DIV没闭合，输出后，还是没闭合。

#16

#17

所以说。。。你的标记必须是完整的才可以用此函数操作。。。

#18

#19

#20

不一定非要200个字，但新闻的摘要是不能太长吧？因为有的上千字的内容啊。。。我看PJHOME的代码好简单，可是居然看不懂。。惭愧

#21

upup

#22

upup

#23

直接调用PJHOME的代码不就行了吗?我也在用PJBLOG,很好用

#24

- -
补标记嘛。。等我周一上班，如果有空的话给你写一个好了。。。

#25

#26

# muxrwc，我还是需要那个补HTML的方法的。

#27

#28

#29

我也遇到这种,没人办法
我的办法是另加一字段,输入简介......限文本

如何用正则方法自动补上不完整的HTML标签？

31 个解决方案

#1

#2

#3

#4

#5

#6

#7

#8

#9

#10

#11

#12

#13

#14

#15

#16

#17

#18

#19

#20

#21

#22

#23

#24

#25

#26

#27

#28

#29

#30

#31

#1

#2

#3

#4

#5

#6

#7

#8

#9

#10

#11

#12

#13

#14

#15

#16

#17

#18

#19

#20

#21

#22

#23

#24

#25

#26

#27

#28

#29

#30

#31

相关文章