/// Extracts text runs from HTML source, together with any option value,
/// hyperlink target and image source that immediately precede them.
///
/// The input is scanned repeatedly with one combined regular expression.
/// For every match that yields at least one non-empty field, a line of the
/// form "<n> <text> <img src> <a href> <option value> \r\n" is appended to
/// the result (empty fields are omitted, <n> is a 1-based running counter).
///
/// @param htmlcode  HTML source to scan. It is only read, never modified.
/// @return          The concatenated, numbered lines; empty if nothing matched.
///
/// NOTE(review): the regex flags come from `Para`, which is declared outside
/// this function; case sensitivity of "IMG"/"a"/"option" depends on it.
/// NOTE(review): inside a character class '|' is a literal character, so
/// classes such as ["|'| ] also accept '|' as a delimiter. The patterns are
/// kept exactly as they were to avoid changing what is matched.
CString CHtmlcodeIE::ExtractTextAImg(CString htmlcode)
{
    // Optional "option ... value=..." and "a ... href=..." prefixes
    // (capture groups 1 and 2).
    const CString linkPattern("(?:option .*?value=[\"|\'| ](.*?)[\"|\'].*?)?(?:a .*?href=[\"|\'| ](.*?)[\"|\'| ].*?)?");
    // Optional "IMG ... src=..." prefix (capture group 3).
    const CString imagePattern("(?:IMG .*?src=[\"|\'](.*?)[\"|\'].*?)?");
    // Mandatory text between a closing '>' and the next '<' (capture group 4).
    const CString textPattern(">([^>]+)<");

    const CString pattern = linkPattern + imagePattern + textPattern;
    boost::regex express(pattern, Para);
    boost::cmatch result;

    CString retstr;
    CString countstr;
    int count = 0;

    // Scan with a read-only cursor into htmlcode. The buffer is never
    // reassigned, so the sub-match pointers held by `result` stay valid.
    const char* cursor = htmlcode;
    while (boost::regex_search(cursor, result, express))
    {
        const int lastGroup = static_cast<int>(result.size()) - 1;

        // Collect capture groups from last to first: text, img, href, option.
        CString groupstr;
        for (int i = lastGroup; i >= 1; --i)
        {
            CString ret(result[i].str().c_str());
            ret.TrimLeft();
            ret.TrimRight();
            // Only the text group is passed through DeleteTag.
            if (i == lastGroup)
                ret = DeleteTag(ret);
            if (!ret.IsEmpty())
            {
                groupstr += ret;
                groupstr += " ";
            }
        }

        if (!groupstr.IsEmpty())
        {
            ++count;
            countstr.Format("%d", count);
            retstr += countstr;
            retstr += " ";
            retstr += groupstr;
            retstr += "\r\n";
        }

        // Continue right after the whole match. The previous code indexed
        // result[result.size()], which is one past the last capture group
        // and does not refer to any position in the input. Every match
        // consumes at least ">x<", so the cursor always moves forward.
        cursor = result[0].second;
    }
    return retstr;
}
|