Pages

Friday, October 8, 2010

Removing HTML tags from a string in C#

The following code can be used to remove HTML tags from a string in C#. Note that it also discards any non-ASCII characters before stripping the tags.


Code:

/// <summary>
/// Removes HTML/XML-style tags from <paramref name="strContent"/> and returns
/// the remaining text reduced to plain ASCII.
/// </summary>
/// <param name="strContent">The markup to clean. May be null or empty.</param>
/// <returns>
/// The input with every non-ASCII character dropped and every
/// <c>&lt;...&gt;</c> span removed; an empty string when the input is null or empty.
/// </returns>
/// <remarks>
/// This is a simple regex-based stripper, not an HTML parser: it does not decode
/// entities such as <c>&amp;amp;</c>, and any text between a '&lt;' and the next
/// '&gt;' is treated as a tag.
/// </remarks>
private string StripHTML(string strContent)
{
    if (string.IsNullOrEmpty(strContent))
    {
        return string.Empty;
    }

    // Look the encoding up by its registered name (WebName); EncodingName is a
    // human-readable description and is not meant to be used as a lookup key.
    // The empty replacement fallback silently drops every character that has
    // no ASCII representation.
    Encoding asciiDroppingEncoder = Encoding.GetEncoding(
        Encoding.ASCII.WebName,
        new EncoderReplacementFallback(string.Empty),
        new DecoderExceptionFallback());

    // Encoding straight from the string is equivalent to a UTF-8 round trip
    // followed by Encoding.Convert, without the intermediate buffers.
    byte[] asciiBytes = asciiDroppingEncoder.GetBytes(strContent);
    string asAscii = Encoding.ASCII.GetString(asciiBytes);

    // Singleline lets '.' match '\n', so tags that span several lines are
    // removed as well.
    return Regex.Replace(asAscii, "<.*?>", string.Empty, RegexOptions.Singleline);
}