diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs index 8e8c099c661..8c15ac96d97 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/Common/BasicHtmlWebResponseObject.Common.cs @@ -16,17 +16,6 @@ namespace Microsoft.PowerShell.Commands /// public class BasicHtmlWebResponseObject : WebResponseObject { - #region Private Fields - - private static Regex s_attribNameValueRegex; - private static Regex s_attribsRegex; - private static Regex s_imageRegex; - private static Regex s_inputFieldRegex; - private static Regex s_linkRegex; - private static Regex s_tagRegex; - - #endregion Private Fields - #region Constructors /// @@ -43,7 +32,6 @@ public BasicHtmlWebResponseObject(HttpResponseMessage response) : this(response, /// public BasicHtmlWebResponseObject(HttpResponseMessage response, Stream contentStream) : base(response, contentStream) { - EnsureHtmlParser(); InitializeContent(); InitializeRawContent(response); } @@ -82,10 +70,8 @@ public WebCmdletElementCollection InputFields { if (_inputFields == null) { - EnsureHtmlParser(); - List parsedFields = new(); - MatchCollection fieldMatch = s_inputFieldRegex.Matches(Content); + MatchCollection fieldMatch = HtmlParser.InputFieldRegex.Matches(Content); foreach (Match field in fieldMatch) { parsedFields.Add(CreateHtmlObject(field.Value, "INPUT")); @@ -109,10 +95,8 @@ public WebCmdletElementCollection Links { if (_links == null) { - EnsureHtmlParser(); - List parsedLinks = new(); - MatchCollection linkMatch = s_linkRegex.Matches(Content); + MatchCollection linkMatch = HtmlParser.LinkRegex.Matches(Content); foreach (Match link in linkMatch) { parsedLinks.Add(CreateHtmlObject(link.Value, "A")); @@ -136,10 +120,8 @@ public WebCmdletElementCollection Images { if (_images == null) { - EnsureHtmlParser(); - List parsedImages = new(); - MatchCollection imageMatch = s_imageRegex.Matches(Content); + MatchCollection imageMatch = HtmlParser.ImageRegex.Matches(Content); foreach (Match image in imageMatch) { parsedImages.Add(CreateHtmlObject(image.Value, "IMG")); @@ -188,27 +170,6 @@ private static PSObject CreateHtmlObject(string html, string tagName) return elementObject; } - private static void EnsureHtmlParser() - { - s_tagRegex ??= new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - - s_attribsRegex ??= new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - - s_attribNameValueRegex ??= new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - - s_inputFieldRegex ??= new Regex(@"]*(/?>|>.*?)", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - - s_linkRegex ??= new Regex(@"]*(/>|>.*?)", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - - s_imageRegex ??= new Regex(@"]*?>", - RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); - } - private void InitializeRawContent(HttpResponseMessage baseResponse) { StringBuilder raw = ContentHelper.GetRawContentHeader(baseResponse); @@ -223,16 +184,16 @@ private static void ParseAttributes(string outerHtml, PSObject elementObject) { // Extract just the opening tag of the HTML element (omitting the closing tag and any contents, // including contained HTML elements) - Match match = s_tagRegex.Match(outerHtml); + Match match = HtmlParser.TagRegex.Match(outerHtml); // Extract all the attribute specifications within the HTML element opening tag - MatchCollection attribMatches = s_attribsRegex.Matches(match.Value); + MatchCollection attribMatches = HtmlParser.AttribsRegex.Matches(match.Value); foreach (Match attribMatch in attribMatches) { // Extract the name and value for this attribute (allowing for variations like single/double/no // quotes, and no value at all) - Match nvMatches = s_attribNameValueRegex.Match(attribMatch.Value); + Match nvMatches = HtmlParser.AttribNameValueRegex.Match(attribMatch.Value); Debug.Assert(nvMatches.Groups.Count == 5); // Name is always captured by group #1 @@ -259,5 +220,21 @@ private static void ParseAttributes(string outerHtml, PSObject elementObject) } #endregion Methods + + // This class is needed so the static Regexes are initialized only the first time they are used + private static class HtmlParser + { + internal static readonly Regex AttribsRegex = new Regex(@"(?<=\s+)([^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + + internal static readonly Regex AttribNameValueRegex = new Regex(@"([^""'>/=\s\p{Cc}]+)(?:\s*=\s*(?:""(.*?)""|'(.*?)'|([^'"">\s]+)))?", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + + internal static readonly Regex ImageRegex = new Regex(@"]*?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + + internal static readonly Regex InputFieldRegex = new Regex(@"]*(/?>|>.*?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + + internal static readonly Regex LinkRegex = new Regex(@"]*(/>|>.*?)", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + + internal static readonly Regex TagRegex = new Regex(@"<\w+((\s+[^""'>/=\s\p{Cc}]+(\s*=\s*(?:"".*?""|'.*?'|[^'"">\s]+))?)+\s*|\s*)/?>", RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.Compiled); + } } }