From 64b238cf58b7d9024973668cac3f3ee118e6a248 Mon Sep 17 00:00:00 2001 From: CarloToso <105941898+CarloToso@users.noreply.github.com> Date: Thu, 8 Dec 2022 03:04:27 +0100 Subject: [PATCH 1/9] get { return to -> --- .../utility/WebCmdlet/StreamHelper.cs | 32 +++---------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs index 9314cfd1a5d..f8a525e6781 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs @@ -49,43 +49,19 @@ internal WebResponseContentMemoryStream(Stream stream, int initialCapacity, Cmdl /// /// - public override bool CanRead - { - get - { - return true; - } - } + public override bool CanRead => true; /// /// - public override bool CanSeek - { - get - { - return true; - } - } + public override bool CanSeek => true; /// /// - public override bool CanTimeout - { - get - { - return base.CanTimeout; - } - } + public override bool CanTimeout => base.CanTimeout; /// /// - public override bool CanWrite - { - get - { - return true; - } - } + public override bool CanWrite =>true; /// /// From 13c0ad9becf1adb596a6b96cea8b14a2d8e0c745 Mon Sep 17 00:00:00 2001 From: CarloToso <105941898+CarloToso@users.noreply.github.com> Date: Thu, 8 Dec 2022 03:06:03 +0100 Subject: [PATCH 2/9] #endregion --- .../commands/utility/WebCmdlet/StreamHelper.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs index f8a525e6781..66d3ae333fa 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs +++ 
b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs @@ -28,7 +28,7 @@ internal class WebResponseContentMemoryStream : MemoryStream private bool _isInitialized = false; private readonly Cmdlet _ownerCmdlet; - #endregion + #endregion Data #region Constructors /// @@ -45,7 +45,7 @@ internal WebResponseContentMemoryStream(Stream stream, int initialCapacity, Cmdl _originalStreamToProxy = stream; _ownerCmdlet = cmdlet; } - #endregion + #endregion Constructors /// /// From a81c1c8c4d126d2bcc5ef625aae33021534f8b04 Mon Sep 17 00:00:00 2001 From: CarloToso <105941898+CarloToso@users.noreply.github.com> Date: Thu, 8 Dec 2022 03:11:47 +0100 Subject: [PATCH 3/9] add regex to detect encoding from xml declaration --- .../commands/utility/WebCmdlet/StreamHelper.cs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs index 66d3ae333fa..c5ced97360e 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs @@ -418,15 +418,20 @@ internal static bool TryGetEncoding(string characterSet, out Encoding encoding) return result; } - private static readonly Regex s_metaexp = new( + private static readonly Regex s_metaRegex = new( @"<meta\s.*[^.><]*charset\s*=\s*[""'\n]?(?<charset>[A-Za-z].[^\s""'\n<>]*)[\s""'\n>]", RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase ); + + private static readonly Regex s_xmlRegex = new( + @"<\?xml\s.*[^.><]*encoding\s*=\s*[""'\n]?(?<encoding>[A-Za-z].[^\s""'\n<>]*)[\s""'\n>]", + RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase + ); internal static string
DecodeStream(Stream stream, ref Encoding encoding) { bool isDefaultEncoding = false; - if (encoding == null) + if (encoding is null) { // Use the default encoding if one wasn't provided encoding = ContentHelper.GetDefaultEncoding(); @@ -441,11 +446,12 @@ internal static string DecodeStream(Stream stream, ref Encoding encoding) // check for a charset attribute on the meta element to override the default // we only look within the first 1k characters as the meta tag is in the head // tag which is at the start of the document - Match match = s_metaexp.Match(content.Substring(0, Math.Min(content.Length, 1024))); - if (match.Success) + Match match = s_metaRegex.Match(content.Substring(0, Math.Min(content.Length, 1024))); + Match match2 = s_xmlRegex.Match(content.Substring(0, Math.Min(content.Length, 256))); + if (match.Success || match2.Success) { Encoding localEncoding = null; - string characterSet = match.Groups["charset"].Value; + string characterSet = (string.IsNullOrEmpty(match.Groups["charset"].Value)) ? 
match2.Groups["encoding"].Value : match.Groups["charset"].Value; if (TryGetEncoding(characterSet, out localEncoding)) { From 6db83db2517552f08f1f687825b581c401f2d8be Mon Sep 17 00:00:00 2001 From: CarloToso <105941898+CarloToso@users.noreply.github.com> Date: Thu, 8 Dec 2022 03:14:00 +0100 Subject: [PATCH 4/9] remove useless do-while --- .../utility/WebCmdlet/StreamHelper.cs | 33 +++++++++---------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs index c5ced97360e..e0b53d99fe4 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs @@ -441,27 +441,24 @@ internal static string DecodeStream(Stream stream, ref Encoding encoding) string content = StreamToString(stream, encoding); if (isDefaultEncoding) { - do + // check for a charset attribute on the meta element to override the default + // we only look within the first 1k characters as the meta tag is in the head + // tag which is at the start of the document + Match match = s_metaRegex.Match(content.Substring(0, Math.Min(content.Length, 1024))); + Match match2 = s_xmlRegex.Match(content.Substring(0, Math.Min(content.Length, 256))); + if (match.Success || match2.Success) { - // check for a charset attribute on the meta element to override the default - // we only look within the first 1k characters as the meta tag is in the head - // tag which is at the start of the document - Match match = s_metaRegex.Match(content.Substring(0, Math.Min(content.Length, 1024))); - Match match2 = s_xmlRegex.Match(content.Substring(0, Math.Min(content.Length, 256))); - if (match.Success || match2.Success) - { - Encoding localEncoding = null; - string characterSet = (string.IsNullOrEmpty(match.Groups["charset"].Value)) ? 
match2.Groups["encoding"].Value : match.Groups["charset"].Value; + Encoding localEncoding = null; + string characterSet = (string.IsNullOrEmpty(match.Groups["charset"].Value)) ? match2.Groups["encoding"].Value : match.Groups["charset"].Value; - if (TryGetEncoding(characterSet, out localEncoding)) - { - stream.Seek(0, SeekOrigin.Begin); - content = StreamToString(stream, localEncoding); - // report the encoding used. - encoding = localEncoding; - } + if (TryGetEncoding(characterSet, out localEncoding)) + { + stream.Seek(0, SeekOrigin.Begin); + content = StreamToString(stream, localEncoding); + // report the encoding used. + encoding = localEncoding; } - } while (false); + } } return content; From 42f4f2bc4569cc60a4daae7fa980b2c674c2b9a4 Mon Sep 17 00:00:00 2001 From: CarloToso <105941898+CarloToso@users.noreply.github.com> Date: Thu, 8 Dec 2022 03:16:03 +0100 Subject: [PATCH 5/9] fix typo --- .../commands/utility/WebCmdlet/StreamHelper.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs index e0b53d99fe4..9883650cec7 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs @@ -61,7 +61,7 @@ internal WebResponseContentMemoryStream(Stream stream, int initialCapacity, Cmdl /// /// - public override bool CanWrite =>true; + public override bool CanWrite => true; /// /// From 282c328d8035a700ad79313704a9ce12b4fe529e Mon Sep 17 00:00:00 2001 From: CarloToso <105941898+CarloToso@users.noreply.github.com> Date: Thu, 8 Dec 2022 03:19:18 +0100 Subject: [PATCH 6/9] add comment --- .../commands/utility/WebCmdlet/StreamHelper.cs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git 
a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs index 9883650cec7..7c43239bb35 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs @@ -441,11 +441,16 @@ internal static string DecodeStream(Stream stream, ref Encoding encoding) string content = StreamToString(stream, encoding); if (isDefaultEncoding) { - // check for a charset attribute on the meta element to override the default + // Check for a charset attribute on the meta element to override the default // we only look within the first 1k characters as the meta tag is in the head // tag which is at the start of the document Match match = s_metaRegex.Match(content.Substring(0, Math.Min(content.Length, 1024))); + + // Check for a encoding attribute on the xml declaration to override the default + // we only look within the first 256 characters as the declaration is in the first + // line of the document Match match2 = s_xmlRegex.Match(content.Substring(0, Math.Min(content.Length, 256))); + if (match.Success || match2.Success) { Encoding localEncoding = null; From ce00870d3287428cd415459028f1a2f48e6c12f0 Mon Sep 17 00:00:00 2001 From: CarloToso <105941898+CarloToso@users.noreply.github.com> Date: Thu, 8 Dec 2022 03:45:52 +0100 Subject: [PATCH 7/9] remove useless () --- .../commands/utility/WebCmdlet/StreamHelper.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs index 7c43239bb35..34226dbf998 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs +++ 
b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs @@ -454,7 +454,7 @@ internal static string DecodeStream(Stream stream, ref Encoding encoding) if (match.Success || match2.Success) { Encoding localEncoding = null; - string characterSet = (string.IsNullOrEmpty(match.Groups["charset"].Value)) ? match2.Groups["encoding"].Value : match.Groups["charset"].Value; + string characterSet = string.IsNullOrEmpty(match.Groups["charset"].Value) ? match2.Groups["encoding"].Value : match.Groups["charset"].Value; if (TryGetEncoding(characterSet, out localEncoding)) { From 93c9b1266282683bc79fdbe25a4568b145436dbe Mon Sep 17 00:00:00 2001 From: CarloToso <105941898+CarloToso@users.noreply.github.com> Date: Thu, 8 Dec 2022 20:32:42 +0100 Subject: [PATCH 8/9] reuse the same substring --- .../commands/utility/WebCmdlet/StreamHelper.cs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs index 34226dbf998..c5519bb04a7 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs @@ -441,15 +441,15 @@ internal static string DecodeStream(Stream stream, ref Encoding encoding) string content = StreamToString(stream, encoding); if (isDefaultEncoding) { + // We only look within the first 1k characters as the meta element and + // the xml declaration are at the start of the document + string substring = content.Substring(0, Math.Min(content.Length, 1024)); + // Check for a charset attribute on the meta element to override the default - // we only look within the first 1k characters as the meta tag is in the head - // tag which is at the start of the document - Match match = s_metaRegex.Match(content.Substring(0, Math.Min(content.Length, 
1024))); + Match match = s_metaRegex.Match(substring); // Check for a encoding attribute on the xml declaration to override the default - // we only look within the first 256 characters as the declaration is in the first - // line of the document - Match match2 = s_xmlRegex.Match(content.Substring(0, Math.Min(content.Length, 256))); + Match match2 = s_xmlRegex.Match(substring); if (match.Success || match2.Success) { @@ -460,6 +460,7 @@ internal static string DecodeStream(Stream stream, ref Encoding encoding) { stream.Seek(0, SeekOrigin.Begin); content = StreamToString(stream, localEncoding); + // report the encoding used. encoding = localEncoding; } From 41d524f0e47da39ef4a96216a89354724ad76521 Mon Sep 17 00:00:00 2001 From: CarloToso <105941898+CarloToso@users.noreply.github.com> Date: Fri, 9 Dec 2022 10:40:05 +0100 Subject: [PATCH 9/9] Updated with suggestions --- .../commands/utility/WebCmdlet/StreamHelper.cs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs index c5519bb04a7..82dfba1e8fd 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/WebCmdlet/StreamHelper.cs @@ -420,12 +420,12 @@ internal static bool TryGetEncoding(string characterSet, out Encoding encoding) private static readonly Regex s_metaRegex = new( @"<meta\s.*[^.><]*charset\s*=\s*[""'\n]?(?<charset>[A-Za-z].[^\s""'\n<>]*)[\s""'\n>]", - RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase + RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase | RegexOptions.NonBacktracking ); private static readonly Regex s_xmlRegex = new( -
@"<\?xml\s.*[^.><]*encoding\s*=\s*[""'\n]?(?<encoding>[A-Za-z].[^\s""'\n<>]*)[\s""'\n>]", - RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase + @"<\?xml\s.*[^.><]*encoding\s*=\s*[""'\n]?(?<charset>[A-Za-z].[^\s""'\n<>]*)[\s""'\n>]", + RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase | RegexOptions.NonBacktracking ); internal static string DecodeStream(Stream stream, ref Encoding encoding) @@ -449,19 +449,20 @@ internal static string DecodeStream(Stream stream, ref Encoding encoding) Match match = s_metaRegex.Match(substring); // Check for a encoding attribute on the xml declaration to override the default - Match match2 = s_xmlRegex.Match(substring); + if (!match.Success) + { + match = s_xmlRegex.Match(substring); + } - if (match.Success || match2.Success) + if (match.Success) { Encoding localEncoding = null; - string characterSet = string.IsNullOrEmpty(match.Groups["charset"].Value) ? match2.Groups["encoding"].Value : match.Groups["charset"].Value; + string characterSet = match.Groups["charset"].Value; if (TryGetEncoding(characterSet, out localEncoding)) { stream.Seek(0, SeekOrigin.Begin); content = StreamToString(stream, localEncoding); - - // report the encoding used. encoding = localEncoding; } }