diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/CSVCommands.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/CSVCommands.cs index 6313a9a5ddc..71049f48643 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/CSVCommands.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/CSVCommands.cs @@ -10,6 +10,7 @@ using System.IO; using System.Linq; using System.Management.Automation; +using Microsoft.PowerShell; using System.Text; using Dbg = System.Management.Automation.Diagnostics; @@ -206,8 +207,7 @@ public SwitchParameter NoClobber /// Encoding optional flag /// [Parameter()] - [ValidateSetAttribute(new string[] { "Unicode", "UTF7", "UTF8", "ASCII", "UTF32", "BigEndianUnicode", "Default", "OEM" })] - public string Encoding { get; set; } + public FileEncoding Encoding { get; set; } = FileEncoding.Unspecified; /// /// Property that sets append parameter. @@ -367,7 +367,7 @@ private void CreateFileStream() PathUtils.MasterStreamOpen( this, this.Path, - Encoding ?? "ASCII", + Encoding, false, // defaultEncoding Append, Force, @@ -571,8 +571,7 @@ public SwitchParameter UseCulture /// Encoding optional flag /// [Parameter()] - [ValidateSetAttribute(new[] { "Unicode", "UTF7", "UTF8", "ASCII", "UTF32", "BigEndianUnicode", "Default", "OEM" })] - public string Encoding { get; set; } + public FileEncoding Encoding { get; set; } = FileEncoding.Unspecified; /// /// Avoid writing out duplicate warning messages when there are diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ExportAliasCommand.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ExportAliasCommand.cs index 26e75084930..9c7ae6314d6 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ExportAliasCommand.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ExportAliasCommand.cs @@ -387,7 +387,7 @@ private StreamWriter OpenFile(out FileInfo readOnlyFileInfo) PathUtils.MasterStreamOpen( this, this.Path, - EncodingConversion.Unicode, + FileEncoding.Unicode, false, // defaultEncoding Append, Force, diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/format-hex/Format-Hex.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/format-hex/Format-Hex.cs index b1c65d3f2b1..9b442e69a60 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/format-hex/Format-Hex.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/format-hex/Format-Hex.cs @@ -46,14 +46,7 @@ public sealed class FormatHex : PSCmdlet /// Type of character encoding for InputObject /// [Parameter(ParameterSetName = "ByInputObject")] - [ValidateSetAttribute(new string[] { - EncodingConversion.Unicode, - EncodingConversion.BigEndianUnicode, - EncodingConversion.Utf8, - EncodingConversion.Utf7, - EncodingConversion.Utf32, - EncodingConversion.Ascii})] - public string Encoding { get; set; } = "Ascii"; + public FileEncoding Encoding { get; set; } = FileEncoding.UTF8NoBOM; /// /// This parameter is no-op @@ -239,7 +232,7 @@ private void ProcessObjectContent(PSObject inputObject) else if (obj is string) { string inputString = obj.ToString(); - Encoding resolvedEncoding = EncodingConversion.Convert(this, Encoding); + Encoding resolvedEncoding = EncodingUtils.GetEncoding(this, Encoding); inputBytes = resolvedEncoding.GetBytes(inputString); } diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/out-file/Out-File.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/out-file/Out-File.cs index b8fd1cc09b6..d61c109a588 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/out-file/Out-File.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/FormatAndOutput/out-file/Out-File.cs @@ -8,6 +8,7 @@ using System.Management.Automation.Host; using System.IO; using Microsoft.PowerShell.Commands.Internal.Format; +using Microsoft.PowerShell; namespace Microsoft.PowerShell.Commands { @@ -72,25 +73,13 @@ public string LiteralPath /// /// [Parameter(Position = 1)] - [ValidateNotNullOrEmpty] - [ValidateSetAttribute(new string[] { - EncodingConversion.Unknown, - EncodingConversion.String, - EncodingConversion.Unicode, - EncodingConversion.BigEndianUnicode, - EncodingConversion.Utf8, - EncodingConversion.Utf7, - EncodingConversion.Utf32, - EncodingConversion.Ascii, - EncodingConversion.Default, - EncodingConversion.OEM })] - public string Encoding + public FileEncoding Encoding { get { return _encoding; } set { _encoding = value; } } - private string _encoding; + private FileEncoding _encoding; /// /// Property that sets append parameter. diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ImplicitRemotingCommands.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ImplicitRemotingCommands.cs index e694403c6b8..cf3f4082ba4 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ImplicitRemotingCommands.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/ImplicitRemotingCommands.cs @@ -77,19 +77,7 @@ public SwitchParameter Force /// Encoding optional flag /// [Parameter] - [ValidateSetAttribute(new string[] { "Unicode", "UTF7", "UTF8", "ASCII", "UTF32", "BigEndianUnicode", "Default", "OEM" })] - public string Encoding - { - get - { - return _encoding.GetType().Name; - } - set - { - _encoding = EncodingConversion.Convert(this, value); - } - } - private Encoding _encoding = System.Text.Encoding.UTF8; + public FileEncoding Encoding { get; set; } = FileEncoding.Unspecified; #endregion Parameters @@ -144,7 +132,7 @@ protected override void BeginProcessing() List generatedFiles = GenerateProxyModule( tempDirectory, Path.GetFileName(directory.FullName), - _encoding, + EncodingUtils.GetEncoding(this, Encoding), _force, listOfCommandMetadata, alias2resolvedCommandName, diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/MatchString.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/MatchString.cs index 5cc6d26738b..cd64a8176f1 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/MatchString.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/MatchString.cs @@ -1200,17 +1200,7 @@ public SwitchParameter AllMatches /// The text encoding to process each file as. /// [Parameter] - [ValidateNotNullOrEmpty] - [ValidateSetAttribute(new string[] { - EncodingConversion.Unicode, - EncodingConversion.Utf7, - EncodingConversion.Utf8, - EncodingConversion.Utf32, - EncodingConversion.Ascii, - EncodingConversion.BigEndianUnicode, - EncodingConversion.Default, - EncodingConversion.OEM })] - public string Encoding { get; set; } + public FileEncoding Encoding { get; set; } = FileEncoding.Unspecified; private System.Text.Encoding _textEncoding; @@ -1282,9 +1272,9 @@ public SwitchParameter AllMatches protected override void BeginProcessing() { // Process encoding switch. - if (Encoding != null) + if (Encoding != FileEncoding.Unspecified ) { - _textEncoding = EncodingConversion.Convert(this, Encoding); + _textEncoding = EncodingUtils.GetEncoding(this, Encoding); } else { diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/Send-MailMessage.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/Send-MailMessage.cs index 13ed7e00f1a..d391780c29b 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/Send-MailMessage.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/Send-MailMessage.cs @@ -8,6 +8,7 @@ using System.Net.Mail; using System.Diagnostics.CodeAnalysis; using System.Management.Automation; +using Microsoft.PowerShell; namespace Microsoft.PowerShell.Commands @@ -492,35 +493,22 @@ protected override void EndProcessing() /// /// To make it easier to specify -Encoding parameter, we add an ArgumentTransformationAttribute here. - /// When the input data is of type string and is valid to be converted to System.Text.Encoding, we do - /// the conversion and return the converted value. Otherwise, we just return the input data. + /// When the input data is of type string and is valid to be converted to System.Text.Encoding + /// via EncodingUtils.GetEncoding(), we do the conversion and return the converted value. + /// Otherwise, we just return the input data. /// internal sealed class ArgumentToEncodingNameTransformationAttribute : ArgumentTransformationAttribute { public override object Transform(EngineIntrinsics engineIntrinsics, object inputData) { - string encodingName; - if (LanguagePrimitives.TryConvertTo(inputData, out encodingName)) - { - if (string.Equals(encodingName, EncodingConversion.Unknown, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.String, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.Unicode, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.BigEndianUnicode, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.Utf8, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.Utf7, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.Utf32, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.Ascii, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.Default, StringComparison.OrdinalIgnoreCase) || - string.Equals(encodingName, EncodingConversion.OEM, StringComparison.OrdinalIgnoreCase)) - { - // the encodingName is guaranteed to be valid, so it is safe to pass null to method - // Convert(Cmdlet cmdlet, string encoding) as the value of 'cmdlet'. - return EncodingConversion.Convert(null, encodingName); - } + FileEncoding encoding; + if (LanguagePrimitives.TryConvertTo(inputData, out encoding)) + { + return EncodingUtils.GetEncoding(encoding); } return inputData; } } #endregion -} \ No newline at end of file +} diff --git a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/XmlCommands.cs b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/XmlCommands.cs index 2a9263060bf..46ce0561c30 100644 --- a/src/Microsoft.PowerShell.Commands.Utility/commands/utility/XmlCommands.cs +++ b/src/Microsoft.PowerShell.Commands.Utility/commands/utility/XmlCommands.cs @@ -14,6 +14,7 @@ using System.Security; using System.Text; using System.Xml; +using Microsoft.PowerShell; using Dbg = System.Management.Automation.Diagnostics; namespace Microsoft.PowerShell.Commands @@ -108,8 +109,7 @@ public SwitchParameter NoClobber /// /// [Parameter] - [ValidateSetAttribute(new string[] { "Unicode", "UTF7", "UTF8", "ASCII", "UTF32", "BigEndianUnicode", "Default", "OEM" })] - public string Encoding { get; set; } = "Unicode"; + public FileEncoding Encoding { get; set; } = FileEncoding.Unspecified; #endregion Command Line Parameters diff --git a/src/System.Management.Automation/commands/utility/FormatAndOutput/common/FormatXMLWriter.cs b/src/System.Management.Automation/commands/utility/FormatAndOutput/common/FormatXMLWriter.cs index 117024cfd2b..d3e98d71b92 100644 --- a/src/System.Management.Automation/commands/utility/FormatAndOutput/common/FormatXMLWriter.cs +++ b/src/System.Management.Automation/commands/utility/FormatAndOutput/common/FormatXMLWriter.cs @@ -8,6 +8,7 @@ using System.Collections.Generic; using System.Globalization; using System.Xml; +using Microsoft.PowerShell; namespace Microsoft.PowerShell.Commands { @@ -37,7 +38,7 @@ internal static void WriteToPs1Xml(PSCmdlet cmdlet, List StreamWriter streamWriter; FileStream fileStream; FileInfo fileInfo; - PathUtils.MasterStreamOpen(cmdlet, filepath, "ascii", true, false, force, noclobber, + PathUtils.MasterStreamOpen(cmdlet, filepath, FileEncoding.Ascii, true, false, force, noclobber, out fileStream, out streamWriter, out fileInfo, isLiteralPath); try diff --git a/src/System.Management.Automation/engine/ExternalScriptInfo.cs b/src/System.Management.Automation/engine/ExternalScriptInfo.cs index 0a2897ed506..eb670e6b7bb 100644 --- a/src/System.Management.Automation/engine/ExternalScriptInfo.cs +++ b/src/System.Management.Automation/engine/ExternalScriptInfo.cs @@ -6,6 +6,7 @@ using System.Text; using System.Collections.ObjectModel; using System.Management.Automation.Runspaces; +using Microsoft.PowerShell; using Microsoft.PowerShell.Commands; using System.Collections.Generic; using System.Management.Automation.Language; @@ -467,7 +468,7 @@ private void ReadScriptContents() { using (FileStream readerStream = new FileStream(_path, FileMode.Open, FileAccess.Read)) { - Encoding defaultEncoding = ClrFacade.GetDefaultEncoding(); + Encoding defaultEncoding = EncodingUtils.GetDefaultEncoding(); Microsoft.Win32.SafeHandles.SafeFileHandle safeFileHandle = readerStream.SafeFileHandle; using (StreamReader scriptReader = new StreamReader(readerStream, defaultEncoding)) diff --git a/src/System.Management.Automation/engine/InitialSessionState.cs b/src/System.Management.Automation/engine/InitialSessionState.cs index 235b1dc943a..8561989746f 100644 --- a/src/System.Management.Automation/engine/InitialSessionState.cs +++ b/src/System.Management.Automation/engine/InitialSessionState.cs @@ -1,4 +1,4 @@ -/********************************************************************++ +/*********************************************************************++ Copyright (c) Microsoft Corporation. All rights reserved. --********************************************************************/ @@ -15,6 +15,7 @@ using System.Management.Automation.Language; using System.Reflection; using System.Threading; +using Microsoft.PowerShell; using Microsoft.PowerShell.Commands; using Debug = System.Management.Automation.Diagnostics; using System.Management.Automation.Host; @@ -4828,6 +4829,7 @@ .ForwardHelpCategory Cmdlet internal const ActionPreference defaultVerbosePreference = ActionPreference.SilentlyContinue; internal const ActionPreference defaultWarningPreference = ActionPreference.Continue; internal const ActionPreference defaultInformationPreference = ActionPreference.SilentlyContinue; + internal const Microsoft.PowerShell.FileEncoding defaultFileEncodingPreference = FileEncoding.Unspecified; internal const bool defaultWhatIfPreference = false; internal const ConfirmImpact defaultConfirmPreference = ConfirmImpact.High; @@ -4908,6 +4910,13 @@ .ForwardHelpCategory Cmdlet ScopedItemOptions.None, new ArgumentTypeConverterAttribute(typeof(ActionPreference)) ), + new SessionStateVariableEntry( + SpecialVariables.DefaultFileEncodingPreference, + defaultFileEncodingPreference, + RunspaceInit.DefaultFileEncodingDescription, + ScopedItemOptions.None, + new ArgumentTypeConverterAttribute(typeof(Microsoft.PowerShell.FileEncoding)) + ), new SessionStateVariableEntry( SpecialVariables.ErrorView, "NormalView", diff --git a/src/System.Management.Automation/engine/Modules/NewModuleManifestCommand.cs b/src/System.Management.Automation/engine/Modules/NewModuleManifestCommand.cs index 1ea15017e1d..6656e843885 100644 --- a/src/System.Management.Automation/engine/Modules/NewModuleManifestCommand.cs +++ b/src/System.Management.Automation/engine/Modules/NewModuleManifestCommand.cs @@ -14,6 +14,7 @@ using System.Management.Automation; using System.Management.Automation.Internal; using System.Diagnostics.CodeAnalysis; +using Microsoft.PowerShell; using Dbg = System.Management.Automation.Diagnostics; // @@ -940,12 +941,8 @@ protected override void EndProcessing() // Now open the output file... PathUtils.MasterStreamOpen( this, - filePath, -#if UNIX - new UTF8Encoding(false), // UTF-8, no BOM -#else - EncodingConversion.Unicode, // UTF-16 with BOM -#endif + filePath, + FileEncoding.Unspecified, /* defaultEncoding */ false, /* Append */ false, /* Force */ false, diff --git a/src/System.Management.Automation/engine/Modules/ScriptAnalysis.cs b/src/System.Management.Automation/engine/Modules/ScriptAnalysis.cs index 8fb272b488b..7c87f828aca 100644 --- a/src/System.Management.Automation/engine/Modules/ScriptAnalysis.cs +++ b/src/System.Management.Automation/engine/Modules/ScriptAnalysis.cs @@ -9,6 +9,7 @@ using System.Text; using System.Management.Automation.Language; using System.Text.RegularExpressions; +using Microsoft.PowerShell; namespace System.Management.Automation { @@ -95,7 +96,7 @@ internal static string ReadScript(string path) { using (FileStream readerStream = new FileStream(path, FileMode.Open, FileAccess.Read)) { - Encoding defaultEncoding = ClrFacade.GetDefaultEncoding(); + Encoding defaultEncoding = EncodingUtils.GetDefaultEncoding(); Microsoft.Win32.SafeHandles.SafeFileHandle safeFileHandle = readerStream.SafeFileHandle; using (StreamReader scriptReader = new StreamReader(readerStream, defaultEncoding)) @@ -554,4 +555,4 @@ internal class RequiredModuleInfo internal string Name { get; set; } internal List CommandsToPostFilter { get; set; } } -} // System.Management.Automation \ No newline at end of file +} // System.Management.Automation diff --git a/src/System.Management.Automation/engine/SpecialVariables.cs b/src/System.Management.Automation/engine/SpecialVariables.cs index 835384138f6..597fd1732d9 100644 --- a/src/System.Management.Automation/engine/SpecialVariables.cs +++ b/src/System.Management.Automation/engine/SpecialVariables.cs @@ -196,6 +196,9 @@ internal static class SpecialVariables internal const string InformationPreference = "InformationPreference"; internal static readonly VariablePath InformationPreferenceVarPath = new VariablePath(InformationPreference); + internal const string DefaultFileEncodingPreference = "PSDefaultFileEncoding"; + internal static readonly VariablePath DefaultFileEncodingVarPath = new VariablePath(DefaultFileEncodingPreference); + #endregion Preference Variables internal const string ErrorView = "ErrorView"; diff --git a/src/System.Management.Automation/engine/Utils.cs b/src/System.Management.Automation/engine/Utils.cs index a5d9611202d..848a79af18f 100644 --- a/src/System.Management.Automation/engine/Utils.cs +++ b/src/System.Management.Automation/engine/Utils.cs @@ -1200,151 +1200,6 @@ internal static bool Succeeded(int hresult) return hresult >= 0; } - internal static FileSystemCmdletProviderEncoding GetEncoding(string path) - { - if (!File.Exists(path)) - { - return FileSystemCmdletProviderEncoding.Default; - } - - byte[] initialBytes = new byte[100]; - int bytesRead = 0; - - try - { - using (FileStream stream = System.IO.File.OpenRead(path)) - { - using (BinaryReader reader = new BinaryReader(stream)) - { - bytesRead = reader.Read(initialBytes, 0, 100); - } - } - } - catch (IOException) - { - return FileSystemCmdletProviderEncoding.Default; - } - - // Test for four-byte preambles - string preamble = null; - FileSystemCmdletProviderEncoding foundEncoding = FileSystemCmdletProviderEncoding.Default; - - if (bytesRead > 3) - { - preamble = String.Join("-", initialBytes[0], initialBytes[1], initialBytes[2], initialBytes[3]); - - if (encodingMap.TryGetValue(preamble, out foundEncoding)) - { - return foundEncoding; - } - } - - // Test for three-byte preambles - if (bytesRead > 2) - { - preamble = String.Join("-", initialBytes[0], initialBytes[1], initialBytes[2]); - if (encodingMap.TryGetValue(preamble, out foundEncoding)) - { - return foundEncoding; - } - } - - // Test for two-byte preambles - if (bytesRead > 1) - { - preamble = String.Join("-", initialBytes[0], initialBytes[1]); - if (encodingMap.TryGetValue(preamble, out foundEncoding)) - { - return foundEncoding; - } - } - - // Check for binary - string initialBytesAsAscii = System.Text.Encoding.ASCII.GetString(initialBytes, 0, bytesRead); - if (initialBytesAsAscii.IndexOfAny(nonPrintableCharacters) >= 0) - { - return FileSystemCmdletProviderEncoding.Byte; - } - - return FileSystemCmdletProviderEncoding.Ascii; - } - - internal static Encoding GetEncodingFromEnum(FileSystemCmdletProviderEncoding encoding) - { - // Default to unicode encoding - Encoding result = Encoding.Unicode; - - switch (encoding) - { - case FileSystemCmdletProviderEncoding.String: - result = Encoding.Unicode; - break; - - case FileSystemCmdletProviderEncoding.Unicode: - result = Encoding.Unicode; - break; - - case FileSystemCmdletProviderEncoding.BigEndianUnicode: - result = Encoding.BigEndianUnicode; - break; - - case FileSystemCmdletProviderEncoding.UTF8: - result = Encoding.UTF8; - break; - - case FileSystemCmdletProviderEncoding.UTF7: - result = Encoding.UTF7; - break; - - case FileSystemCmdletProviderEncoding.UTF32: - result = Encoding.UTF32; - break; - - case FileSystemCmdletProviderEncoding.BigEndianUTF32: - result = Encoding.BigEndianUnicode; - break; - - case FileSystemCmdletProviderEncoding.Ascii: - result = Encoding.ASCII; - break; - - case FileSystemCmdletProviderEncoding.Default: - result = ClrFacade.GetDefaultEncoding(); - break; - - case FileSystemCmdletProviderEncoding.Oem: - result = ClrFacade.GetOEMEncoding(); - break; - - default: - break; - } - - return result; - } // GetEncodingFromEnum - - // [System.Text.Encoding]::GetEncodings() | Where-Object { $_.GetEncoding().GetPreamble() } | - // Add-Member ScriptProperty Preamble { $this.GetEncoding().GetPreamble() -join "-" } -PassThru | - // Format-Table -Auto - internal static Dictionary encodingMap = - new Dictionary() - { - { "255-254", FileSystemCmdletProviderEncoding.Unicode }, - { "254-255", FileSystemCmdletProviderEncoding.BigEndianUnicode }, - { "255-254-0-0", FileSystemCmdletProviderEncoding.UTF32 }, - { "0-0-254-255", FileSystemCmdletProviderEncoding.BigEndianUTF32 }, - { "239-187-191", FileSystemCmdletProviderEncoding.UTF8 }, - }; - - internal static char[] nonPrintableCharacters = { - (char) 0, (char) 1, (char) 2, (char) 3, (char) 4, (char) 5, (char) 6, (char) 7, (char) 8, - (char) 11, (char) 12, (char) 14, (char) 15, (char) 16, (char) 17, (char) 18, (char) 19, (char) 20, - (char) 21, (char) 22, (char) 23, (char) 24, (char) 25, (char) 26, (char) 28, (char) 29, (char) 30, - (char) 31, (char) 127, (char) 129, (char) 141, (char) 143, (char) 144, (char) 157 }; - - internal static readonly UTF8Encoding utf8NoBom = - new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); - #if !CORECLR // TODO:CORECLR - WindowsIdentity.Impersonate() is not available. Use WindowsIdentity.RunImpersonated to replace it. /// /// Queues a CLR worker thread with impersonation of provided Windows identity. diff --git a/src/System.Management.Automation/engine/hostifaces/MshHostUserInterface.cs b/src/System.Management.Automation/engine/hostifaces/MshHostUserInterface.cs index d3cf16bc0a8..f3a353b5390 100644 --- a/src/System.Management.Automation/engine/hostifaces/MshHostUserInterface.cs +++ b/src/System.Management.Automation/engine/hostifaces/MshHostUserInterface.cs @@ -10,6 +10,7 @@ using System.Security; using System.Globalization; using System.Management.Automation.Runspaces; +using Microsoft.PowerShell; using Microsoft.PowerShell.Commands; using System.Threading; using System.Threading.Tasks; @@ -1070,11 +1071,11 @@ internal string Path _path = value; Encoding = Encoding.UTF8; - FileSystemCmdletProviderEncoding fileEncoding = Utils.GetEncoding(value); + FileEncoding fileEncoding = EncodingUtils.GetFileEncodingFromFile(value); - if (fileEncoding != FileSystemCmdletProviderEncoding.Default) + if (fileEncoding != FileEncoding.Default) { - Encoding = Utils.GetEncodingFromEnum(fileEncoding); + Encoding = EncodingUtils.GetEncoding(fileEncoding); } } } diff --git a/src/System.Management.Automation/engine/remoting/commands/NewPSSessionConfigurationFile.cs b/src/System.Management.Automation/engine/remoting/commands/NewPSSessionConfigurationFile.cs index 065f37b379a..c80761fff9c 100644 --- a/src/System.Management.Automation/engine/remoting/commands/NewPSSessionConfigurationFile.cs +++ b/src/System.Management.Automation/engine/remoting/commands/NewPSSessionConfigurationFile.cs @@ -604,7 +604,7 @@ protected override void ProcessRecord() PathUtils.MasterStreamOpen( this, filePath, - EncodingConversion.Unicode, + FileEncoding.Unicode, /* defaultEncoding */ false, /* Append */ false, /* Force */ false, @@ -1460,7 +1460,7 @@ protected override void ProcessRecord() PathUtils.MasterStreamOpen( this, filePath, - EncodingConversion.Unicode, + FileEncoding.Unicode, /* defaultEncoding */ false, /* Append */ false, /* Force */ false, @@ -2029,4 +2029,4 @@ internal static string CombineHashTableOrStringArray(object[] values, StreamWrit } #endregion -} \ No newline at end of file +} diff --git a/src/System.Management.Automation/engine/remoting/common/RunspaceConnectionInfo.cs b/src/System.Management.Automation/engine/remoting/common/RunspaceConnectionInfo.cs index aeaa379110f..b7748feb751 100644 --- a/src/System.Management.Automation/engine/remoting/common/RunspaceConnectionInfo.cs +++ b/src/System.Management.Automation/engine/remoting/common/RunspaceConnectionInfo.cs @@ -19,6 +19,7 @@ using System.Runtime.InteropServices; using System.Threading; using System.Security.AccessControl; +using Microsoft.PowerShell; using Microsoft.Win32.SafeHandles; using Dbg = System.Management.Automation.Diagnostics; using WSManAuthenticationMechanism = System.Management.Automation.Remoting.Client.WSManNativeApi.WSManAuthenticationMechanism; @@ -2179,20 +2180,20 @@ internal static int StartSSHProcess( { Debug.Assert(stdinFd >= 0, "Invalid Fd"); standardInput = new StreamWriter(OpenStream(stdinFd, FileAccess.Write), - Utils.utf8NoBom, StreamBufferSize) + EncodingUtils.utf8NoBom, StreamBufferSize) { AutoFlush = true }; } if (startInfo.RedirectStandardOutput) { Debug.Assert(stdoutFd >= 0, "Invalid Fd"); standardOutput = new StreamReader(OpenStream(stdoutFd, FileAccess.Read), - startInfo.StandardOutputEncoding ?? Utils.utf8NoBom, true, StreamBufferSize); + startInfo.StandardOutputEncoding ?? EncodingUtils.utf8NoBom, true, StreamBufferSize); } if (startInfo.RedirectStandardError) { Debug.Assert(stderrFd >= 0, "Invalid Fd"); standardError = new StreamReader(OpenStream(stderrFd, FileAccess.Read), - startInfo.StandardErrorEncoding ?? Utils.utf8NoBom, true, StreamBufferSize); + startInfo.StandardErrorEncoding ?? EncodingUtils.utf8NoBom, true, StreamBufferSize); } return childPid; diff --git a/src/System.Management.Automation/namespaces/FileSystemContentStream.cs b/src/System.Management.Automation/namespaces/FileSystemContentStream.cs index 999eca632bb..b688e93b9f3 100644 --- a/src/System.Management.Automation/namespaces/FileSystemContentStream.cs +++ b/src/System.Management.Automation/namespaces/FileSystemContentStream.cs @@ -8,6 +8,7 @@ using System.Diagnostics.CodeAnalysis; using System.IO; using System.Runtime.InteropServices; +using Microsoft.PowerShell; using System.Text; using System.Management.Automation; using System.Management.Automation.Internal; @@ -796,7 +797,7 @@ private bool ReadByteEncoded(bool waitChanges, ArrayList blocks, bool readBack) // the changes if (waitChanges) { - WaitForChanges(_path, _mode, _access, _share, ClrFacade.GetDefaultEncoding()); + WaitForChanges(_path, _mode, _access, _share, EncodingUtils.GetDefaultEncoding()); byteRead = _stream.ReadByte(); } } @@ -1160,8 +1161,8 @@ internal FileStreamBackReader(FileStream fileStream, Encoding encoding) _currentPosition = _stream.Position; // Get the oem encoding and system current ANSI code page - _oemEncoding = EncodingConversion.Convert(null, EncodingConversion.OEM); - _defaultAnsiEncoding = EncodingConversion.Convert(null, EncodingConversion.Default); + _oemEncoding = EncodingUtils.GetEncoding(FileEncoding.Oem); + _defaultAnsiEncoding = EncodingUtils.GetEncoding(FileEncoding.Default); } } diff --git a/src/System.Management.Automation/namespaces/FileSystemProvider.cs b/src/System.Management.Automation/namespaces/FileSystemProvider.cs index 7205120bdd1..4a5c855b889 100644 --- a/src/System.Management.Automation/namespaces/FileSystemProvider.cs +++ b/src/System.Management.Automation/namespaces/FileSystemProvider.cs @@ -23,6 +23,7 @@ using Dbg = System.Management.Automation; using System.Runtime.InteropServices; using System.Management.Automation.Runspaces; +using Microsoft.PowerShell; namespace Microsoft.PowerShell.Commands { @@ -6557,7 +6558,7 @@ public IContentReader GetContentReader(string path) // Defaults for the file read operation string delimiter = "\n"; - Encoding encoding = ClrFacade.GetDefaultEncoding(); + Encoding encoding = EncodingUtils.GetDefaultEncoding(); bool waitForChanges = false; bool streamTypeSpecified = false; @@ -6592,7 +6593,7 @@ public IContentReader GetContentReader(string path) if (streamTypeSpecified) { - encoding = dynParams.EncodingType; + encoding = EncodingUtils.GetProviderEncoding(this, dynParams.Encoding); } // Get the wait value @@ -6720,7 +6721,8 @@ public IContentWriter GetContentWriter(string path) // If this is true, then the content will be read as bytes bool usingByteEncoding = false; bool streamTypeSpecified = false; - Encoding encoding = ClrFacade.GetDefaultEncoding(); + // we need to discover the encoding + Encoding encoding = EncodingUtils.GetProviderEncoding(this, FileEncoding.Unspecified); FileMode filemode = FileMode.OpenOrCreate; string streamName = null; bool suppressNewline = false; @@ -6739,7 +6741,7 @@ public IContentWriter GetContentWriter(string path) if (streamTypeSpecified) { - encoding = dynParams.EncodingType; + encoding = EncodingUtils.GetProviderEncoding(this, dynParams.Encoding); } streamName = dynParams.Stream; @@ -7440,73 +7442,6 @@ public static Hashtable Invoke(System.Management.Automation.PowerShell ps, FileS } } - /// - /// Defines the values that can be supplied as the encoding parameter in the - /// FileSystemContentDynamicParametersBase class. - /// - public enum FileSystemCmdletProviderEncoding - { - /// - /// No encoding. - /// - Unknown, - - /// - /// Unicode encoding. - /// - String, - - /// - /// Unicode encoding. - /// - Unicode, - - /// - /// Byte encoding. - /// - Byte, - - /// - /// Big Endian Unicode encoding. - /// - BigEndianUnicode, - - /// - /// UTF8 encoding. - /// - UTF8, - - /// - /// UTF7 encoding. - /// - UTF7, - - /// - /// UTF32 encoding. - /// - UTF32, - - /// - /// ASCII encoding. - /// - Ascii, - - /// - /// Default encoding. - /// - Default, - - /// - /// OEM encoding. - /// - Oem, - - /// - /// Big Endian UTF32 encoding. - /// - BigEndianUTF32, - } // FileSystemCmdletProviderEncoding - #endregion #region Dynamic Parameters @@ -7610,7 +7545,7 @@ public class FileSystemContentDynamicParametersBase /// reading data from the file. /// [Parameter] - public FileSystemCmdletProviderEncoding Encoding { get; set; } = FileSystemCmdletProviderEncoding.String; + public FileEncoding Encoding { get; set; } = FileEncoding.Unspecified; /// /// A parameter to return a stream of an item. @@ -7618,18 +7553,6 @@ public class FileSystemContentDynamicParametersBase [Parameter] public String Stream { get; set; } - - /// - /// Gets the encoding from the specified StreamType parameter. - /// - public Encoding EncodingType - { - get - { - return Utils.GetEncodingFromEnum(Encoding); - } - } // EncodingType - /// /// Gets the Byte Encoding status of the StreamType parameter. Returns true /// if the stream was opened with "Byte" encoding, false otherwise. @@ -7638,7 +7561,7 @@ public bool UsingByteEncoding { get { - return Encoding == FileSystemCmdletProviderEncoding.Byte; + return Encoding == FileEncoding.Byte; } // get } // UsingByteEncoding @@ -7650,7 +7573,7 @@ public bool WasStreamTypeSpecified { get { - return (Encoding != FileSystemCmdletProviderEncoding.String); + return (Encoding != FileEncoding.String); } // get } // WasStreamTypeSpecified diff --git a/src/System.Management.Automation/resources/RunspaceInit.resx b/src/System.Management.Automation/resources/RunspaceInit.resx index d036c4897c0..0b5242e6f5a 100644 --- a/src/System.Management.Automation/resources/RunspaceInit.resx +++ b/src/System.Management.Automation/resources/RunspaceInit.resx @@ -174,6 +174,9 @@ Dictates the action taken when a command generates an item in the Information stream + + Dictates the encoding used when creating a file + Dictates the view mode to use when displaying errors diff --git a/src/System.Management.Automation/utils/ClrFacade.cs b/src/System.Management.Automation/utils/ClrFacade.cs index 69058bfa373..f8b15ad2c50 100644 --- a/src/System.Management.Automation/utils/ClrFacade.cs +++ b/src/System.Management.Automation/utils/ClrFacade.cs @@ -20,6 +20,7 @@ using System.Security; using Microsoft.Win32.SafeHandles; using System.Runtime.InteropServices.ComTypes; +using Microsoft.PowerShell; namespace System.Management.Automation { @@ -113,7 +114,7 @@ internal static Encoding GetDefaultEncoding() EncodingRegisterProvider(); uint currentAnsiCp = NativeMethods.GetACP(); - s_defaultEncoding = Encoding.GetEncoding((int)currentAnsiCp); + s_defaultEncoding = EncodingUtils.GetDefaultEncoding(); #endif } return s_defaultEncoding; @@ -126,15 +127,20 @@ internal static Encoding GetDefaultEncoding() /// internal static Encoding GetOEMEncoding() { + // The OEM code pages are sometimes used by Win32 console applications, and + // on non-Windows platforms they still may have uses (if installed) and + // could be used if desired. + // On non-windows platforms, they have more limited uses, and probably won't + // be installed. if (s_oemEncoding == null) { #if UNIX // PowerShell Core on Unix s_oemEncoding = GetDefaultEncoding(); -#else // PowerShell Core on Windows +#else // PowerShell Core on Windows, which needs provider registration EncodingRegisterProvider(); uint oemCp = NativeMethods.GetOEMCP(); - s_oemEncoding = Encoding.GetEncoding((int)oemCp); + s_oemEncoding = EncodingUtils.GetDefaultEncoding(); #endif } return s_oemEncoding; @@ -269,6 +275,7 @@ private static SecurityZone ReadFromZoneIdentifierDataStream(string filePath) FileAccess.Read, FileShare.Read); // If we successfully get the zone data stream, try to read the ZoneId information + // use the method in this class not EncodingUtils. using (StreamReader zoneDataReader = new StreamReader(zoneDataSteam, GetDefaultEncoding())) { string line = null; diff --git a/src/System.Management.Automation/utils/Encoding.cs b/src/System.Management.Automation/utils/Encoding.cs new file mode 100644 index 00000000000..df29f12b34a --- /dev/null +++ b/src/System.Management.Automation/utils/Encoding.cs @@ -0,0 +1,402 @@ +/********************************************************************++ +Copyright (c) Microsoft Corporation. All rights reserved. +--********************************************************************/ + +using System; +using System.Collections.Generic; +using System.Globalization; +using System.IO; +using System.Text; +using System.Management.Automation; +using System.Management.Automation.Provider; + +namespace Microsoft.PowerShell +{ + /// + /// The list of available file encodings + /// + public enum FileEncoding + { + /// + /// No encoding, or unset. + /// + Unspecified, + + /// + /// Unicode encoding. + /// + String, + + /// + /// Unicode encoding. + /// + Unicode, + + /// + /// Byte encoding. + /// + Byte, + + /// + /// Big Endian Unicode encoding. + /// + BigEndianUnicode, + + /// + /// Backward compatibility - UTF8 encoding without BOM + /// + UTF8, + + /// + /// UTF8 encoding which includes BOM. + /// + UTF8BOM, + + /// + /// UTF8 encoding without BOM. + /// + UTF8NoBOM, + + /// + /// UTF7 encoding. + /// + UTF7, + + /// + /// UTF32 encoding. + /// + UTF32, + + /// + /// ASCII encoding. + /// + Ascii, + + /// + /// Default encoding. + /// + Default, + + /// + /// OEM encoding. + /// + Oem, + + /// + /// Big Endian UTF32 encoding + /// + BigEndianUTF32, + + /// + /// Windows legacy encoding. This requires a cmdlet object to resolve. + /// + WindowsLegacy, + } + + /// + /// the helper class for determining encodings for PowerShell + /// + public static class EncodingUtils + { + + /// + /// Return the default PowerShell encoding which is UTF8 without a BOM. + /// There is no distinction between platforms + /// + public static Encoding GetDefaultEncoding() + { + return utf8NoBom; + } + + /// + /// translate a FileEncoding to an actual System.Text.Encoding + /// The enum value + /// System.Text.Encoding + /// + public static Encoding GetEncoding(FileEncoding textEncoding) + { + System.Text.Encoding result; + switch ( textEncoding ) + { + case FileEncoding.String: + result = Encoding.Unicode; + break; + + case FileEncoding.Unicode: + result = Encoding.Unicode; + break; + + case FileEncoding.BigEndianUnicode: + result = Encoding.BigEndianUnicode; + break; + + case FileEncoding.UTF8BOM: + result = Encoding.UTF8; // The default UTF8 encoder includes the BOM + break; + + case FileEncoding.Byte: + result = Encoding.Unicode; + break; + + case FileEncoding.UTF8: + case FileEncoding.UTF8NoBOM: + result = utf8NoBom; + break; + + case FileEncoding.UTF7: + result = Encoding.UTF7; + break; + + case FileEncoding.UTF32: + result = Encoding.UTF32; + break; + + case FileEncoding.BigEndianUTF32: + // This can possibly throw, but if so, we can't provide + // the encoding which the user requested, so we should fail + result = Encoding.GetEncoding("utf-32BE"); + break; + + case FileEncoding.Ascii: + result = Encoding.ASCII; + break; + + case FileEncoding.Default: + result = GetDefaultEncoding(); + break; + + case FileEncoding.Oem: + result = ClrFacade.GetOEMEncoding(); + break; + + default: + result = GetDefaultEncoding(); + break; + } + + return result; + } + + /// + /// Retrieve the encoding based on the Cmdlet and the Encoding + /// The cmdlet of interest + /// The Encoding parameter value + /// System.Text.Encoding + /// + public static Encoding GetEncoding(Cmdlet cmdlet, FileEncoding encoding) + { + Encoding resolvedEncoding = GetDefaultEncoding(); + FileEncoding encodingPreference = FileEncoding.Unspecified; + bool preferenceSetAndValid = false; + + // An encoding has been specified as a parameter (or the explicit parameter value is "Unknown") + if ( encoding != FileEncoding.Unspecified ) + { + // If the encoding has been set to WindowsLegacy, we need to look up the actual encoding + if ( encoding == FileEncoding.WindowsLegacy ) + { + resolvedEncoding = GetWindowsLegacyEncoding(cmdlet); + } + else + { + resolvedEncoding = GetEncoding(encoding); + } + } + else + { + // if we have a cmdlet and the parameter is not specifically set, + // so check the preference variable + if ( cmdlet != null ) + { + encodingPreference = GetEncodingPreference(cmdlet.Context.SessionState); + } + // If set to unknown, we accept that it is unset + preferenceSetAndValid = encodingPreference != FileEncoding.Unspecified; + // If the encoding preference has been set to WindowsLegacy, we need to look up the actual encoding + if ( encodingPreference == FileEncoding.WindowsLegacy ) + { + resolvedEncoding = GetWindowsLegacyEncoding(cmdlet); + } + else if ( encodingPreference != FileEncoding.Unspecified ) + { + resolvedEncoding = GetEncoding(encodingPreference); + } + // the final else would be set the encoding to GetDefaultEncoding() which was handled above + } + + return resolvedEncoding; + } + + /// + /// Given a path to a file, attempt to retrieve the encoding + /// The path to a file to inspect for an encoding + /// System.Text.Encoding + /// + internal static FileEncoding GetFileEncodingFromFile(string path) + { + if (!File.Exists(path)) + { + return FileEncoding.Default; + } + + byte[] initialBytes = new byte[100]; + int bytesRead = 0; + + try + { + using (FileStream stream = System.IO.File.OpenRead(path)) + { + using (BinaryReader reader = new BinaryReader(stream)) + { + bytesRead = reader.Read(initialBytes, 0, initialBytes.Length); + } + } + } + catch (IOException) + { + return FileEncoding.Default; + } + + // Test for four-byte preambles + string preamble = null; + FileEncoding foundEncoding; + + if (bytesRead > 3) + { + preamble = String.Join("-", initialBytes[0], initialBytes[1], initialBytes[2], initialBytes[3]); + + if (encodingMap.TryGetValue(preamble, out foundEncoding)) + { + return foundEncoding; + } + } + + // Test for three-byte preambles + if (bytesRead > 2) + { + preamble = String.Join("-", initialBytes[0], initialBytes[1], initialBytes[2]); + if (encodingMap.TryGetValue(preamble, out foundEncoding)) + { + return foundEncoding; + } + } + + // Test for two-byte preambles + if (bytesRead > 1) + { + preamble = String.Join("-", initialBytes[0], initialBytes[1]); + if (encodingMap.TryGetValue(preamble, out foundEncoding)) + { + return foundEncoding; + } + } + + // Check for binary + string initialBytesAsAscii = System.Text.Encoding.ASCII.GetString(initialBytes, 0, bytesRead); + if (initialBytesAsAscii.IndexOfAny(nonPrintableCharacters) >= 0) + { + return FileEncoding.Byte; + } + + // we couldn't determine anything from direct examination, + // return UTF8 without a BOM which should be good for both Windows and Non-Windows + return FileEncoding.UTF8NoBOM; + } + + /// + /// Retrieve the PSDefaultFileEncoding preference value if set + /// SessionState to use to retrieve the preference variable if set + /// + public static FileEncoding GetEncodingPreference(SessionState sessionState) + { + FileEncoding encodingPreference = FileEncoding.Unspecified; + try + { + // It doesn't matter if this fails or throws, we will return unknown in that case + object tmp = sessionState.PSVariable.GetValue("PSDefaultFileEncoding"); + LanguagePrimitives.TryConvertTo(tmp, out encodingPreference); + } + catch + { + ; + } + return encodingPreference; + } + + /// + /// Retrieve the encoding in a provider context + /// + public static Encoding GetProviderEncoding(CmdletProvider provider, FileEncoding encoding) + { + Encoding resolvedEncoding = GetDefaultEncoding(); + FileEncoding encodingPreference = GetEncodingPreference(provider.SessionState); + // If the encoding isn't set, but is available as $PSDefaultFileEncoding, use that + // It the encoding is set use that, otherwise return the default encoding + if ( encoding == FileEncoding.Unspecified && encodingPreference != FileEncoding.Unspecified ) + { + resolvedEncoding = GetEncoding(encodingPreference); + } + else if ( encoding != FileEncoding.Unspecified ) + { + resolvedEncoding = GetEncoding(encoding); + } + return resolvedEncoding; + } + + // This is the way the encoding is implemented in PowerShell 5 and earlier. + // If the user sets the default encoding to WindowsLegacy, we will + // be able to encode for that + internal static Dictionary legacyEncodingMap = + new Dictionary(StringComparer.OrdinalIgnoreCase) + { + { "microsoft.powershell.commands.addcontentcommand", Encoding.ASCII }, + { "microsoft.powershell.commands.exportclixmlcommand", Encoding.Unicode }, + { "microsoft.powershell.commands.exportcsvcommand", Encoding.ASCII }, + { "microsoft.powershell.commands.exportpssessioncommand", Encoding.UTF8 }, // with BOM + { "microsoft.powershell.commands.formathex", Encoding.ASCII }, + { "microsoft.powershell.commands.newmodulemanifestcommand", Encoding.Unicode }, + { "microsoft.powershell.commands.getcontentcommand", Encoding.ASCII }, + { "microsoft.powershell.commands.importcsvcommand", Encoding.ASCII }, + { "microsoft.powershell.commands.outfilecommand", Encoding.Unicode }, // This includes redirection + { "microsoft.powershell.commands.setcontentcommand", Encoding.ASCII }, + // Providers are handled here + { "microsoft.powershell.commands.filesystemprovider", Encoding.ASCII }, + }; + + /// Get the Windows legacy encoding from our encoding map + internal static Encoding GetWindowsLegacyEncoding(Cmdlet cmdlet) + { + Encoding encoding = Encoding.Default; + if ( cmdlet != null ) + { + legacyEncodingMap.TryGetValue(cmdlet.GetType().FullName, out encoding); + } + return encoding; + } + + + // [System.Text.Encoding]::GetEncodings() | ? { $_.GetEncoding().GetPreamble() } | + // Add-Member ScriptProperty Preamble { $this.GetEncoding().GetPreamble() -join "-" } -PassThru | + // Format-Table -Auto + internal static Dictionary encodingMap = + new Dictionary() + { + { "255-254", FileEncoding.Unicode }, + { "254-255", FileEncoding.BigEndianUnicode }, + { "255-254-0-0", FileEncoding.UTF32 }, + { "0-0-254-255", FileEncoding.BigEndianUTF32 }, + { "239-187-191", FileEncoding.UTF8BOM }, + }; + + internal static char[] nonPrintableCharacters = { + (char) 0, (char) 1, (char) 2, (char) 3, (char) 4, (char) 5, (char) 6, (char) 7, (char) 8, + (char) 11, (char) 12, (char) 14, (char) 15, (char) 16, (char) 17, (char) 18, (char) 19, (char) 20, + (char) 21, (char) 22, (char) 23, (char) 24, (char) 25, (char) 26, (char) 28, (char) 29, (char) 30, + (char) 31, (char) 127, (char) 129, (char) 141, (char) 143, (char) 144, (char) 157 }; + + internal static readonly UTF8Encoding utf8NoBom = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); + + } + +} + diff --git a/src/System.Management.Automation/utils/PathUtils.cs b/src/System.Management.Automation/utils/PathUtils.cs index 632938e40e7..8b90b83ec83 100644 --- a/src/System.Management.Automation/utils/PathUtils.cs +++ b/src/System.Management.Automation/utils/PathUtils.cs @@ -6,6 +6,7 @@ using System.Globalization; using System.IO; using System.Text; +using Microsoft.PowerShell; using System.Management.Automation.Internal; using Dbg = System.Management.Automation.Diagnostics; @@ -17,6 +18,7 @@ namespace System.Management.Automation /// internal static class PathUtils { + /// /// THE method for opening a file for writing. /// Should be used by all cmdlets that write to a file. @@ -35,7 +37,7 @@ internal static class PathUtils internal static void MasterStreamOpen( PSCmdlet cmdlet, string filePath, - string encoding, + FileEncoding encoding, bool defaultEncoding, bool Append, bool Force, @@ -46,7 +48,7 @@ internal static void MasterStreamOpen( bool isLiteralPath ) { - Encoding resolvedEncoding = EncodingConversion.Convert(cmdlet, encoding); + Encoding resolvedEncoding = EncodingUtils.GetEncoding(cmdlet, encoding); MasterStreamOpen(cmdlet, filePath, resolvedEncoding, defaultEncoding, Append, Force, NoClobber, out fileStream, out streamWriter, out readOnlyFileInfo, isLiteralPath); } @@ -57,7 +59,7 @@ bool isLiteralPath /// /// cmdlet that is opening the file (used mainly for error reporting) /// path to the file (as specified on the command line - this method will resolve the path) - /// encoding (this method will convert the command line string to an Encoding instance) + /// the encoding (this method will convert the FileEncoding value to an Encoding instance) /// if true, then we will use default .NET encoding instead of the encoding specified in parameter /// /// @@ -188,17 +190,10 @@ internal static void ReportFileOpenFailure(Cmdlet cmdlet, string filePath, Excep cmdlet.ThrowTerminatingError(errorRecord); } - internal static StreamReader OpenStreamReader(PSCmdlet command, string filePath, string encoding, bool isLiteralPath) + internal static StreamReader OpenStreamReader(PSCmdlet command, string filePath, FileEncoding encoding, bool isLiteralPath) { FileStream fileStream = OpenFileStream(filePath, command, isLiteralPath); - if (encoding == null) - { - return new StreamReader(fileStream); - } - else - { - return new StreamReader(fileStream, EncodingConversion.Convert(command, encoding)); - } + return new StreamReader(fileStream, EncodingUtils.GetEncoding(command, encoding)); } internal static FileStream OpenFileStream(string filePath, PSCmdlet command, bool isLiteralPath) @@ -437,83 +432,4 @@ internal static DirectoryInfo CreateTemporaryDirectory() } } - internal static class EncodingConversion - { - internal const string Unknown = "unknown"; - internal const string String = "string"; - internal const string Unicode = "unicode"; - internal const string BigEndianUnicode = "bigendianunicode"; - internal const string Ascii = "ascii"; - internal const string Utf8 = "utf8"; - internal const string Utf7 = "utf7"; - internal const string Utf32 = "utf32"; - internal const string Default = "default"; - internal const string OEM = "oem"; - - /// - /// retrieve the encoding parameter from the command line - /// it throws if the encoding does not match the known ones - /// - /// a System.Text.Encoding object (null if no encoding specified) - internal static Encoding Convert(Cmdlet cmdlet, string encoding) - { - if (string.IsNullOrEmpty(encoding)) - { - // no parameter passed, default to Unicode (OS preferred) - return System.Text.Encoding.Unicode; - } - - // Default to unicode (this matches Get-Content) - if (string.Equals(encoding, Unknown, StringComparison.OrdinalIgnoreCase)) - return System.Text.Encoding.Unicode; - - if (string.Equals(encoding, String, StringComparison.OrdinalIgnoreCase)) - return System.Text.Encoding.Unicode; - - // these are the encodings the CLR supports - if (string.Equals(encoding, Unicode, StringComparison.OrdinalIgnoreCase)) - return System.Text.Encoding.Unicode; - - if (string.Equals(encoding, BigEndianUnicode, StringComparison.OrdinalIgnoreCase)) - return System.Text.Encoding.BigEndianUnicode; - - if (string.Equals(encoding, Utf8, StringComparison.OrdinalIgnoreCase)) - return System.Text.Encoding.UTF8; - - if (string.Equals(encoding, Ascii, StringComparison.OrdinalIgnoreCase)) - return System.Text.Encoding.ASCII; - - if (string.Equals(encoding, Utf7, StringComparison.OrdinalIgnoreCase)) - return System.Text.Encoding.UTF7; - - if (string.Equals(encoding, Utf32, StringComparison.OrdinalIgnoreCase)) - return System.Text.Encoding.UTF32; - - if (string.Equals(encoding, Default, StringComparison.OrdinalIgnoreCase)) - return ClrFacade.GetDefaultEncoding(); - - if (string.Equals(encoding, OEM, StringComparison.OrdinalIgnoreCase)) - { - return ClrFacade.GetOEMEncoding(); - } - - // error condition: unknown encoding value - string validEncodingValues = string.Join( - ", ", - new string[] { Unknown, String, Unicode, BigEndianUnicode, Ascii, Utf8, Utf7, Utf32, Default, OEM }); - string msg = StringUtil.Format(PathUtilsStrings.OutFile_WriteToFileEncodingUnknown, - encoding, validEncodingValues); - - ErrorRecord errorRecord = new ErrorRecord( - PSTraceSource.NewArgumentException("Encoding"), - "WriteToFileEncodingUnknown", - ErrorCategory.InvalidArgument, - null); - - errorRecord.ErrorDetails = new ErrorDetails(msg); - cmdlet.ThrowTerminatingError(errorRecord); - - return null; - } - } } diff --git a/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 b/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 index 20b35867651..284e88d381c 100644 --- a/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 +++ b/test/powershell/Language/Parser/RedirectionOperator.Tests.ps1 @@ -29,22 +29,21 @@ Describe "Redirection operator now supports encoding changes" -Tags "CI" { $psdefaultParameterValues.Remove("out-file:encoding") } - It "If encoding is unset, redirection should be Unicode" { + It "If encoding is unset, redirection should be platform appropriate" { $asciiString > TESTDRIVE:\file.txt + $encoder = [Microsoft.PowerShell.EncodingUtils]::GetEncoding("utf8nobom") $bytes = get-content -encoding byte TESTDRIVE:\file.txt # create the expected - $BOM = [text.encoding]::unicode.GetPreamble() - $TXT = [text.encoding]::unicode.GetBytes($asciiString) - $CR = [text.encoding]::unicode.GetBytes($asciiCR) + $BOM = $encoder.GetPreamble() + $TXT = $encoder.GetBytes($asciiString) + $CR = $encoder.GetBytes($asciiCR) $expectedBytes = .{ $BOM; $TXT; $CR } $bytes.Count | should be $expectedBytes.count - for($i = 0; $i -lt $bytes.count; $i++) { - $bytes[$i] | Should be $expectedBytes[$i] - } + $bytes -join "-" | should be ($expectedBytes -join "-") } - # $availableEncodings = "unknown","string","unicode","bigendianunicode","utf8","utf7", "utf32","ascii","default","oem" - $availableEncodings = (get-command out-file).Parameters["Encoding"].Attributes.ValidValues + # WindowsLegacy encoding tests will be done elsewhere + $availableEncodings = [enum]::GetNames([Microsoft.PowerShell.FileEncoding])|?{@("default","WindowsLegacy") -notcontains $_ } foreach($encoding in $availableEncodings) { $skipTest = $false @@ -60,7 +59,7 @@ Describe "Redirection operator now supports encoding changes" -Tags "CI" { # and out-file has its own translation, so we'll # not do that logic here, but simply ignore those encodings # as they eventually are translated to "real" encoding - $enc = [system.text.encoding]::$encoding + $enc = [Microsoft.PowerShell.EncodingUtils]::GetEncoding($encoding) if ( $enc ) { $msg = "Overriding encoding for out-file is respected for $encoding" diff --git a/test/powershell/Modules/Microsoft.PowerShell.Core/TestGetCommand.Tests.ps1 b/test/powershell/Modules/Microsoft.PowerShell.Core/TestGetCommand.Tests.ps1 index 8f48d529620..202b048b1e9 100644 --- a/test/powershell/Modules/Microsoft.PowerShell.Core/TestGetCommand.Tests.ps1 +++ b/test/powershell/Modules/Microsoft.PowerShell.Core/TestGetCommand.Tests.ps1 @@ -211,7 +211,7 @@ $paramName = "Encoding" $results = get-command -verb get -noun content -Encoding Unicode VerifyDynamicParametersExist -cmdlet $results[0] -parameterNames $paramName - VerifyParameterType -cmdlet $results[0] -parameterName $paramName -parameterType Microsoft.PowerShell.Commands.FileSystemCmdletProviderEncoding + VerifyParameterType -cmdlet $results[0] -parameterName $paramName -parameterType Microsoft.PowerShell.FileEncoding } It "Verify Single Cmdlet Using Verb&Noun ParameterSet With Usage" { @@ -261,7 +261,7 @@ $paramName = "Encoding" $results = Get-Command -verb get -noun content -encoding UTF8 VerifyDynamicParametersExist -cmdlet $results[0] -parameterNames $paramName - VerifyParameterType -cmdlet $results[0] -parameterName $paramName -ParameterType Microsoft.PowerShell.Commands.FileSystemCmdletProviderEncoding + VerifyParameterType -cmdlet $results[0] -parameterName $paramName -ParameterType Microsoft.PowerShell.FileEncoding } #unsupported parameter: -synop @@ -269,6 +269,7 @@ $paramName = "Encoding" $results = get-command -verb get -noun content -encoding UTF8 -synop VerifyDynamicParametersExist -cmdlet $results[0] -parameterNames $paramName - VerifyParameterType -cmdlet $results[0] -parameterName $paramName -ParameterType Microsoft.PowerShell.Commands.FileSystemCmdletProviderEncoding + VerifyParameterType -cmdlet $results[0] -parameterName $paramName -ParameterType Microsoft.PowerShell.FileEncoding } -} \ No newline at end of file +} + diff --git a/test/powershell/engine/Encoding.Tests.ps1 b/test/powershell/engine/Encoding.Tests.ps1 new file mode 100644 index 00000000000..ffebae9a42d --- /dev/null +++ b/test/powershell/engine/Encoding.Tests.ps1 @@ -0,0 +1,219 @@ +Describe "Encoding classes and methods are available" -Tag CI { + BeforeAll { + $testString = "t" + ([char]233) + "st" + $provider = get-item $TESTDRIVE + $testFile = "${TESTDRIVE}/file.txt" + $preamble = @{ + Ascii = '' + BigEndianUTF32 = '254-255' + BigEndianUnicode = '254-255' + Byte = '255-254' + Default = '' + Oem = '' + String = '255-254' + UTF32 = '255-254-0-0' + UTF7 = '' + UTF8 = '' + UTF8BOM = '239-187-191' + UTF8NoBOM = '' + Unicode = '255-254' + Unspecified = '' + WindowsLegacy = '' + } + + function Get-FileBytes + { + param ( $file, [int]$count = [int]::MaxValue ) + (Get-Content $file -Encoding byte | Select-Object -First $count) -Join "-" + } + + function Get-NewLineBytes + { + param ( [Microsoft.PowerShell.FileEncoding]$encoding ) + $encoder = [Microsoft.PowerShell.EncodingUtils]::GetEncoding($encoding) + $encoder.GetBytes([Environment]::NewLine) -Join "-" + } + + $preambleTests = + @{ Encoding = 'Ascii'; Preamble = '' }, + @{ Encoding = 'BigEndianUTF32'; Preamble = '0-0-254-255' }, + @{ Encoding = 'BigEndianUnicode'; Preamble = '254-255' }, + @{ Encoding = 'Byte'; Preamble = '255-254' }, + @{ Encoding = 'Default'; Preamble = '' }, + @{ Encoding = 'Oem'; Preamble = '' }, + @{ Encoding = 'String'; Preamble = '255-254' }, + @{ Encoding = 'UTF32'; Preamble = '255-254-0-0' }, + @{ Encoding = 'UTF7'; Preamble = '' }, + @{ Encoding = 'UTF8'; Preamble = '' }, + @{ Encoding = 'UTF8BOM'; Preamble = '239-187-191' }, + @{ Encoding = 'UTF8NoBOM'; Preamble = '' }, + @{ Encoding = 'Unicode'; Preamble = '255-254' }, + @{ Encoding = 'Unspecified'; Preamble = '' }, + @{ Encoding = 'WindowsLegacy'; Preamble = '' } + + $testStringEncodedBytes = @{ + Ascii = "116-63-115-116-" + (Get-NewLineBytes Ascii) + BigEndianUTF32 = "0-0-254-255-0-0-0-116-0-0-0-233-0-0-0-115-0-0-0-116-" + (Get-NewLineBytes BigEndianUTF32) + BigEndianUnicode = "254-255-0-116-0-233-0-115-0-116-" + (Get-NewLineBytes BigEndianUnicode) + Byte = "255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes Byte) + Default = "116-195-169-115-116-" + (Get-NewLineBytes Default) + # Oem encoding can change depending on system, calculate the expected string + Oem = ([Microsoft.PowerShell.EncodingUtils]::GetEncoding("Oem").GetBytes($testString) -join "-") + "-" + (Get-NewLineBytes Oem) + String = "255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes String) + UTF32 = "255-254-0-0-116-0-0-0-233-0-0-0-115-0-0-0-116-0-0-0-" + (Get-NewLineBytes UTF32) + UTF7 = "116-43-65-79-107-45-115-116-" + (Get-NewLineBytes UTF7) + UTF8 = "116-195-169-115-116-" + (Get-NewLineBytes UTF8 ) + UTF8BOM = "239-187-191-116-195-169-115-116-" + (Get-NewLineBytes UTF8BOM) + UTF8NoBOM = "116-195-169-115-116-" + (Get-NewLineBytes UTF8NoBOM) + Unicode = "255-254-116-0-233-0-115-0-116-0-" + (Get-NewLineBytes Unicode) + Unspecified = "116-195-169-115-116-" + (Get-NewLineBytes Unspecified) + } + + $contentTests = + @{ Encoding = 'Ascii'; Bytes = $testStringEncodedBytes['Ascii'] }, + @{ Encoding = 'BigEndianUTF32'; Bytes = $testStringEncodedBytes['BigEndianUTF32'] }, + @{ Encoding = 'BigEndianUnicode'; Bytes = $testStringEncodedBytes['BigEndianUnicode'] }, + @{ Encoding = 'Byte'; Bytes = $testStringEncodedBytes['Byte'] }, + @{ Encoding = 'Default'; Bytes = $testStringEncodedBytes['Default'] }, + # Oem encoding can change depending on system, calculate the expected string + @{ Encoding = 'Oem'; Bytes = $testStringEncodedBytes['Oem'] }, + @{ Encoding = 'String'; Bytes = $testStringEncodedBytes['String'] }, + @{ Encoding = 'UTF32'; Bytes = $testStringEncodedBytes['UTF32'] }, + @{ Encoding = 'UTF7'; Bytes = $testStringEncodedBytes['UTF7'] }, + @{ Encoding = 'UTF8'; Bytes = $testStringEncodedBytes['UTF8'] }, + @{ Encoding = 'UTF8BOM'; Bytes = $testStringEncodedBytes['UTF8BOM'] }, + @{ Encoding = 'UTF8NoBOM'; Bytes = $testStringEncodedBytes['UTF8NoBOM'] }, + @{ Encoding = 'Unicode'; Bytes = $testStringEncodedBytes['Unicode'] }, + @{ Encoding = 'Unspecified'; Bytes = $testStringEncodedBytes['Unspecified'] } + + } + + AfterEach { + if ( Test-Path $testFile ) + { + remove-item $testFile + } + $PSDefaultFileEncoding = "Unspecified" + } + + It "Encoding for '' should have correct preamble ''" -TestCase $preambleTests { + param ( $Encoding, $Preamble ) + [Microsoft.PowerShell.EncodingUtils]::GetEncoding($Encoding).GetPreamble() -Join "-" | Should be $Preamble + } + + It "Encoding for '' should create file with proper encoding" -TestCase $contentTests { + param ( $Encoding, $Bytes ) + $testString | out-file -encoding $Encoding $testFile + Get-FileBytes $testFile | should be $Bytes + } + + It "Setting PSDefaultFileEncoding to '' should create file with proper encoding" -TestCase $contentTests { + param ( $Encoding, $Bytes ) + $PSDefaultFileEncoding = $Encoding + $testString | out-file $testFile + Get-FileBytes $testFile | should be $Bytes + } + + It "Explicit encoding is not overridden by setting PSDefaultFileEncoding to ''" -TestCase $contentTests { + param ( $Encoding, $Bytes ) + $PSDefaultFileEncoding = $Encoding + $testString | out-file -encoding ascii $testFile + Get-FileBytes $testFile | should be $testStringEncodedBytes['Ascii'] + } + + It "Explicit encoding set to unknown and preference variable set to unicode creates unicode file" { + $PSDefaultFileEncoding = "Unicode" + $testString | set-content -encoding unspecified $testfile + Get-FileBytes $testFile | should be $testStringEncodedBytes['Unicode'] + } + + It "When session state is null, GetEncodingPreference returns unspecified" { + [Microsoft.PowerShell.EncodingUtils]::GetEncodingPreference($null) | should be "unspecified" + } + + Context "GetFileEncodingFromFile tests" { + BeforeAll { + $TestCases = @{ Encoding = "Unicode"; Text = $testString; FilePath = $testFile }, + @{ Encoding = "UTF8NoBOM"; Text = $testString; FilePath = $testFile }, + @{ Encoding = "UTF32"; Text = $testString; FilePath = $testFile }, + @{ Encoding = "BigEndianUTF32"; Text = $testString; FilePath = $testFile }, + @{ Encoding = "UTF8Bom"; Text = $testString; FilePath = $testFile }, + @{ Encoding = "UTF8NoBom"; Text = ""; FilePath = $testFile }, + @{ Encoding = "Default"; Text = ""; FilePath = "$TESTDRIVE/ThisFileCouldNotPossiblyExist" } + } + + It "GetFileEncodingFromFile can discover a encoded file" -TestCase $TestCases { + param ( $Encoding, $Text, $FilePath ) + # I need a way to not open the right file to test the missing file scenario + $Text | set-content -encoding $Encoding $testFile + get-content $testFile | should be $Text + } + } + + Context "Legacy Windows Behavior" { + + It "Add-Content creates utf8 encoded files" { + $testString | add-content -encoding WindowsLegacy $TESTDRIVE/file.txt + Get-FileBytes $TESTDRIVE/file.txt | should be $testStringEncodedBytes['UTF8'] + } + + It "Set-Content creates utf8 encoded files" { + $testString | set-content -encoding WindowsLegacy $TESTDRIVE/file.txt + Get-FileBytes $TESTDRIVE/file.txt | should be $testStringEncodedBytes['UTF8'] + } + + It "Export-CliXml creates unicode encoded files" { + [pscustomobject]@{ text = $testString } | export-clixml -encoding WindowsLegacy $TESTDRIVE/file.clixml + # these are the characters + # $TESTDRIVE/file.txt + } + finally { + $PSDefaultFileEncoding = "Unspecified" + } + # we are using the first 10 bytes to convince us that we created the proper encoding + # this doesn't include the new line + Get-FileBytes $TESTDRIVE/file.txt -count 10 | should match "255-254-116-0-233-0-115-0-116-0" + } + } +} + + diff --git a/test/powershell/engine/Module/NewModuleManifest.Tests.ps1 b/test/powershell/engine/Module/NewModuleManifest.Tests.ps1 index 2baae5dfac5..6ba187eaff6 100644 --- a/test/powershell/engine/Module/NewModuleManifest.Tests.ps1 +++ b/test/powershell/engine/Module/NewModuleManifest.Tests.ps1 @@ -9,9 +9,10 @@ Describe "New-ModuleManifest tests" -tags "CI" { } BeforeAll { - if ($IsWindows) + # encoding is the same on all platforms, except for new lines + if ( $IsWindows ) { - $ExpectedManifestBytes = @(255,254,35,0,13,0,10,0) + $ExpectedManifestBytes = @(35,13,10) } else {