From f9d9626e6c145836eac5dafe7bb54e7590d382c6 Mon Sep 17 00:00:00 2001 From: Joel <32407840+vexx32@users.noreply.github.com> Date: Tue, 18 Sep 2018 13:04:20 -0400 Subject: [PATCH 01/12] Add CharTraits to suffix characters for additional numeric types s -> short u -> unsigned --- .../engine/parser/CharTraits.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/System.Management.Automation/engine/parser/CharTraits.cs b/src/System.Management.Automation/engine/parser/CharTraits.cs index a686e3e4a5b..420b6296ed2 100644 --- a/src/System.Management.Automation/engine/parser/CharTraits.cs +++ b/src/System.Management.Automation/engine/parser/CharTraits.cs @@ -155,9 +155,9 @@ static CharExtensions() /* P */ CharTraits.IdentifierStart | CharTraits.VarNameFirst | CharTraits.MultiplierStart, /* Q */ CharTraits.IdentifierStart | CharTraits.VarNameFirst, /* R */ CharTraits.IdentifierStart | CharTraits.VarNameFirst, -/* S */ CharTraits.IdentifierStart | CharTraits.VarNameFirst, +/* S */ CharTraits.IdentifierStart | CharTraits.VarNameFirst | CharTraits.TypeSuffix, /* T */ CharTraits.IdentifierStart | CharTraits.VarNameFirst | CharTraits.MultiplierStart, -/* U */ CharTraits.IdentifierStart | CharTraits.VarNameFirst, +/* U */ CharTraits.IdentifierStart | CharTraits.VarNameFirst | CharTraits.TypeSuffix, /* V */ CharTraits.IdentifierStart | CharTraits.VarNameFirst, /* W */ CharTraits.IdentifierStart | CharTraits.VarNameFirst, /* X */ CharTraits.IdentifierStart | CharTraits.VarNameFirst, @@ -187,9 +187,9 @@ static CharExtensions() /* p */ CharTraits.IdentifierStart | CharTraits.VarNameFirst | CharTraits.MultiplierStart, /* q */ CharTraits.IdentifierStart | CharTraits.VarNameFirst, /* r */ CharTraits.IdentifierStart | CharTraits.VarNameFirst, -/* s */ CharTraits.IdentifierStart | CharTraits.VarNameFirst, +/* s */ CharTraits.IdentifierStart | CharTraits.VarNameFirst | CharTraits.TypeSuffix, /* t */ CharTraits.IdentifierStart | CharTraits.VarNameFirst | CharTraits.MultiplierStart, -/* u */ CharTraits.IdentifierStart | CharTraits.VarNameFirst, +/* u */ CharTraits.IdentifierStart | CharTraits.VarNameFirst | CharTraits.TypeSuffix, /* v */ CharTraits.IdentifierStart | CharTraits.VarNameFirst, /* w */ CharTraits.IdentifierStart | CharTraits.VarNameFirst, /* x */ CharTraits.IdentifierStart | CharTraits.VarNameFirst, From 2bccd8d82e0f0668487e81902afab75defe1364a Mon Sep 17 00:00:00 2001 From: Joel <32407840+vexx32@users.noreply.github.com> Date: Tue, 18 Sep 2018 13:04:44 -0400 Subject: [PATCH 02/12] Add tokenizing logic for parsing short and unsigned values --- .../engine/parser/tokenizer.cs | 169 +++++++++++++++--- 1 file changed, 146 insertions(+), 23 deletions(-) diff --git a/src/System.Management.Automation/engine/parser/tokenizer.cs b/src/System.Management.Automation/engine/parser/tokenizer.cs index 3e857fd94ee..27bade3de11 100644 --- a/src/System.Management.Automation/engine/parser/tokenizer.cs +++ b/src/System.Management.Automation/engine/parser/tokenizer.cs @@ -477,6 +477,16 @@ internal enum TokenizerMode Signature, // i.e. class or method declaration } + [Flags] + internal enum NumberSuffixFlags + { + None = 0x0, + Unsigned = 0x1, + Short = 0x2, + Long = 0x4, + Decimal = 0x8 + } + // // Class used to do a partial snapshot of the state of the tokenizer. // This is used for nested scans on the same string. @@ -1307,7 +1317,8 @@ private char ScanUnicodeEscape(out char surrogateCharacter) } return s_invalidChar; } - else if (i == s_maxNumberOfUnicodeHexDigits) { + else if (i == s_maxNumberOfUnicodeHexDigits) + { UngetChar(); Release(sb); @@ -2832,7 +2843,7 @@ private Token ScanVariable(bool splatted, bool inStringExpandable) // occur after a variable. case '.': case '[': - // Something like $a.b or $a[1]. + // Something like $a.b or $a[1]. case '=': // Something like $a= UngetChar(); @@ -3237,7 +3248,7 @@ private void ScanNumberAfterDot(StringBuilder sb, ref int signIndex, ref bool no } } - private static bool TryGetNumberValue(string strNum, bool hex, bool real, char suffix, long multiplier, out object result) + private static bool TryGetNumberValue(string strNum, bool hex, bool real, NumberSuffixFlags suffix, long multiplier, out object result) { checked { @@ -3245,7 +3256,10 @@ private static bool TryGetNumberValue(string strNum, bool hex, bool real, char s { NumberStyles style = NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent; - if (suffix == 'd' || suffix == 'D') + + // Decimal parser does not accept hex literals, and 'd' is a valid hex character, so will never be read as Decimal literal + // e.g., 0x1d == 29 + if (suffix == NumberSuffixFlags.Decimal) { decimal d; if (Decimal.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out d)) @@ -3268,17 +3282,44 @@ private static bool TryGetNumberValue(string strNum, bool hex, bool real, char s { d = -0.0; } - if (suffix == 'l' || suffix == 'L') - { - result = ((long)Convert.ChangeType(d, typeof(long), CultureInfo.InvariantCulture)) * multiplier; - } - else + + switch (suffix) { - result = d * multiplier; + case NumberSuffixFlags.None: + result = d * multiplier; + break; + case NumberSuffixFlags.Long: + result = ((long)Convert.ChangeType(d, typeof(long), CultureInfo.InvariantCulture) * multiplier); + break; + case NumberSuffixFlags.Short: + result = (short)((short)Convert.ChangeType(d, typeof(short), CultureInfo.InvariantCulture) * multiplier); + break; + case NumberSuffixFlags.Unsigned | NumberSuffixFlags.Long: + result = (ulong)Convert.ChangeType(d, typeof(ulong), CultureInfo.InvariantCulture) * (ulong)multiplier; + break; + case NumberSuffixFlags.Unsigned | NumberSuffixFlags.Short: + result = (ushort)((ushort)Convert.ChangeType(d, typeof(ushort), CultureInfo.InvariantCulture) * multiplier); + break; + case NumberSuffixFlags.Unsigned: + ulong testresult = (ulong)Convert.ChangeType(d, typeof(ulong), CultureInfo.InvariantCulture) * (ulong)multiplier; + if (testresult < uint.MaxValue) + { + result = (uint)testresult; + } + else + { + result = testresult; + } + break; + default: + result = null; + return false; } + return true; } + // TryParse on (real) number fails. result = null; return false; } @@ -3294,15 +3335,47 @@ private static bool TryGetNumberValue(string strNum, bool hex, bool real, char s style = hex ? NumberStyles.AllowHexSpecifier : NumberStyles.AllowLeadingSign; long longValue; - if (suffix == 'l' || suffix == 'L') + switch (suffix) { - if (long.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out longValue)) - { - result = longValue * multiplier; - return true; - } - result = null; - return false; + case NumberSuffixFlags.Long: + if (long.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out longValue)) + { + result = longValue * multiplier; + return true; + } + + result = null; + return false; + case NumberSuffixFlags.Short: + if (short.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out short s)) + { + result = (short)(s * multiplier); + return true; + } + + result = null; + return false; + default: + if (suffix.HasFlag(NumberSuffixFlags.Unsigned) && ulong.TryParse(strNum, style, NumberFormatInfo.InvariantInfo, out ulong u)) + { + u *= (ulong)multiplier; + if (suffix.HasFlag(NumberSuffixFlags.Short) && u <= ushort.MaxValue) + { + result = (ushort)u; + } + else if (!suffix.HasFlag(NumberSuffixFlags.Long) && u <= uint.MaxValue) + { + result = (uint)u; + } + else + { + // ulong + result = u; + } + return true; + } + + break; } // From here on - the user hasn't specified the type, so we need to figure it out. @@ -3377,7 +3450,7 @@ private Token ScanNumber(char firstChar) || (AllowSignedNumbers && (firstChar == '+' || firstChar.IsDash())), "Number must start with '.', '-', or digit."); bool hex, real; - char suffix; + NumberSuffixFlags suffix; long multiplier; string strNum = ScanNumberHelper(firstChar, out hex, out real, out suffix, out multiplier); @@ -3418,11 +3491,11 @@ private Token ScanNumber(char firstChar) /// OR /// return the string format of the number /// - private string ScanNumberHelper(char firstChar, out bool hex, out bool real, out char suffix, out long multiplier) + private string ScanNumberHelper(char firstChar, out bool hex, out bool real, out NumberSuffixFlags suffix, out long multiplier) { hex = false; real = false; - suffix = '\0'; + suffix = NumberSuffixFlags.None; multiplier = 1; bool notNumber = false; @@ -3492,8 +3565,58 @@ private string ScanNumberHelper(char firstChar, out bool hex, out bool real, out if (c.IsTypeSuffix()) { SkipChar(); - suffix = c; + switch (c) + { + case 'u': + case 'U': + suffix |= NumberSuffixFlags.Unsigned; + break; + case 's': + case 'S': + suffix |= NumberSuffixFlags.Short; + break; + case 'l': + case 'L': + suffix |= NumberSuffixFlags.Long; + break; + case 'd': + case 'D': + suffix |= NumberSuffixFlags.Decimal; + break; + default: + notNumber = true; + break; + } + c = PeekChar(); + + if (c.IsTypeSuffix()) + { + SkipChar(); + if (suffix == NumberSuffixFlags.Unsigned) + { + switch (c) + { + case 'l': + case 'L': + suffix |= NumberSuffixFlags.Long; + break; + case 's': + case 'S': + suffix |= NumberSuffixFlags.Short; + break; + default: + notNumber = true; + break; + } + } + else + { + notNumber = true; + } + + c = PeekChar(); + } } if (c.IsMultiplierStart()) @@ -4317,7 +4440,7 @@ internal Token NextToken() if (InExpressionMode() && (char.IsDigit(c1) || c1 == '.')) { bool hex, real; - char suffix; + NumberSuffixFlags suffix; long multiplier; // check if the next token is actually a number From 401a3541d4e6f669f4312b7d77203aecd7ae2edb Mon Sep 17 00:00:00 2001 From: Joel <32407840+vexx32@users.noreply.github.com> Date: Tue, 18 Sep 2018 13:05:05 -0400 Subject: [PATCH 03/12] Add tests for parsing new numeric types Update numeric parsing tests --- .../Language/Parser/Parser.Tests.ps1 | 219 +++++++++++++++--- 1 file changed, 183 insertions(+), 36 deletions(-) diff --git a/test/powershell/Language/Parser/Parser.Tests.ps1 b/test/powershell/Language/Parser/Parser.Tests.ps1 index a8ff8b9e86b..5d25739c44d 100644 --- a/test/powershell/Language/Parser/Parser.Tests.ps1 +++ b/test/powershell/Language/Parser/Parser.Tests.ps1 @@ -648,43 +648,190 @@ foo``u{2195}abc Context "Numerical Notations Tests (starting at line 2374 to line 2452)" { $testData = @( - #Test various numbers using the standard notation. - @{ Script = "0"; Expected = "0" } - @{ Script = "-2"; Expected = "-2" } - @{ Script = "2"; Expected = "2" } - @{ Script = $([int32]::MaxValue); Expected = $([int32]::MaxValue) } - @{ Script = $([int32]::MinValue); Expected = $([int32]::MinValue) } - #Tests for hexadecimal notation. - @{ Script = "0x0"; Expected = "0" } - @{ Script = "0xF"; Expected = "15" } - @{ Script = "0x80000000"; Expected = $([int32]::MinValue) } - @{ Script = "0xFFFFFFFF"; Expected = "-1" } - @{ Script = "0x7fffffff"; Expected = $([int32]::MaxValue) } - @{ Script = "0x100000000"; Expected = [int64]0x100000000 } - #Tests for exponential notation. - @{ Script = "0e0"; Expected = "0" } - @{ Script = "0e1"; Expected = "0" } - @{ Script = "1e2"; Expected = "100" } - @{ Script = $([int32]::MaxValue); Expected = $([int32]::MaxValue) } - @{ Script = "0e2"; Expected = "0" } - @{ Script = "-2e2"; Expected = "-200" } - @{ Script = "-0e2"; Expected = "0" } - @{ Script = "3e0"; Expected = "3" } - #Tests for floating point notation. - @{ Script = ".01"; Expected = "0.01" } - @{ Script = "0.0"; Expected = "0" } - @{ Script = "-0.1"; Expected = "-0.1" } - @{ Script = "9.12"; Expected = "9.12" } - @{ Script = $([single]::MinValue); Expected = $([float]::MinValue).ToString() } - @{ Script = $([float]::MaxValue); Expected = $([float]::MaxValue).ToString() } - #Tests for the K suffix for numbers. - @{ Script = "0kb"; Expected = "0" } - @{ Script = "1kb"; Expected = "1024" } - @{ Script = "-2KB"; Expected = "-2048" } + #Standard numeric notation + #Standard + @{ Script = "0"; ExpectedValue = "0"; ExpectedType = [int] } + @{ Script = "10"; ExpectedValue = "10"; ExpectedType = [int] } + @{ Script = "-10"; ExpectedValue = "-10"; ExpectedType = [int] } + @{ Script = "+10"; ExpectedValue = "10"; ExpectedType = [int] } + # + @{ Script = "0.0"; ExpectedValue = "0"; ExpectedType = [double] } + @{ Script = "6.5"; ExpectedValue = "6.5"; ExpectedType = [double] } + @{ Script = "-6.5"; ExpectedValue = "-6.5"; ExpectedType = [double] } + #Exponential + @{ Script = "0e0"; ExpectedValue = "0"; ExpectedType = [double] } + @{ Script = "5e-2"; ExpectedValue = "0.05"; ExpectedType = [double] } + @{ Script = "5e2"; ExpectedValue = "500"; ExpectedType = [double] } + @{ Script = "-5e-2"; ExpectedValue = "-0.05"; ExpectedType = [double] } + @{ Script = "-5e2"; ExpectedValue = "-500"; ExpectedType = [double] } + #Hexadecimal + @{ Script = "0x0"; ExpectedValue = "0"; ExpectedType = [int] } + @{ Script = "0x12"; ExpectedValue = "18"; ExpectedType = [int] } + @{ Script = "-0x12"; ExpectedValue = "-18"; ExpectedType = [int] } + #Multipliers + @{ Script = "1kb"; ExpectedValue = "1024"; ExpectedType = [int] } + @{ Script = "1mb"; ExpectedValue = "1048576"; ExpectedType = [int] } + @{ Script = "1gb"; ExpectedValue = "1073741824"; ExpectedType = [int] } + @{ Script = "1tb"; ExpectedValue = "1099511627776"; ExpectedType = [long] } + @{ Script = "1pb"; ExpectedValue = "1125899906842624"; ExpectedType = [long] } + + #Decimal notation + #Integer + @{ Script = "0d"; ExpectedValue = "0"; ExpectedType = [decimal] } + @{ Script = "100d"; ExpectedValue = "100"; ExpectedType = [decimal] } + @{ Script = "-100d"; ExpectedValue = "-100"; ExpectedType = [decimal] } + @{ Script = "+100d"; ExpectedValue = "100"; ExpectedType = [decimal] } + # + @{ Script = "0.0d"; ExpectedValue = "0.0"; ExpectedType = [decimal] } + @{ Script = "1.5d"; ExpectedValue = "1.5"; ExpectedType = [decimal] } + @{ Script = "-1.5d"; ExpectedValue = "-1.5"; ExpectedType = [decimal] } + #Exponential + @{ Script = "0e0d"; ExpectedValue = "0"; ExpectedType = [decimal] } + @{ Script = "15e3d"; ExpectedValue = "15000"; ExpectedType = [decimal] } + @{ Script = "-15e3d"; ExpectedValue = "-15000"; ExpectedType = [decimal] } + #Multipliers + @{ Script = "1dkb"; ExpectedValue = "1024"; ExpectedType = [decimal] } + @{ Script = "1dmb"; ExpectedValue = "1048576"; ExpectedType = [decimal] } + @{ Script = "1dgb"; ExpectedValue = "1073741824"; ExpectedType = [decimal] } + @{ Script = "1dtb"; ExpectedValue = "1099511627776"; ExpectedType = [decimal] } + @{ Script = "1dpb"; ExpectedValue = "1125899906842624"; ExpectedType = [decimal] } + + #Short Integer notation + #Standard + @{ Script = "0s"; ExpectedValue = "0"; ExpectedType = [short] } + @{ Script = "10s"; ExpectedValue = "10"; ExpectedType = [short] } + @{ Script = "-10s"; ExpectedValue = "-10"; ExpectedType = [short] } + @{ Script = "+10s"; ExpectedValue = "10"; ExpectedType = [short] } + #Conversion from + @{ Script = "0.0s"; ExpectedValue = "0"; ExpectedType = [short] } + @{ Script = "3.72s"; ExpectedValue = "4"; ExpectedType = [short] } + @{ Script = "-3.72s"; ExpectedValue = "-4"; ExpectedType = [short] } + #Exponential + @{ Script = "0e0s"; ExpectedValue = "0"; ExpectedType = [short] } + @{ Script = "3e0s"; ExpectedValue = "3"; ExpectedType = [short] } + @{ Script = "-3e0s"; ExpectedValue = "-3"; ExpectedType = [short] } + @{ Script = "3e2s"; ExpectedValue = "300"; ExpectedType = [short] } + @{ Script = "-3e2s"; ExpectedValue = "-300"; ExpectedType = [short] } + #Hexadecimal + @{ Script = "0x0s"; ExpectedValue = "0"; ExpectedType = [short] } + @{ Script = "0x41s"; ExpectedValue = "65"; ExpectedType = [short] } + @{ Script = "-0x41s"; ExpectedValue = "-65"; ExpectedType = [short] } + #Multipliers + @{ Script = "1skb"; ExpectedValue = "1024"; ExpectedType = [short] } + + #Long Integer notation + #Standard + @{ Script = "0l"; ExpectedValue = "0"; ExpectedType = [long] } + @{ Script = "10l"; ExpectedValue = "10"; ExpectedType = [long] } + @{ Script = "-10l"; ExpectedValue = "-10"; ExpectedType = [long] } + @{ Script = "+10l"; ExpectedValue = "10"; ExpectedType = [long] } + #Conversion from + @{ Script = "0.0l"; ExpectedValue = "0"; ExpectedType = [long] } + @{ Script = "2.5l"; ExpectedValue = "2"; ExpectedType = [long] } + @{ Script = "-2.5l"; ExpectedValue = "-2"; ExpectedType = [long] } + #Exponential + @{ Script = "0e0l"; ExpectedValue = "0"; ExpectedType = [long] } + @{ Script = "3e2l"; ExpectedValue = "300"; ExpectedType = [long] } + @{ Script = "-3e2l"; ExpectedValue = "-300"; ExpectedType = [long] } + #Hexadecimal + @{ Script = "0x0l"; ExpectedValue = "0"; ExpectedType = [long] } + @{ Script = "0x41l"; ExpectedValue = "65"; ExpectedType = [long] } + @{ Script = "-0x41l"; ExpectedValue = "-65"; ExpectedType = [long] } + #Multipliers + @{ Script = "1lkb"; ExpectedValue = "1024"; ExpectedType = [long] } + @{ Script = "1lmb"; ExpectedValue = "1048576"; ExpectedType = [long] } + @{ Script = "1lgb"; ExpectedValue = "1073741824"; ExpectedType = [long] } + @{ Script = "1ltb"; ExpectedValue = "1099511627776"; ExpectedType = [long] } + @{ Script = "1lpb"; ExpectedValue = "1125899906842624"; ExpectedType = [long] } + + #Unsigned Integer notation + #Standard + @{ Script = "0u"; ExpectedValue = "0"; ExpectedType = [uint] } + @{ Script = "10u"; ExpectedValue = "10"; ExpectedType = [uint] } + @{ Script = "+10u"; ExpectedValue = "10"; ExpectedType = [uint] } + #Conversion from + @{ Script = "0.0u"; ExpectedValue = "0"; ExpectedType = [uint] } + @{ Script = "2.5u"; ExpectedValue = "2"; ExpectedType = [uint] } + #Exponential + @{ Script = "0e0u"; ExpectedValue = "0"; ExpectedType = [uint] } + @{ Script = "3e2u"; ExpectedValue = "300"; ExpectedType = [uint] } + #Hexadecimal + @{ Script = "0x0u"; ExpectedValue = "0"; ExpectedType = [uint] } + @{ Script = "0x41u"; ExpectedValue = "65"; ExpectedType = [uint] } + #Multipliers + @{ Script = "1ukb"; ExpectedValue = "1024"; ExpectedType = [uint] } + @{ Script = "1umb"; ExpectedValue = "1048576"; ExpectedType = [uint] } + @{ Script = "1ugb"; ExpectedValue = "1073741824"; ExpectedType = [uint] } + @{ Script = "1utb"; ExpectedValue = "1099511627776"; ExpectedType = [ulong] } + @{ Script = "1upb"; ExpectedValue = "1125899906842624"; ExpectedType = [ulong] } + + #Unsigned-Short Integer Notation + #Standard + @{ Script = "0us"; ExpectedValue = "0"; ExpectedType = [ushort] } + @{ Script = "10us"; ExpectedValue = "10"; ExpectedType = [ushort] } + @{ Script = "+10us"; ExpectedValue = "10"; ExpectedType = [ushort] } + #Conversion from + @{ Script = "0.0us"; ExpectedValue = "0"; ExpectedType = [ushort] } + @{ Script = "3.72us"; ExpectedValue = "4"; ExpectedType = [ushort] } + #Exponential + @{ Script = "0e0us"; ExpectedValue = "0"; ExpectedType = [ushort] } + @{ Script = "3e0us"; ExpectedValue = "3"; ExpectedType = [ushort] } + @{ Script = "3e2us"; ExpectedValue = "300"; ExpectedType = [ushort] } + #Hexadecimal + @{ Script = "0x0us"; ExpectedValue = "0"; ExpectedType = [ushort] } + @{ Script = "0x41us"; ExpectedValue = "65"; ExpectedType = [ushort] } + #Multipliers + @{ Script = "1uskb"; ExpectedValue = "1024"; ExpectedType = [ushort] } + + #Unsigned-Long Integer Notation + #Standard + @{ Script = "0ul"; ExpectedValue = "0"; ExpectedType = [ulong] } + @{ Script = "10ul"; ExpectedValue = "10"; ExpectedType = [ulong] } + @{ Script = "+10ul"; ExpectedValue = "10"; ExpectedType = [ulong] } + #Conversion from + @{ Script = "0.0ul"; ExpectedValue = "0"; ExpectedType = [ulong] } + @{ Script = "2.5ul"; ExpectedValue = "2"; ExpectedType = [ulong] } + #Exponential + @{ Script = "0e0ul"; ExpectedValue = "0"; ExpectedType = [ulong] } + @{ Script = "3e2ul"; ExpectedValue = "300"; ExpectedType = [ulong] } + #Hexadecimal + @{ Script = "0x0ul"; ExpectedValue = "0"; ExpectedType = [ulong] } + @{ Script = "0x41ul"; ExpectedValue = "65"; ExpectedType = [ulong] } + #Multipliers + @{ Script = "1ulkb"; ExpectedValue = "1024"; ExpectedType = [ulong] } + @{ Script = "1ulmb"; ExpectedValue = "1048576"; ExpectedType = [ulong] } + @{ Script = "1ulgb"; ExpectedValue = "1073741824"; ExpectedType = [ulong] } + @{ Script = "1ultb"; ExpectedValue = "1099511627776"; ExpectedType = [ulong] } + @{ Script = "1ulpb"; ExpectedValue = "1125899906842624"; ExpectedType = [ulong] } ) - It "