JavaScriptBench
diff --git a/bin/NativeTests/FileLoadHelpers.cpp b/bin/NativeTests/FileLoadHelpers.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/lib/Common/Codex/Utf8Codex.cpp b/lib/Common/Codex/Utf8Codex.cpp
Lines changed: 15 additions & 41 deletions
diff --git a/lib/Common/Codex/Utf8Codex.h b/lib/Common/Codex/Utf8Codex.h
Lines changed: 6 additions & 15 deletions
diff --git a/lib/Common/Codex/Utf8Helper.h b/lib/Common/Codex/Utf8Helper.h
Lines changed: 1 addition & 2 deletions
diff --git a/lib/Jsrt/JsrtDebugUtils.cpp b/lib/Jsrt/JsrtDebugUtils.cpp
Lines changed: 13 additions & 8 deletions
diff --git a/lib/Parser/Hash.cpp b/lib/Parser/Hash.cpp
Lines changed: 28 additions & 16 deletions
@@ -9,7 +9,7 @@ HRESULT FileLoadHelpers::LoadScriptFromFile(LPCSTR filename, LPCWSTR& contents,
 {
     HRESULT hr = S_OK;
     LPCWSTR contentsRaw = nullptr;
-    byte * pRawBytes = nullptr;
+    LPCUTF8 pRawBytes = nullptr;
     UINT lengthBytes = 0;
     bool isUtf8 = false;
     contents = nullptr;
@@ -119,7 +119,7 @@ HRESULT FileLoadHelpers::LoadScriptFromFile(LPCSTR filename, LPCWSTR& contents,
             IfFailGo(E_OUTOFMEMORY);
         }
 
-        utf8::DecodeIntoAndNullTerminate((char16*) contents, pRawBytes, cUtf16Chars, decodeOptions);
+        utf8::DecodeUnitsIntoAndNullTerminate((char16*)contents, pRawBytes, pRawBytes + lengthBytes, decodeOptions);
     }
 
 Error:
 
@@ -376,42 +376,9 @@ namespace utf8
         else
             return ptr;
     }
-
-    void DecodeInto(__out_ecount_full(cch) char16 *buffer, LPCUTF8 ptr, size_t cch, DecodeOptions options)
-    {
-        DecodeOptions localOptions = options;
-
-        if (!ShouldFastPath(ptr, buffer)) goto LSlowPath;
-
-LFastPath:
-        while (cch >= 4)
-        {
-            uint32 bytes = *(uint32 *)ptr;
-            if ((bytes & 0x80808080) != 0) goto LSlowPath;
-            ((uint32 *)buffer)[0] = (bytes & 0x7F) | ((bytes << 8) & 0x7F0000);
-            ((uint32 *)buffer)[1] = ((bytes >> 16) & 0x7F) | ((bytes >> 8) & 0x7F0000);
-            ptr += 4;
-            buffer += 4;
-            cch -= 4;
-        }
-LSlowPath:
-        while (cch-- > 0)
-        {
-            LPCUTF8 end = ptr + cch + 1; // WARNING: Assume cch correct, suppress end-of-buffer checking
-
-            *buffer++ = Decode(ptr, end, localOptions);
-            if (ShouldFastPath(ptr, buffer)) goto LFastPath;
-        }
-    }
-
-    void DecodeIntoAndNullTerminate(__out_ecount(cch+1) __nullterminated char16 *buffer, LPCUTF8 ptr, size_t cch, DecodeOptions options)
-    {
-        DecodeInto(buffer, ptr, cch, options);
-        buffer[cch] = 0;
-    }
-
-    _Ret_range_(0, pbEnd - _Old_(pbUtf8))
-    size_t DecodeUnitsInto(_Out_writes_(pbEnd - pbUtf8) char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options)
+    
+    _Use_decl_annotations_
+    size_t DecodeUnitsInto(char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options)
     {
         DecodeOptions localOptions = options;
 
@@ -456,22 +423,29 @@ namespace utf8
         return dest - buffer;
     }
 
-    size_t DecodeUnitsIntoAndNullTerminate(__out_ecount(pbEnd - pbUtf8 + 1) __nullterminated char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options)
+    _Use_decl_annotations_
+    size_t DecodeUnitsIntoAndNullTerminate(char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options)
     {
         size_t result = DecodeUnitsInto(buffer, pbUtf8, pbEnd, options);
         buffer[(int)result] = 0;
         return result;
     }
 
-    bool CharsAreEqual(__in_ecount(cch) LPCOLESTR pch, LPCUTF8 bch, size_t cch, DecodeOptions options)
+    _Use_decl_annotations_
+    size_t DecodeUnitsIntoAndNullTerminateNoAdvance(char16 *buffer, LPCUTF8 pbUtf8, LPCUTF8 pbEnd, DecodeOptions options)
+    {
+        return DecodeUnitsIntoAndNullTerminate(buffer, pbUtf8, pbEnd, options);
+    }
+
+    bool CharsAreEqual(LPCOLESTR pch, LPCUTF8 bch, LPCUTF8 end, DecodeOptions options)
     {
         DecodeOptions localOptions = options;
-        while (cch-- > 0)
+        while (bch < end)
         {
-            LPCUTF8 end = bch + cch + 1; // WARNING: Assume cch correct, suppress end-of-buffer checking
-
             if (*pch++ != utf8::Decode(bch, end, localOptions))
+            {
                 return false;
+            }
         }
         return true;
     }
 
@@ -26,6 +26,9 @@ typedef char16_t char16;
 
 typedef char16 wchar;
 
+#ifndef Unused
+#define Unused(var) var
+#endif
 
 #ifndef _WIN32
 // Templates are defined here in order to avoid a dependency on C++
@@ -270,27 +273,15 @@ namespace utf8
         return PrevCharFull(ptr, start);
     }
 
-    // Decode a UTF-8 sequence of cch UTF-16 characters into buffer. ptr could advance up to 3 times
-    // longer than cch so DecodeInto should only be used when it is already known that
-    // ptr refers to at least cch number of UTF-8 sequences.
-    void DecodeInto(__out_ecount_full(cch) char16 *buffer, LPCUTF8 ptr, size_t cch, DecodeOptions options = doDefault);
-
-    // Provided for dual-mode templates
-    inline void DecodeInto(__out_ecount_full(cch) char16 *buffer, const char16 *ptr, size_t cch, DecodeOptions /* options */ = doDefault)
-    {
-        memcpy_s(buffer, cch * sizeof(char16), ptr, cch * sizeof(char16));
-    }
-
-    // Like DecodeInto but ensures buffer ends with a NULL at buffer[cch].
-    void DecodeIntoAndNullTerminate(__out_ecount(cch+1) __nullterminated char16 *buffer, LPCUTF8 ptr, size_t cch, DecodeOptions options = doDefault);
-
     // Decode cb bytes from ptr to into buffer returning the number of characters converted and written to buffer
     _Ret_range_(0, pbEnd - _Old_(pbUtf8))
     size_t DecodeUnitsInto(_Out_writes_(pbEnd - pbUtf8) char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options = doDefault);
 
     // Decode cb bytes from ptr to into buffer returning the number of characters converted and written to buffer (excluding the null terminator)
     size_t DecodeUnitsIntoAndNullTerminate(__out_ecount(pbEnd - pbUtf8 + 1) __nullterminated char16 *buffer, LPCUTF8& pbUtf8, LPCUTF8 pbEnd, DecodeOptions options = doDefault);
 
+    size_t DecodeUnitsIntoAndNullTerminateNoAdvance(__out_ecount(pbEnd - pbUtf8 + 1) __nullterminated char16 *buffer, LPCUTF8 pbUtf8, LPCUTF8 pbEnd, DecodeOptions options = doDefault);
+
     // Encode a UTF-8 sequence into a UTF-8 sequence (which is just a memcpy). This is included for convenience in templates
     // when the character encoding is a template parameter.
     __range(cch, cch)
@@ -316,7 +307,7 @@ namespace utf8
     size_t EncodeTrueUtf8IntoAndNullTerminate(__out_ecount(cch * 3 + 1) utf8char_t *buffer, __in_ecount(cch) const char16 *source, charcount_t cch);
 
     // Returns true if the pch refers to a UTF-16LE encoding of the given UTF-8 encoding bch.
-    bool CharsAreEqual(__in_ecount(cch) LPCOLESTR pch, LPCUTF8 bch, size_t cch, DecodeOptions options = doDefault);
+    bool CharsAreEqual(LPCOLESTR pch, LPCUTF8 bch, LPCUTF8 end, DecodeOptions options = doDefault);
 
     // Convert the character index into a byte index.
     size_t CharacterIndexToByteIndex(__in_ecount(cbLength) LPCUTF8 pch, size_t cbLength, const charcount_t cchIndex, size_t cbStartIndex, charcount_t cchStartIndex, DecodeOptions options = doDefault);
 
@@ -73,8 +73,7 @@ namespace utf8
         // Some node tests depend on the utf8 decoder not swallowing invalid unicode characters
         // instead of replacing them with the "replacement" character. Pass a flag to our 
         // decoder to require such behavior
-        utf8::DecodeIntoAndNullTerminate(destString, (LPCUTF8) sourceString, cchDestString,
-            DecodeOptions::doAllowInvalidWCHARs);
+        utf8::DecodeUnitsIntoAndNullTerminateNoAdvance(destString, (LPCUTF8) sourceString, (LPCUTF8) sourceString + cbSourceString, DecodeOptions::doAllowInvalidWCHARs);
         Assert(destString[cchDestString] == 0);
         static_assert(sizeof(utf8char_t) == sizeof(char), "Needs to be valid for cast");
         *destStringPtr = destString;
 
@@ -59,18 +59,21 @@ void JsrtDebugUtils::AddSourceLengthAndTextToObject(Js::DynamicObject* object, J
     Assert(statementMap != nullptr);
 
     LPCUTF8 source = functionBody->GetStartOfDocument(_u("Source for debugging"));
-    size_t startByte = utf8::CharacterIndexToByteIndex(source, functionBody->GetUtf8SourceInfo()->GetCbLength(), (const charcount_t)statementMap->sourceSpan.begin);
+    size_t cbLength = functionBody->GetUtf8SourceInfo()->GetCbLength();
+    size_t startByte = utf8::CharacterIndexToByteIndex(source, cbLength, (const charcount_t)statementMap->sourceSpan.begin);
+    size_t endByte = utf8::CharacterIndexToByteIndex(source, cbLength, (const charcount_t)statementMap->sourceSpan.end);
+    int cch = statementMap->sourceSpan.end - statementMap->sourceSpan.begin;
 
-    int byteLength = statementMap->sourceSpan.end - statementMap->sourceSpan.begin;
+    JsrtDebugUtils::AddPropertyToObject(object, JsrtDebugPropertyId::sourceLength, (double)cch, functionBody->GetScriptContext());
 
-    JsrtDebugUtils::AddPropertyToObject(object, JsrtDebugPropertyId::sourceLength, (double)byteLength, functionBody->GetScriptContext());
-
-    AutoArrayPtr<char16> sourceContent(HeapNewNoThrowArray(char16, byteLength + 1), byteLength + 1);
+    AutoArrayPtr<char16> sourceContent(HeapNewNoThrowArray(char16, cch + 1), cch + 1);
     if (sourceContent != nullptr)
     {
+        LPCUTF8 pbStart = source + startByte;
+        LPCUTF8 pbEnd = pbStart + (endByte - startByte);
         utf8::DecodeOptions options = functionBody->GetUtf8SourceInfo()->IsCesu8() ? utf8::doAllowThreeByteSurrogates : utf8::doDefault;
-        utf8::DecodeIntoAndNullTerminate(sourceContent, source + startByte, byteLength, options);
-        JsrtDebugUtils::AddPropertyToObject(object, JsrtDebugPropertyId::sourceText, sourceContent, byteLength, functionBody->GetScriptContext());
+        utf8::DecodeUnitsIntoAndNullTerminate(sourceContent, pbStart, pbEnd, options);
+        JsrtDebugUtils::AddPropertyToObject(object, JsrtDebugPropertyId::sourceText, sourceContent, cch, functionBody->GetScriptContext());
     }
     else
     {
@@ -92,8 +95,10 @@ void JsrtDebugUtils::AddSouceToObject(Js::DynamicObject * object, Js::Utf8Source
     AutoArrayPtr<char16> sourceContent(HeapNewNoThrowArray(char16, cchLength + 1), cchLength + 1);
     if (sourceContent != nullptr)
     {
+        LPCUTF8 source = utf8SourceInfo->GetSource();
+        size_t cbLength = utf8SourceInfo->GetCbLength();
         utf8::DecodeOptions options = utf8SourceInfo->IsCesu8() ? utf8::doAllowThreeByteSurrogates : utf8::doDefault;
-        utf8::DecodeIntoAndNullTerminate(sourceContent, utf8SourceInfo->GetSource(), cchLength, options);
+        utf8::DecodeUnitsIntoAndNullTerminate(sourceContent, source, source + cbLength, options);
         JsrtDebugUtils::AddPropertyToObject(object, JsrtDebugPropertyId::source, sourceContent, cchLength, utf8SourceInfo->GetScriptContext());
     }
     else
 
@@ -209,31 +209,42 @@ IdentPtr HashTbl::PidFromTk(tokens token)
     {
         StaticSym const * sym = s_reservedWordInfo[token].sym;
         Assert(sym != nullptr);
-        rpid = this->PidHashNameLenWithHash(sym->sz, sym->cch, sym->luHash);
+        rpid = this->PidHashNameLenWithHash(sym->sz, sym->sz + sym->cch, sym->cch, sym->luHash);
         rpid->SetTk(token, s_reservedWordInfo[token].grfid);
         m_rpid[token] = rpid;
     }
     return rpid;
 }
 
 template <typename CharType>
-IdentPtr HashTbl::PidHashNameLen(CharType const * prgch, uint32 cch)
+IdentPtr HashTbl::PidHashNameLen(CharType const * prgch, CharType const * end, uint32 cch)
 {
     // NOTE: We use case sensitive hash during compilation, but the runtime
     // uses case insensitive hashing so it can do case insensitive lookups.
-    uint32 luHash = CaseSensitiveComputeHashCch(prgch, cch);
-    return PidHashNameLenWithHash(prgch, cch, luHash);
+
+    uint32 luHash = CaseSensitiveComputeHash(prgch, end);
+    return PidHashNameLenWithHash(prgch, end, cch, luHash);
+}
+template IdentPtr HashTbl::PidHashNameLen<utf8char_t>(utf8char_t const * prgch, utf8char_t const * end, uint32 cch);
+template IdentPtr HashTbl::PidHashNameLen<char>(char const * prgch, char const * end, uint32 cch);
+template IdentPtr HashTbl::PidHashNameLen<char16>(char16 const * prgch, char16 const * end, uint32 cch);
+
+template <typename CharType>
+IdentPtr HashTbl::PidHashNameLen(CharType const * prgch, uint32 cch)
+{
+    Assert(sizeof(CharType) == 2);
+    return PidHashNameLen(prgch, prgch + cch, cch);
 };
 template IdentPtr HashTbl::PidHashNameLen<utf8char_t>(utf8char_t const * prgch, uint32 cch);
 template IdentPtr HashTbl::PidHashNameLen<char>(char const * prgch, uint32 cch);
 template IdentPtr HashTbl::PidHashNameLen<char16>(char16 const * prgch, uint32 cch);
 
 template <typename CharType>
-IdentPtr HashTbl::PidHashNameLenWithHash(_In_reads_(cch) CharType const * prgch, int32 cch, uint32 luHash)
+IdentPtr HashTbl::PidHashNameLenWithHash(_In_reads_(cch) CharType const * prgch, CharType const * end, int32 cch, uint32 luHash)
 {
     Assert(cch >= 0);
     AssertArrMemR(prgch, cch);
-    Assert(luHash == CaseSensitiveComputeHashCch(prgch, cch));
+    Assert(luHash == CaseSensitiveComputeHash(prgch, end));
 
     IdentPtr * ppid;
     IdentPtr pid;
@@ -245,7 +256,7 @@ IdentPtr HashTbl::PidHashNameLenWithHash(_In_reads_(cch) CharType const * prgch,
     int depth = 0;
 #endif
 
-    pid = this->FindExistingPid(prgch, cch, luHash, &ppid, &bucketCount
+    pid = this->FindExistingPid(prgch, end, cch, luHash, &ppid, &bucketCount
 #if PROFILE_DICTIONARY
                                 , depth
 #endif
@@ -313,14 +324,15 @@ IdentPtr HashTbl::PidHashNameLenWithHash(_In_reads_(cch) CharType const * prgch,
     pid->m_propertyId = Js::Constants::NoProperty;
     pid->assignmentState = NotAssigned;
 
-    HashTbl::CopyString(pid->m_sz, prgch, cch);
+    HashTbl::CopyString(pid->m_sz, prgch, end);
 
     return pid;
 }
 
 template <typename CharType>
 IdentPtr HashTbl::FindExistingPid(
     CharType const * prgch,
+    CharType const * end,
     int32 cch,
     uint32 luHash,
     IdentPtr **pppInsert,
@@ -340,7 +352,7 @@ IdentPtr HashTbl::FindExistingPid(
     for (bucketCount = 0; nullptr != (pid = *ppid); ppid = &pid->m_pidNext, bucketCount++)
     {
         if (pid->m_luHash == luHash && (int)pid->m_cch == cch &&
-            HashTbl::CharsAreEqual(pid->m_sz, prgch, cch))
+            HashTbl::CharsAreEqual(pid->m_sz, prgch, end))
         {
             return pid;
         }
@@ -362,32 +374,32 @@ IdentPtr HashTbl::FindExistingPid(
 }
 
 template IdentPtr HashTbl::FindExistingPid<utf8char_t>(
-    utf8char_t const * prgch, int32 cch, uint32 luHash, IdentPtr **pppInsert, int32 *pBucketCount
+    utf8char_t const * prgch, utf8char_t const * end, int32 cch, uint32 luHash, IdentPtr **pppInsert, int32 *pBucketCount
 #if PROFILE_DICTIONARY
     , int& depth
 #endif
     );
 template IdentPtr HashTbl::FindExistingPid<char>(
-    char const * prgch, int32 cch, uint32 luHash, IdentPtr **pppInsert, int32 *pBucketCount
+    char const * prgch, char const * end, int32 cch, uint32 luHash, IdentPtr **pppInsert, int32 *pBucketCount
 #if PROFILE_DICTIONARY
     , int& depth
 #endif
     );
 template IdentPtr HashTbl::FindExistingPid<char16>(
-    char16 const * prgch, int32 cch, uint32 luHash, IdentPtr **pppInsert, int32 *pBucketCount
+    char16 const * prgch, char16 const * end, int32 cch, uint32 luHash, IdentPtr **pppInsert, int32 *pBucketCount
 #if PROFILE_DICTIONARY
     , int& depth
 #endif
     );
 
 bool HashTbl::Contains(_In_reads_(cch) LPCOLESTR prgch, int32 cch)
 {
-    uint32 luHash = CaseSensitiveComputeHashCch(prgch, cch);
+    uint32 luHash = CaseSensitiveComputeHash(prgch, prgch + cch);
 
     for (auto pid = m_prgpidName[luHash & m_luMask]; pid; pid = pid->m_pidNext)
     {
         if (pid->m_luHash == luHash && (int)pid->m_cch == cch &&
-            HashTbl::CharsAreEqual(pid->m_sz, prgch, cch))
+            HashTbl::CharsAreEqual(pid->m_sz, prgch, prgch + cch))
         {
             return true;
         }
@@ -407,7 +419,7 @@ bool HashTbl::Contains(_In_reads_(cch) LPCOLESTR prgch, int32 cch)
 // This method is used during colorizing when scanner isn't interested in storing the actual id and does not care about conversion of escape sequences
 tokens HashTbl::TkFromNameLenColor(_In_reads_(cch) LPCOLESTR prgch, uint32 cch)
 {
-    uint32 luHash = CaseSensitiveComputeHashCch(prgch, cch);
+    uint32 luHash = CaseSensitiveComputeHash(prgch, prgch + cch);
 
     // look for a keyword
 #include "kwds_sw.h"
@@ -434,7 +446,7 @@ tokens HashTbl::TkFromNameLenColor(_In_reads_(cch) LPCOLESTR prgch, uint32 cch)
 // This method is used during colorizing when scanner isn't interested in storing the actual id and does not care about conversion of escape sequences
 tokens HashTbl::TkFromNameLen(_In_reads_(cch) LPCOLESTR prgch, uint32 cch, bool isStrictMode)
 {
-    uint32 luHash = CaseSensitiveComputeHashCch(prgch, cch);
+    uint32 luHash = CaseSensitiveComputeHash(prgch, prgch + cch);
 
     // look for a keyword
 #include "kwds_sw.h"
Original file line number	Diff line number	Diff line change
`@@ -9,7 +9,7 @@ HRESULT FileLoadHelpers::LoadScriptFromFile(LPCSTR filename, LPCWSTR& contents,`
`9`	`9`	`{`
`10`	`10`	`HRESULT hr = S_OK;`
`11`	`11`	`LPCWSTR contentsRaw = nullptr;`
`12`		`- byte * pRawBytes = nullptr;`
	`12`	`+ LPCUTF8 pRawBytes = nullptr;`
`13`	`13`	`UINT lengthBytes = 0;`
`14`	`14`	`bool isUtf8 = false;`
`15`	`15`	`contents = nullptr;`
`@@ -119,7 +119,7 @@ HRESULT FileLoadHelpers::LoadScriptFromFile(LPCSTR filename, LPCWSTR& contents,`
`119`	`119`	`IfFailGo(E_OUTOFMEMORY);`
`120`	`120`	`}`
`121`	`121`
`122`		`- utf8::DecodeIntoAndNullTerminate((char16*) contents, pRawBytes, cUtf16Chars, decodeOptions);`
	`122`	`+ utf8::DecodeUnitsIntoAndNullTerminate((char16*)contents, pRawBytes, pRawBytes + lengthBytes, decodeOptions);`
`123`	`123`	`}`
`124`	`124`
`125`	`125`	`Error:`