diff --git a/stdlib/private/StdlibCollectionUnittest/CheckCollectionType.swift.gyb b/stdlib/private/StdlibCollectionUnittest/CheckCollectionType.swift.gyb index ab708470d739c..3a7022ea7bb5e 100644 --- a/stdlib/private/StdlibCollectionUnittest/CheckCollectionType.swift.gyb +++ b/stdlib/private/StdlibCollectionUnittest/CheckCollectionType.swift.gyb @@ -468,18 +468,6 @@ internal enum _SubSequenceSubscriptOnRangeMode { } } -internal func _product( - _ c1: C1, _ c2: C2 -) -> [(C1.Iterator.Element, C2.Iterator.Element)] { - var result: [(C1.Iterator.Element, C2.Iterator.Element)] = [] - for e1 in c1 { - for e2 in c2 { - result.append((e1, e2)) - } - } - return result -} - %{ from gyb_stdlib_support import collectionForTraversal def testConstraints(protocol): @@ -626,7 +614,7 @@ extension TestSuite { _blackHole(c[index]) } - let tests = _product( + let tests = cartesianProduct( subscriptRangeTests, _SubSequenceSubscriptOnIndexMode.all) @@ -725,7 +713,7 @@ extension TestSuite { _blackHole(c[index..( return () } } + +public func cartesianProduct( + _ c1: C1, _ c2: C2 +) -> [(C1.Iterator.Element, C2.Iterator.Element)] { + var result: [(C1.Iterator.Element, C2.Iterator.Element)] = [] + for e1 in c1 { + for e2 in c2 { + result.append((e1, e2)) + } + } + return result +} + diff --git a/stdlib/public/SwiftShims/CMakeLists.txt b/stdlib/public/SwiftShims/CMakeLists.txt index 652a0ac584c7b..c25cc871ea85b 100644 --- a/stdlib/public/SwiftShims/CMakeLists.txt +++ b/stdlib/public/SwiftShims/CMakeLists.txt @@ -9,6 +9,7 @@ set(sources RefCount.h RuntimeShims.h RuntimeStubs.h + SwiftStdbool.h SwiftStddef.h SwiftStdint.h UnicodeShims.h diff --git a/stdlib/public/SwiftShims/GlobalObjects.h b/stdlib/public/SwiftShims/GlobalObjects.h index 7c9f21f49cf3e..3eb7f9ba2216d 100644 --- a/stdlib/public/SwiftShims/GlobalObjects.h +++ b/stdlib/public/SwiftShims/GlobalObjects.h @@ -39,6 +39,14 @@ struct _SwiftEmptyArrayStorage { extern SWIFT_RUNTIME_STDLIB_INTERFACE struct _SwiftEmptyArrayStorage 
_swiftEmptyArrayStorage; +struct _SwiftHashingSecretKey { + __swift_uint64_t key0; + __swift_uint64_t key1; +}; + +extern SWIFT_RUNTIME_STDLIB_INTERFACE +struct _SwiftHashingSecretKey _swift_stdlib_Hashing_secretKey; + extern SWIFT_RUNTIME_STDLIB_INTERFACE __swift_uint64_t _swift_stdlib_HashingDetail_fixedSeedOverride; diff --git a/stdlib/public/SwiftShims/SwiftStdbool.h b/stdlib/public/SwiftShims/SwiftStdbool.h new file mode 100644 index 0000000000000..6a4ed6c37d6c4 --- /dev/null +++ b/stdlib/public/SwiftShims/SwiftStdbool.h @@ -0,0 +1,23 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See http://swift.org/LICENSE.txt for license information +// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +#ifndef SWIFT_STDLIB_SHIMS_SWIFTSTDBOOL_H_ +#define SWIFT_STDLIB_SHIMS_SWIFTSTDBOOL_H_ + +#ifdef __cplusplus +typedef bool __swift_bool; +#else +typedef _Bool __swift_bool; +#endif + +#endif + diff --git a/stdlib/public/SwiftShims/UnicodeShims.h b/stdlib/public/SwiftShims/UnicodeShims.h index 57ae017577eda..6462ca08adbd8 100644 --- a/stdlib/public/SwiftShims/UnicodeShims.h +++ b/stdlib/public/SwiftShims/UnicodeShims.h @@ -18,6 +18,7 @@ #define SWIFT_STDLIB_SHIMS_UNICODESHIMS_H_ #include "SwiftStdint.h" +#include "SwiftStdbool.h" #include "Visibility.h" #if __has_feature(nullability) @@ -83,13 +84,20 @@ _swift_stdlib_unicode_compare_utf8_utf8(const unsigned char *Left, __swift_int32_t RightLength); SWIFT_RUNTIME_STDLIB_INTERFACE -__attribute__((__pure__)) __swift_intptr_t -_swift_stdlib_unicode_hash(const __swift_uint16_t *Str, __swift_int32_t Length); +void *_swift_stdlib_unicodeCollationIterator_create( + 
const __swift_uint16_t *Str, + __swift_uint32_t Length); SWIFT_RUNTIME_STDLIB_INTERFACE -__attribute__((__pure__)) __swift_intptr_t -_swift_stdlib_unicode_hash_ascii(const unsigned char *Str, - __swift_int32_t Length); +__swift_int32_t _swift_stdlib_unicodeCollationIterator_next( + void *CollationIterator, __swift_bool *HitEnd); + +SWIFT_RUNTIME_STDLIB_INTERFACE +void _swift_stdlib_unicodeCollationIterator_delete( + void *CollationIterator); + +SWIFT_RUNTIME_STDLIB_INTERFACE +const __swift_int32_t *_swift_stdlib_unicode_getASCIICollationTable(); SWIFT_RUNTIME_STDLIB_INTERFACE __swift_int32_t _swift_stdlib_unicode_strToUpper( diff --git a/stdlib/public/SwiftShims/module.modulemap b/stdlib/public/SwiftShims/module.modulemap index 2b3633c7f8f95..e5a358e23730f 100644 --- a/stdlib/public/SwiftShims/module.modulemap +++ b/stdlib/public/SwiftShims/module.modulemap @@ -9,6 +9,7 @@ module SwiftShims { header "RefCount.h" header "RuntimeShims.h" header "RuntimeStubs.h" + header "SwiftStdbool.h" header "SwiftStddef.h" header "SwiftStdint.h" header "UnicodeShims.h" diff --git a/stdlib/public/core/CMakeLists.txt b/stdlib/public/core/CMakeLists.txt index 61ed2b775eb1b..9cdfe9a93f155 100644 --- a/stdlib/public/core/CMakeLists.txt +++ b/stdlib/public/core/CMakeLists.txt @@ -101,6 +101,7 @@ set(SWIFTLIB_ESSENTIAL REPL.swift Reverse.swift Runtime.swift.gyb + SipHash.swift.gyb Sequence.swift SequenceAlgorithms.swift.gyb SequenceWrapper.swift diff --git a/stdlib/public/core/GroupInfo.json b/stdlib/public/core/GroupInfo.json index f3c893483cd10..4fe3fe6989de4 100644 --- a/stdlib/public/core/GroupInfo.json +++ b/stdlib/public/core/GroupInfo.json @@ -135,6 +135,7 @@ "AnyHashable.swift", "Interval.swift", "Hashing.swift", + "SipHash.swift", "ErrorType.swift", "InputStream.swift", "LifetimeManager.swift", diff --git a/stdlib/public/core/Hashing.swift b/stdlib/public/core/Hashing.swift index c5b63f95bd955..051b943d52f4f 100644 --- a/stdlib/public/core/Hashing.swift +++ 
b/stdlib/public/core/Hashing.swift @@ -23,6 +23,27 @@ import SwiftShims +public // @testable +struct _Hashing { + // FIXME(ABI): make this an actual public API. + public // SPI + static var secretKey: (UInt64, UInt64) { + get { + // The variable itself is defined in C++ code so that it is initialized + // during static construction. Almost every Swift program uses hash + // tables, so initializing the secret key during the startup seems to be + // the right trade-off. + return ( + _swift_stdlib_Hashing_secretKey.key0, + _swift_stdlib_Hashing_secretKey.key1) + } + set { + (_swift_stdlib_Hashing_secretKey.key0, + _swift_stdlib_Hashing_secretKey.key1) = newValue + } + } +} + public // @testable struct _HashingDetail { diff --git a/stdlib/public/core/SipHash.swift.gyb b/stdlib/public/core/SipHash.swift.gyb new file mode 100644 index 0000000000000..dc188338b46e8 --- /dev/null +++ b/stdlib/public/core/SipHash.swift.gyb @@ -0,0 +1,253 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See http://swift.org/LICENSE.txt for license information +// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// +/// This file implements SipHash-2-4 and SipHash-1-3 +/// (https://131002.net/siphash/). +/// +/// This file is based on the reference C implementation, which was released +/// to public domain by: +/// +/// * Jean-Philippe Aumasson +/// * Daniel J. 
Bernstein +//===----------------------------------------------------------------------===// + +internal enum _SipHashDetail { + @inline(__always) + internal static func _rotate(_ x: UInt64, leftBy amount: Int) -> UInt64 { + return (x << UInt64(amount)) | (x >> (64 - UInt64(amount))) + } + + @inline(__always) + internal static func _loadUnalignedUInt64LE( + from p: UnsafeRawPointer + ) -> UInt64 { + return + UInt64(p.load(fromByteOffset: 0, as: UInt8.self)) | + (UInt64(p.load(fromByteOffset: 1, as: UInt8.self)) << 8) | + (UInt64(p.load(fromByteOffset: 2, as: UInt8.self)) << 16) | + (UInt64(p.load(fromByteOffset: 3, as: UInt8.self)) << 24) | + (UInt64(p.load(fromByteOffset: 4, as: UInt8.self)) << 32) | + (UInt64(p.load(fromByteOffset: 5, as: UInt8.self)) << 40) | + (UInt64(p.load(fromByteOffset: 6, as: UInt8.self)) << 48) | + (UInt64(p.load(fromByteOffset: 7, as: UInt8.self)) << 56) + } + + @inline(__always) + internal static func _loadPartialUnalignedUInt64LE( + from p: UnsafeRawPointer, + byteCount: Int + ) -> UInt64 { + _sanityCheck((0..<8).contains(byteCount)) + var result: UInt64 = 0 + if byteCount >= 1 { result |= UInt64(p.load(fromByteOffset: 0, as: UInt8.self)) } + if byteCount >= 2 { result |= UInt64(p.load(fromByteOffset: 1, as: UInt8.self)) << 8 } + if byteCount >= 3 { result |= UInt64(p.load(fromByteOffset: 2, as: UInt8.self)) << 16 } + if byteCount >= 4 { result |= UInt64(p.load(fromByteOffset: 3, as: UInt8.self)) << 24 } + if byteCount >= 5 { result |= UInt64(p.load(fromByteOffset: 4, as: UInt8.self)) << 32 } + if byteCount >= 6 { result |= UInt64(p.load(fromByteOffset: 5, as: UInt8.self)) << 40 } + if byteCount >= 7 { result |= UInt64(p.load(fromByteOffset: 6, as: UInt8.self)) << 48 } + return result + } + + @inline(__always) + internal static func _sipRound( + v0: inout UInt64, + v1: inout UInt64, + v2: inout UInt64, + v3: inout UInt64 + ) { + v0 = v0 &+ v1 + v1 = _rotate(v1, leftBy: 13) + v1 ^= v0 + v0 = _rotate(v0, leftBy: 32) + v2 = v2 &+ v3 + v3 = 
_rotate(v3, leftBy: 16) + v3 ^= v2 + v0 = v0 &+ v3 + v3 = _rotate(v3, leftBy: 21) + v3 ^= v0 + v2 = v2 &+ v1 + v1 = _rotate(v1, leftBy: 17) + v1 ^= v2 + v2 = _rotate(v2, leftBy: 32) + } +} + +% for (c_rounds, d_rounds) in [(2, 4), (1, 3)]: +% Self = '_SipHash{}{}Context'.format(c_rounds, d_rounds) + +public // @testable +struct ${Self} { + // "somepseudorandomlygeneratedbytes" + internal var v0: UInt64 = 0x736f6d6570736575 + internal var v1: UInt64 = 0x646f72616e646f6d + internal var v2: UInt64 = 0x6c7967656e657261 + internal var v3: UInt64 = 0x7465646279746573 + + internal var hashedByteCount: UInt64 = 0 + + internal var dataTail: UInt64 = 0 + internal var dataTailByteCount: Int = 0 + + internal var finalizedHash: UInt64? = nil + + public init(key: (UInt64, UInt64)) { + v3 ^= key.1 + v2 ^= key.0 + v1 ^= key.1 + v0 ^= key.0 + } + + // FIXME(ABI): Use UnsafeRawBufferPointer. + public // @testable + mutating func append(_ data: UnsafeRawPointer, byteCount: Int) { + _append_alwaysInline(data, byteCount: byteCount) + } + + // FIXME(ABI): Use UnsafeRawBufferPointer. 
+  /// Feeds `byteCount` bytes starting at `data` into the hash state.
+  ///
+  /// Input is consumed as little-endian 64-bit words. Bytes that do not yet
+  /// fill a whole word are buffered in `dataTail` (their count is kept in
+  /// `dataTailByteCount`) and are completed by a later call or folded in
+  /// during finalization.
+  ///
+  /// - Parameters:
+  ///   - data: Start of the bytes to hash; no alignment is required.
+  ///   - byteCount: Number of bytes readable at `data`.
+  /// - Precondition: The hash has not been finalized yet.
+  @inline(__always)
+  internal mutating func _append_alwaysInline(
+    _ data: UnsafeRawPointer,
+    byteCount: Int
+  ) {
+    precondition(finalizedHash == nil)
+    _sanityCheck((0..<8).contains(dataTailByteCount))
+
+    // The state is updated one 64-bit (8-byte) word at a time.
+    let wordSize = MemoryLayout<UInt64>.size
+    let dataEnd = data + byteCount
+
+    var data = data
+    var byteCount = byteCount
+    if dataTailByteCount != 0 {
+      // A previous call left a partial word behind; top it up first.
+      let restByteCount = min(wordSize - dataTailByteCount, byteCount)
+      let rest = _SipHashDetail._loadPartialUnalignedUInt64LE(
+        from: data,
+        byteCount: restByteCount)
+      dataTail |= rest << UInt64(dataTailByteCount * 8)
+      dataTailByteCount += restByteCount
+      data += restByteCount
+      byteCount -= restByteCount
+    }
+
+    if dataTailByteCount == wordSize {
+      _appendDirectly(dataTail)
+      dataTail = 0
+      dataTailByteCount = 0
+    } else if dataTailByteCount != 0 {
+      // The input ran out before the buffered word was completed.
+      _sanityCheck(data == dataEnd)
+      return
+    }
+
+    let endOfWords = data + byteCount - (byteCount % wordSize)
+    while data != endOfWords {
+      _appendDirectly(_SipHashDetail._loadUnalignedUInt64LE(from: data))
+      data += wordSize
+      // No need to update `byteCount`, it is not used beyond this point.
+    }
+
+    if data != dataEnd {
+      // Buffer the trailing bytes (fewer than a word) for later.
+      dataTailByteCount = dataEnd - data
+      dataTail = _SipHashDetail._loadPartialUnalignedUInt64LE(
+        from: data,
+        byteCount: dataTailByteCount)
+    }
+  }
+
+  /// This function mixes the given word directly into the state,
+  /// ignoring `dataTail`.
+  @inline(__always)
+  internal mutating func _appendDirectly(_ word: UInt64) {
+    // XOR the word into v3, run the per-word rounds, then XOR it into v0.
+    v3 ^= word
+    for _ in 0..<${c_rounds} {
+      _SipHashDetail._sipRound(v0: &v0, v1: &v1, v2: &v2, v3: &v3)
+    }
+    v0 ^= word
+    hashedByteCount += 8
+  }
+
+% for data_type in ['UInt', 'Int', 'UInt64', 'Int64', 'UInt32', 'Int32']:
+  /// Appends the in-memory bytes of `data` to the hashed input.
+  public // @testable
+  mutating func append(_ data: ${data_type}) {
+    var value = data
+    let valueByteCount = MemoryLayout.size(ofValue: value)
+    _append_alwaysInline(&value, byteCount: valueByteCount)
+  }
+% end
+
+  /// Finishes hashing and returns the 64-bit hash value.
+  ///
+  /// The result is cached, so calling this again returns the same value.
+  public // @testable
+  mutating func finalizeAndReturnHash() -> UInt64 {
+    return _finalizeAndReturnHash_alwaysInline()
+  }
+
+  @inline(__always)
+  internal mutating func _finalizeAndReturnHash_alwaysInline() -> UInt64 {
+    // Already finalized: hand back the cached result.
+    if let cachedHash = finalizedHash {
+      return cachedHash
+    }
+
+    _sanityCheck((0..<8).contains(dataTailByteCount))
+
+    // The last word holds the buffered tail bytes in its low bytes and the
+    // low 8 bits of the total hashed byte count in its top byte.
+    hashedByteCount += UInt64(dataTailByteCount)
+    let lastWord: UInt64 = (hashedByteCount << 56) | dataTail
+
+    v3 ^= lastWord
+    for _ in 0..<${c_rounds} {
+      _SipHashDetail._sipRound(v0: &v0, v1: &v1, v2: &v2, v3: &v3)
+    }
+    v0 ^= lastWord
+
+    v2 ^= 0xff
+
+    for _ in 0..<${d_rounds} {
+      _SipHashDetail._sipRound(v0: &v0, v1: &v1, v2: &v2, v3: &v3)
+    }
+
+    let digest = v0 ^ v1 ^ v2 ^ v3
+    finalizedHash = digest
+    return digest
+  }
+
+  /// Finalizes the hash and converts it to `Int`, keeping only the low bits
+  /// on 32-bit architectures.
+  internal mutating func _finalizeAndReturnIntHash() -> Int {
+    let hash64 = finalizeAndReturnHash()
+#if arch(i386) || arch(arm)
+    return Int(truncatingBitPattern: hash64)
+#elseif arch(x86_64) || arch(arm64) || arch(powerpc64) || arch(powerpc64le) || arch(s390x)
+    return Int(Int64(bitPattern: hash64))
+#endif
+  }
+
+  // FIXME(ABI): Use UnsafeRawBufferPointer.
+  /// Hashes `dataByteCount` bytes at `data` with `key` in a single call.
+  public // @testable
+  static func hash(
+    data: UnsafeRawPointer,
+    dataByteCount: Int,
+    key: (UInt64, UInt64)
+  ) -> UInt64 {
+    return _hash_alwaysInline(
+      data: data, dataByteCount: dataByteCount, key: key)
+  }
+
+  // FIXME(ABI): Use UnsafeRawBufferPointer.
+ @inline(__always) + public // @testable + static func _hash_alwaysInline( + data: UnsafeRawPointer, + dataByteCount: Int, + key: (UInt64, UInt64) + ) -> UInt64 { + var context = ${Self}(key: key) + context._append_alwaysInline(data, byteCount: dataByteCount) + return context._finalizeAndReturnHash_alwaysInline() + } +} +% end diff --git a/stdlib/public/core/StringHashable.swift b/stdlib/public/core/StringHashable.swift index e3c58c34f3655..2b115b4e49968 100644 --- a/stdlib/public/core/StringHashable.swift +++ b/stdlib/public/core/StringHashable.swift @@ -20,6 +20,50 @@ func _stdlib_NSStringHashValue(_ str: AnyObject, _ isASCII: Bool) -> Int func _stdlib_NSStringHashValuePointer(_ str: OpaquePointer, _ isASCII: Bool) -> Int #endif +extension _Unicode { + internal static func hashASCII( + _ string: UnsafeBufferPointer + ) -> Int { + let collationTable = _swift_stdlib_unicode_getASCIICollationTable() + var hasher = _SipHash13Context(key: _Hashing.secretKey) + for c in string { + _precondition(c <= 127) + let element = collationTable[Int(c)] + // Ignore zero valued collation elements. They don't participate in the + // ordering relation. + if element != 0 { + hasher.append(element) + } + } + return hasher._finalizeAndReturnIntHash() + } + + internal static func hashUTF16( + _ string: UnsafeBufferPointer + ) -> Int { + let collationIterator = _swift_stdlib_unicodeCollationIterator_create( + string.baseAddress!, + UInt32(string.count)) + defer { _swift_stdlib_unicodeCollationIterator_delete(collationIterator) } + + var hasher = _SipHash13Context(key: _Hashing.secretKey) + while true { + var hitEnd = false + let element = + _swift_stdlib_unicodeCollationIterator_next(collationIterator, &hitEnd) + if hitEnd { + break + } + // Ignore zero valued collation elements. They don't participate in the + // ordering relation. 
+ if element != 0 { + hasher.append(element) + } + } + return hasher._finalizeAndReturnIntHash() + } +} + extension String : Hashable { /// The string's hash value. /// @@ -49,10 +93,12 @@ extension String : Hashable { } #else if let asciiBuffer = self._core.asciiBuffer { - return _swift_stdlib_unicode_hash_ascii( - asciiBuffer.baseAddress!, Int32(asciiBuffer.count)) + return _Unicode.hashASCII(UnsafeBufferPointer( + start: asciiBuffer.baseAddress!, + count: asciiBuffer.count)) } else { - return _swift_stdlib_unicode_hash(_core.startUTF16, Int32(_core.count)) + return _Unicode.hashUTF16( + UnsafeBufferPointer(start: _core.startUTF16, count: _core.count)) } #endif } diff --git a/stdlib/public/core/Unicode.swift b/stdlib/public/core/Unicode.swift index 750cc397bbf4b..c2f3a2948930f 100644 --- a/stdlib/public/core/Unicode.swift +++ b/stdlib/public/core/Unicode.swift @@ -1176,3 +1176,7 @@ extension UTF16 { Builtin.unreachable() } } + +/// A namespace for Unicode utilities. +internal enum _Unicode {} + diff --git a/stdlib/public/stubs/GlobalObjects.cpp b/stdlib/public/stubs/GlobalObjects.cpp index f2d463f03d2be..bc833bade056c 100644 --- a/stdlib/public/stubs/GlobalObjects.cpp +++ b/stdlib/public/stubs/GlobalObjects.cpp @@ -18,6 +18,8 @@ #include "../SwiftShims/GlobalObjects.h" #include "swift/Runtime/Metadata.h" +#include "swift/Runtime/Debug.h" +#include namespace swift { // FIXME(ABI): does this declaration need SWIFT_RUNTIME_STDLIB_INTERFACE? 
@@ -39,6 +41,32 @@ swift::_SwiftEmptyArrayStorage swift::_swiftEmptyArrayStorage = { } }; +static __swift_uint64_t randomUInt64() { +#if defined(__APPLE__) + return static_cast<__swift_uint64_t>(arc4random()) | + (static_cast<__swift_uint64_t>(arc4random()) << 32); +#else + auto devUrandom = fopen("/dev/urandom", "r"); + if (!devUrandom) { + swift::fatalError(/* flags = */ 0, "Opening \"/dev/urandom\" failed"); + } + uint64_t result; + if (fread(&result, sizeof(result), 1, devUrandom) != 1) { + swift::fatalError(/* flags = */ 0, "Reading from \"/dev/urandom\" failed"); + } + if (fclose(devUrandom)) { + swift::fatalError(/* flags = */ 0, "Closing \"/dev/urandom\" failed"); + } + return result; +#endif +} + +SWIFT_ALLOWED_RUNTIME_GLOBAL_CTOR_BEGIN +swift::_SwiftHashingSecretKey swift::_swift_stdlib_Hashing_secretKey = { + randomUInt64(), randomUInt64() +}; +SWIFT_ALLOWED_RUNTIME_GLOBAL_CTOR_END + __swift_uint64_t swift::_swift_stdlib_HashingDetail_fixedSeedOverride = 0; namespace llvm { namespace hashing { namespace detail { diff --git a/stdlib/public/stubs/UnicodeNormalization.cpp b/stdlib/public/stubs/UnicodeNormalization.cpp index f6cabc78e9831..339fe2d52e630 100644 --- a/stdlib/public/stubs/UnicodeNormalization.cpp +++ b/stdlib/public/stubs/UnicodeNormalization.cpp @@ -57,7 +57,6 @@ static const UCollator *GetRootCollator() { /// This class caches the collation element results for the ASCII subset of /// unicode. class ASCIICollation { - int32_t CollationTable[128]; public: friend class swift::Lazy; @@ -66,6 +65,8 @@ class ASCIICollation { return &theTable.get(); } + int32_t CollationTable[128]; + /// Maps an ASCII character to a collation element priority as would be /// returned by a call to ucol_next(). 
int32_t map(unsigned char c) const { @@ -189,91 +190,42 @@ swift::_swift_stdlib_unicode_compare_utf8_utf8(const unsigned char *LeftString, return Diff; } -// These functions use murmurhash2 in its 32 and 64bit forms, which are -// differentiated by the constants defined below. This seems like a good choice -// for now because it operates efficiently in blocks rather than bytes, and -// the data returned from the collation iterator comes in 4byte chunks. -#if __arm__ || __i386__ -#define HASH_SEED 0x88ddcc21 -#define HASH_M 0x5bd1e995 -#define HASH_R 24 -#else -#define HASH_SEED 0x429b126688ddcc21 -#define HASH_M 0xc6a4a7935bd1e995 -#define HASH_R 47 -#endif - -static intptr_t hashChunk(const UCollator *Collator, intptr_t HashState, - const uint16_t *Str, uint32_t Length, - UErrorCode *ErrorCode) { +void *swift::_swift_stdlib_unicodeCollationIterator_create( + const __swift_uint16_t *Str, __swift_uint32_t Length) { + UErrorCode ErrorCode = U_ZERO_ERROR; #if defined(__CYGWIN__) || defined(_MSC_VER) UCollationElements *CollationIterator = ucol_openElements( - Collator, reinterpret_cast(Str), Length, ErrorCode); + GetRootCollator(), reinterpret_cast(Str), Length, + &ErrorCode); #else UCollationElements *CollationIterator = ucol_openElements( - Collator, Str, Length, ErrorCode); + GetRootCollator(), Str, Length, &ErrorCode); #endif - while (U_SUCCESS(*ErrorCode)) { - intptr_t Elem = ucol_next(CollationIterator, ErrorCode); - // Ignore zero valued collation elements. They don't participate in the - // ordering relation. 
- if (Elem == 0) - continue; - if (Elem != UCOL_NULLORDER) { - Elem *= HASH_M; - Elem ^= Elem >> HASH_R; - Elem *= HASH_M; - - HashState *= HASH_M; - HashState ^= Elem; - } else { - break; - } + if (U_FAILURE(ErrorCode)) { + swift::crash("_swift_stdlib_unicodeCollationIterator_create: ucol_openElements() failed."); } - ucol_closeElements(CollationIterator); - return HashState; -} - -static intptr_t hashFinish(intptr_t HashState) { - HashState ^= HashState >> HASH_R; - HashState *= HASH_M; - HashState ^= HashState >> HASH_R; - return HashState; + return CollationIterator; } -intptr_t -swift::_swift_stdlib_unicode_hash(const uint16_t *Str, int32_t Length) { +__swift_int32_t swift::_swift_stdlib_unicodeCollationIterator_next( + void *CollationIterator, bool *HitEnd) { UErrorCode ErrorCode = U_ZERO_ERROR; - intptr_t HashState = HASH_SEED; - HashState = hashChunk(GetRootCollator(), HashState, Str, Length, &ErrorCode); - + auto Result = ucol_next( + static_cast(CollationIterator), &ErrorCode); if (U_FAILURE(ErrorCode)) { - swift::crash("hashChunk: Unexpected error hashing unicode string."); + swift::crash("_swift_stdlib_unicodeCollationIterator_next: ucol_next() failed."); } - return hashFinish(HashState); + *HitEnd = (Result == UCOL_NULLORDER); + return Result; } -intptr_t swift::_swift_stdlib_unicode_hash_ascii(const unsigned char *Str, - int32_t Length) { - const ASCIICollation *Table = ASCIICollation::getTable(); - intptr_t HashState = HASH_SEED; - int32_t Pos = 0; - while (Pos < Length) { - const unsigned char c = Str[Pos++]; - assert((c & 0x80) == 0 && "This table only exists for the ASCII subset"); - intptr_t Elem = Table->map(c); - // Ignore zero valued collation elements. They don't participate in the - // ordering relation. 
- if (Elem == 0) - continue; - Elem *= HASH_M; - Elem ^= Elem >> HASH_R; - Elem *= HASH_M; +void swift::_swift_stdlib_unicodeCollationIterator_delete( + void *CollationIterator) { + ucol_closeElements(static_cast(CollationIterator)); +} - HashState *= HASH_M; - HashState ^= Elem; - } - return hashFinish(HashState); +const __swift_int32_t *swift::_swift_stdlib_unicode_getASCIICollationTable() { + return ASCIICollation::getTable()->CollationTable; } /// Convert the unicode string to uppercase. This function will return the diff --git a/validation-test/stdlib/SipHash.swift b/validation-test/stdlib/SipHash.swift new file mode 100644 index 0000000000000..b0585fa7f5508 --- /dev/null +++ b/validation-test/stdlib/SipHash.swift @@ -0,0 +1,342 @@ +// RUN: %target-run-simple-swiftgyb +// REQUIRES: executable_test + +import StdlibUnittest + +let SipHashTests = TestSuite("SipHashTests") + +struct SipHashTest { + let input: [UInt8] + let key: (UInt64, UInt64) + let output: UInt64 + + /// Test vector from the reference C implementation. + /// + /// SipHash output with + /// + /// key = 00 01 02 ... + /// + /// and + /// + /// input = (empty string) + /// input = 00 (1 byte) + /// input = 00 01 (2 bytes) + /// input = 00 01 02 (3 bytes) + /// ... + /// input = 00 01 02 ... 3e (63 bytes) + init(referenceVectorIndex i: Int, output: UInt64) { + self.input = Array(0.. + // * Daniel J. 
Bernstein + SipHashTest(referenceVectorIndex: 0, output: 0x726fdb47dd0e0e31), + SipHashTest(referenceVectorIndex: 1, output: 0x74f839c593dc67fd), + SipHashTest(referenceVectorIndex: 2, output: 0x0d6c8009d9a94f5a), + SipHashTest(referenceVectorIndex: 3, output: 0x85676696d7fb7e2d), + SipHashTest(referenceVectorIndex: 4, output: 0xcf2794e0277187b7), + SipHashTest(referenceVectorIndex: 5, output: 0x18765564cd99a68d), + SipHashTest(referenceVectorIndex: 6, output: 0xcbc9466e58fee3ce), + SipHashTest(referenceVectorIndex: 7, output: 0xab0200f58b01d137), + SipHashTest(referenceVectorIndex: 8, output: 0x93f5f5799a932462), + SipHashTest(referenceVectorIndex: 9, output: 0x9e0082df0ba9e4b0), + SipHashTest(referenceVectorIndex: 10, output: 0x7a5dbbc594ddb9f3), + SipHashTest(referenceVectorIndex: 11, output: 0xf4b32f46226bada7), + SipHashTest(referenceVectorIndex: 12, output: 0x751e8fbc860ee5fb), + SipHashTest(referenceVectorIndex: 13, output: 0x14ea5627c0843d90), + SipHashTest(referenceVectorIndex: 14, output: 0xf723ca908e7af2ee), + SipHashTest(referenceVectorIndex: 15, output: 0xa129ca6149be45e5), + SipHashTest(referenceVectorIndex: 16, output: 0x3f2acc7f57c29bdb), + SipHashTest(referenceVectorIndex: 17, output: 0x699ae9f52cbe4794), + SipHashTest(referenceVectorIndex: 18, output: 0x4bc1b3f0968dd39c), + SipHashTest(referenceVectorIndex: 19, output: 0xbb6dc91da77961bd), + SipHashTest(referenceVectorIndex: 20, output: 0xbed65cf21aa2ee98), + SipHashTest(referenceVectorIndex: 21, output: 0xd0f2cbb02e3b67c7), + SipHashTest(referenceVectorIndex: 22, output: 0x93536795e3a33e88), + SipHashTest(referenceVectorIndex: 23, output: 0xa80c038ccd5ccec8), + SipHashTest(referenceVectorIndex: 24, output: 0xb8ad50c6f649af94), + SipHashTest(referenceVectorIndex: 25, output: 0xbce192de8a85b8ea), + SipHashTest(referenceVectorIndex: 26, output: 0x17d835b85bbb15f3), + SipHashTest(referenceVectorIndex: 27, output: 0x2f2e6163076bcfad), + SipHashTest(referenceVectorIndex: 28, output: 
0xde4daaaca71dc9a5), + SipHashTest(referenceVectorIndex: 29, output: 0xa6a2506687956571), + SipHashTest(referenceVectorIndex: 30, output: 0xad87a3535c49ef28), + SipHashTest(referenceVectorIndex: 31, output: 0x32d892fad841c342), + SipHashTest(referenceVectorIndex: 32, output: 0x7127512f72f27cce), + SipHashTest(referenceVectorIndex: 33, output: 0xa7f32346f95978e3), + SipHashTest(referenceVectorIndex: 34, output: 0x12e0b01abb051238), + SipHashTest(referenceVectorIndex: 35, output: 0x15e034d40fa197ae), + SipHashTest(referenceVectorIndex: 36, output: 0x314dffbe0815a3b4), + SipHashTest(referenceVectorIndex: 37, output: 0x027990f029623981), + SipHashTest(referenceVectorIndex: 38, output: 0xcadcd4e59ef40c4d), + SipHashTest(referenceVectorIndex: 39, output: 0x9abfd8766a33735c), + SipHashTest(referenceVectorIndex: 40, output: 0x0e3ea96b5304a7d0), + SipHashTest(referenceVectorIndex: 41, output: 0xad0c42d6fc585992), + SipHashTest(referenceVectorIndex: 42, output: 0x187306c89bc215a9), + SipHashTest(referenceVectorIndex: 43, output: 0xd4a60abcf3792b95), + SipHashTest(referenceVectorIndex: 44, output: 0xf935451de4f21df2), + SipHashTest(referenceVectorIndex: 45, output: 0xa9538f0419755787), + SipHashTest(referenceVectorIndex: 46, output: 0xdb9acddff56ca510), + SipHashTest(referenceVectorIndex: 47, output: 0xd06c98cd5c0975eb), + SipHashTest(referenceVectorIndex: 48, output: 0xe612a3cb9ecba951), + SipHashTest(referenceVectorIndex: 49, output: 0xc766e62cfcadaf96), + SipHashTest(referenceVectorIndex: 50, output: 0xee64435a9752fe72), + SipHashTest(referenceVectorIndex: 51, output: 0xa192d576b245165a), + SipHashTest(referenceVectorIndex: 52, output: 0x0a8787bf8ecb74b2), + SipHashTest(referenceVectorIndex: 53, output: 0x81b3e73d20b49b6f), + SipHashTest(referenceVectorIndex: 54, output: 0x7fa8220ba3b2ecea), + SipHashTest(referenceVectorIndex: 55, output: 0x245731c13ca42499), + SipHashTest(referenceVectorIndex: 56, output: 0xb78dbfaf3a8d83bd), + SipHashTest(referenceVectorIndex: 57, 
output: 0xea1ad565322a1a0b), + SipHashTest(referenceVectorIndex: 58, output: 0x60e61c23a3795013), + SipHashTest(referenceVectorIndex: 59, output: 0x6606d7e446282b93), + SipHashTest(referenceVectorIndex: 60, output: 0x6ca4ecb15c5f91e1), + SipHashTest(referenceVectorIndex: 61, output: 0x9f626da15c9625f3), + SipHashTest(referenceVectorIndex: 62, output: 0xe51b38608ef25f57), + SipHashTest(referenceVectorIndex: 63, output: 0x958a324ceb064572), + // End of reference test vectors. + + SipHashTest( + input: [ + 0x72, 0xdc, 0xde, 0xd4, 0x6d, 0xb4, 0xc8, 0xa1, + 0xcf, 0x22, 0xe2, 0x7f, 0xe3, 0xf6, 0xe5, 0x6d, + 0x8b, 0x66, 0x0b, 0xaf, 0xba, 0x16, 0x25, 0xf3, + 0x63, 0x8e, 0x69, 0x80, 0xf3, 0x7e, 0xd6, 0xe3, + ], + key: (0xa3432fc680796c34, 0x1173946a79aeaae5), + output: 0x058b04535972ff2b), +] + +let sipHash13Tests: [SipHashTest] = [ + SipHashTest(referenceVectorIndex: 0, output: 0xabac0158050fc4dc), + SipHashTest(referenceVectorIndex: 1, output: 0xc9f49bf37d57ca93), + SipHashTest(referenceVectorIndex: 2, output: 0x82cb9b024dc7d44d), + SipHashTest(referenceVectorIndex: 3, output: 0x8bf80ab8e7ddf7fb), + SipHashTest(referenceVectorIndex: 4, output: 0xcf75576088d38328), + SipHashTest(referenceVectorIndex: 5, output: 0xdef9d52f49533b67), + SipHashTest(referenceVectorIndex: 6, output: 0xc50d2b50c59f22a7), + SipHashTest(referenceVectorIndex: 7, output: 0xd3927d989bb11140), + SipHashTest(referenceVectorIndex: 8, output: 0x369095118d299a8e), + SipHashTest(referenceVectorIndex: 9, output: 0x25a48eb36c063de4), + SipHashTest(referenceVectorIndex: 10, output: 0x79de85ee92ff097f), + SipHashTest(referenceVectorIndex: 11, output: 0x70c118c1f94dc352), + SipHashTest(referenceVectorIndex: 12, output: 0x78a384b157b4d9a2), + SipHashTest(referenceVectorIndex: 13, output: 0x306f760c1229ffa7), + SipHashTest(referenceVectorIndex: 14, output: 0x605aa111c0f95d34), + SipHashTest(referenceVectorIndex: 15, output: 0xd320d86d2a519956), + SipHashTest(referenceVectorIndex: 16, output: 0xcc4fdd1a7d908b66), 
+ SipHashTest(referenceVectorIndex: 17, output: 0x9cf2689063dbd80c), + SipHashTest(referenceVectorIndex: 18, output: 0x8ffc389cb473e63e), + SipHashTest(referenceVectorIndex: 19, output: 0xf21f9de58d297d1c), + SipHashTest(referenceVectorIndex: 20, output: 0xc0dc2f46a6cce040), + SipHashTest(referenceVectorIndex: 21, output: 0xb992abfe2b45f844), + SipHashTest(referenceVectorIndex: 22, output: 0x7ffe7b9ba320872e), + SipHashTest(referenceVectorIndex: 23, output: 0x525a0e7fdae6c123), + SipHashTest(referenceVectorIndex: 24, output: 0xf464aeb267349c8c), + SipHashTest(referenceVectorIndex: 25, output: 0x45cd5928705b0979), + SipHashTest(referenceVectorIndex: 26, output: 0x3a3e35e3ca9913a5), + SipHashTest(referenceVectorIndex: 27, output: 0xa91dc74e4ade3b35), + SipHashTest(referenceVectorIndex: 28, output: 0xfb0bed02ef6cd00d), + SipHashTest(referenceVectorIndex: 29, output: 0x88d93cb44ab1e1f4), + SipHashTest(referenceVectorIndex: 30, output: 0x540f11d643c5e663), + SipHashTest(referenceVectorIndex: 31, output: 0x2370dd1f8c21d1bc), + SipHashTest(referenceVectorIndex: 32, output: 0x81157b6c16a7b60d), + SipHashTest(referenceVectorIndex: 33, output: 0x4d54b9e57a8ff9bf), + SipHashTest(referenceVectorIndex: 34, output: 0x759f12781f2a753e), + SipHashTest(referenceVectorIndex: 35, output: 0xcea1a3bebf186b91), + SipHashTest(referenceVectorIndex: 36, output: 0x2cf508d3ada26206), + SipHashTest(referenceVectorIndex: 37, output: 0xb6101c2da3c33057), + SipHashTest(referenceVectorIndex: 38, output: 0xb3f47496ae3a36a1), + SipHashTest(referenceVectorIndex: 39, output: 0x626b57547b108392), + SipHashTest(referenceVectorIndex: 40, output: 0xc1d2363299e41531), + SipHashTest(referenceVectorIndex: 41, output: 0x667cc1923f1ad944), + SipHashTest(referenceVectorIndex: 42, output: 0x65704ffec8138825), + SipHashTest(referenceVectorIndex: 43, output: 0x24f280d1c28949a6), + SipHashTest(referenceVectorIndex: 44, output: 0xc2ca1cedfaf8876b), + SipHashTest(referenceVectorIndex: 45, output: 
0xc2164bfc9f042196),
  SipHashTest(referenceVectorIndex: 46, output: 0xa16e9c9368b1d623),
  SipHashTest(referenceVectorIndex: 47, output: 0x49fb169c8b5114fd),
  SipHashTest(referenceVectorIndex: 48, output: 0x9f3143f8df074c46),
  SipHashTest(referenceVectorIndex: 49, output: 0xc6fdaf2412cc86b3),
  SipHashTest(referenceVectorIndex: 50, output: 0x7eaf49d10a52098f),
  SipHashTest(referenceVectorIndex: 51, output: 0x1cf313559d292f9a),
  SipHashTest(referenceVectorIndex: 52, output: 0xc44a30dda2f41f12),
  SipHashTest(referenceVectorIndex: 53, output: 0x36fae98943a71ed0),
  SipHashTest(referenceVectorIndex: 54, output: 0x318fb34c73f0bce6),
  SipHashTest(referenceVectorIndex: 55, output: 0xa27abf3670a7e980),
  SipHashTest(referenceVectorIndex: 56, output: 0xb4bcc0db243c6d75),
  SipHashTest(referenceVectorIndex: 57, output: 0x23f8d852fdb71513),
  SipHashTest(referenceVectorIndex: 58, output: 0x8f035f4da67d8a08),
  SipHashTest(referenceVectorIndex: 59, output: 0xd89cd0e5b7e8f148),
  SipHashTest(referenceVectorIndex: 60, output: 0xf6f4e6bcf7a644ee),
  SipHashTest(referenceVectorIndex: 61, output: 0xaec59ad80f1837f2),
  SipHashTest(referenceVectorIndex: 62, output: 0xc3b2f6154b6694e0),
  SipHashTest(referenceVectorIndex: 63, output: 0x9d199062b7bbb3a8),

  // A case that spells out its input bytes and key explicitly instead of
  // referring to a reference vector by index.
  SipHashTest(
    input: [
      0x72, 0xdc, 0xde, 0xd4, 0x6d, 0xb4, 0xc8, 0xa1,
      0xcf, 0x22, 0xe2, 0x7f, 0xe3, 0xf6, 0xe5, 0x6d,
      0x8b, 0x66, 0x0b, 0xaf, 0xba, 0x16, 0x25, 0xf3,
      0x63, 0x8e, 0x69, 0x80, 0xf3, 0x7e, 0xd6, 0xe3,
    ],
    key: (0xa3432fc680796c34, 0x1173946a79aeaae5),
    output: 0x4d457d818f46941d),
]

/// Chunk-size sequences used by the incremental `append(UnsafeRawPointer)`
/// tests.
///
/// A pattern is cycled through for as long as input remains, and the size taken
/// from it is clamped to the number of bytes left.  Zero-sized chunks are
/// allowed (see `[0, 1]`), but every pattern needs at least one nonzero entry:
/// the consuming loop only advances by the chunk size.
let incrementalPatterns: [[Int]] = [
  [1], [2], [3], [4], [5], [6], [7], [8], [9],
  [15], [16], [17],
  [31], [32], [33],
  [0, 1],
  [1, 2],
  [1, 3, 5],
  [1, 7],
  [1, 9],
  [7, 9],
]

/// Assembles a `UInt64` from the eight bytes starting at `p`, with the byte at
/// offset 0 as the least significant one.
///
/// The bytes are loaded one at a time as `UInt8`, so `p` does not have to be
/// aligned for a 64-bit access.
func loadUnalignedUInt64LE(
  from p: UnsafeRawPointer
) -> UInt64 {
  return
    UInt64(p.load(fromByteOffset: 0, as: UInt8.self)) |
    (UInt64(p.load(fromByteOffset: 1, as: UInt8.self)) << 8) |
    (UInt64(p.load(fromByteOffset: 2, as: UInt8.self)) << 16) |
    (UInt64(p.load(fromByteOffset: 3, as: UInt8.self)) << 24) |
    (UInt64(p.load(fromByteOffset: 4, as: UInt8.self)) << 32) |
    (UInt64(p.load(fromByteOffset: 5, as: UInt8.self)) << 40) |
    (UInt64(p.load(fromByteOffset: 6, as: UInt8.self)) << 48) |
    (UInt64(p.load(fromByteOffset: 7, as: UInt8.self)) << 56)
}

/// Assembles a `UInt32` from the four bytes starting at `p`, with the byte at
/// offset 0 as the least significant one.
///
/// The bytes are loaded one at a time as `UInt8`, so `p` does not have to be
/// aligned for a 32-bit access.
func loadUnalignedUInt32LE(
  from p: UnsafeRawPointer
) -> UInt32 {
  return
    UInt32(p.load(fromByteOffset: 0, as: UInt8.self)) |
    (UInt32(p.load(fromByteOffset: 1, as: UInt8.self)) << 8) |
    (UInt32(p.load(fromByteOffset: 2, as: UInt8.self)) << 16) |
    (UInt32(p.load(fromByteOffset: 3, as: UInt8.self)) << 24)
}

/// Assembles a `UInt` from the bytes starting at `p`, least significant byte
/// first, reading as many bytes as `UInt` occupies on the current target.
func loadUnalignedUIntLE(
  from p: UnsafeRawPointer
) -> UInt {
  // Choose the width from the size of `UInt` rather than from a list of
  // architecture names, so that the function still has a return path on
  // targets that are not enumerated here.
  if MemoryLayout<UInt>.size == 4 {
    return UInt(loadUnalignedUInt32LE(from: p))
  }
  return UInt(loadUnalignedUInt64LE(from: p))
}

% for data_type in ['Int', 'Int64', 'Int32']:
/// Signed counterpart of `loadUnalignedU${data_type}LE(from:)`: reads the same
/// bytes and reinterprets the resulting bit pattern as `${data_type}`.
func loadUnaligned${data_type}LE(
  from p: UnsafeRawPointer
) -> ${data_type} {
  return ${data_type}(bitPattern: loadUnalignedU${data_type}LE(from: p))
}
% end

% for data_type in ['UInt', 'Int', 'UInt64', 'Int64', 'UInt32', 'Int32']:
/// Reads a `${data_type}` from the bytes starting at `p` without requiring
/// alignment.
///
/// The little-endian-assembled value is passed through
/// `${data_type}(littleEndian:)`, so the result corresponds to the bytes as
/// laid out in the host's byte order.
func loadUnaligned${data_type}(
  from p: UnsafeRawPointer
) -> ${data_type} {
  return ${data_type}(littleEndian: loadUnaligned${data_type}LE(from: p))
}
% end

% for (Self, tests) in [
%   ('_SipHash13Context', 'sipHash13Tests'),
%   ('_SipHash24Context', 'sipHash24Tests')
% ]:
// Hash the whole input in a single call and compare with the expected output.
SipHashTests.test("${Self}/Oneshot").forEach(in: ${tests}) {
  test in

  expectEqual(
    test.output,
    ${Self}.hash(
      data: test.input,
      dataByteCount: test.input.count,
      key: test.key))
}

// Feed the input to the context in pieces, for every combination of test case
// and chunk-size pattern, and check that the result matches the expected
// output.
SipHashTests.test("${Self}.append(UnsafeRawPointer)")
  .forEach(in: cartesianProduct(${tests}, incrementalPatterns)) {
  test_ in
  let (test, pattern) = test_

  var context = ${Self}(key: test.key)
  var startIndex = 0
  var chunkSizeIndex = 0
  while startIndex != test.input.endIndex {
    // Never read past the end of the input, whatever the pattern asks for.
    let chunkSize = min(
      pattern[chunkSizeIndex],
      test.input.endIndex - startIndex)
    context.append(
      Array(test.input[startIndex..<(startIndex+chunkSize)]),
      byteCount: chunkSize)
    startIndex += chunkSize
    // Move to the next chunk size, wrapping around at the end of the pattern.
    chunkSizeIndex += 1
    chunkSizeIndex %= pattern.count
  }
  expectEqual(
    test.output,
    context.finalizeAndReturnHash())

  // Check that we can query the hash value more than once.
  expectEqual(
    test.output,
    context.finalizeAndReturnHash())
}

% for data_type in ['UInt', 'Int', 'UInt64', 'Int64', 'UInt32', 'Int32']:
// Feed as much of the input as possible through the typed
// `append(${data_type})`, then the leftover bytes through the raw-bytes
// `append`, and check that the result matches the expected output.
SipHashTests.test("${Self}.append(${data_type})").forEach(in: ${tests}) {
  test in

  var context = ${Self}(key: test.key)

  let chunkSize = MemoryLayout<${data_type}>.size

  var startIndex = 0
  // Largest prefix length that is a whole number of chunks.
  let endIndex = test.input.count - (test.input.count % chunkSize)
  while startIndex != endIndex {
    context.append(
      loadUnaligned${data_type}(
        from: Array(
          test.input[startIndex..<(startIndex+chunkSize)])))
    startIndex += chunkSize
  }
  // Remaining bytes that do not fill a whole chunk (possibly none).
  context.append(
    Array(test.input.suffix(from: endIndex)),
    byteCount: test.input.count - endIndex)

  expectEqual(
    test.output,
    context.finalizeAndReturnHash())
}
% end

// Appending to a context after it has been finalized is expected to crash:
// `expectCrashLater()` is armed just before the `append` call.
SipHashTests.test("${Self}/AppendAfterFinalizing") {
  var context = ${Self}(key: (0, 0))
  _ = context.finalizeAndReturnHash()
  expectCrashLater()
  context.append([], byteCount: 0)
}
% end

runAllTests()