Skip to content

Commit 3b3820e

Browse files
committed
Make size run in constant time
1 parent 27c37ce commit 3b3820e

File tree

10 files changed

+1079
-497
lines changed

10 files changed

+1079
-497
lines changed

Data/HashMap/Internal.hs

Lines changed: 545 additions & 324 deletions
Large diffs are not rendered by default.

Data/HashMap/Internal/Array.hs

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1-
{-# LANGUAGE BangPatterns #-}
2-
{-# LANGUAGE CPP #-}
3-
{-# LANGUAGE MagicHash #-}
4-
{-# LANGUAGE Rank2Types #-}
5-
{-# LANGUAGE ScopedTypeVariables #-}
6-
{-# LANGUAGE TemplateHaskellQuotes #-}
7-
{-# LANGUAGE UnboxedTuples #-}
1+
{-# LANGUAGE BangPatterns #-}
2+
{-# LANGUAGE CPP #-}
3+
{-# LANGUAGE DeriveLift #-}
4+
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
5+
{-# LANGUAGE MagicHash #-}
6+
{-# LANGUAGE Rank2Types #-}
7+
{-# LANGUAGE ScopedTypeVariables #-}
8+
{-# LANGUAGE TemplateHaskellQuotes #-}
9+
{-# LANGUAGE UnboxedTuples #-}
810
{-# OPTIONS_GHC -fno-full-laziness -funbox-strict-fields #-}
911
{-# OPTIONS_HADDOCK not-home #-}
1012

@@ -28,6 +30,10 @@
2830
module Data.HashMap.Internal.Array
2931
( Array(..)
3032
, MArray(..)
33+
, RunResA (..)
34+
, RunResM (..)
35+
, Size (..)
36+
, Sized (..)
3137

3238
-- * Creation
3339
, new
@@ -47,6 +53,7 @@ module Data.HashMap.Internal.Array
4753
, index#
4854
, update
4955
, updateWith'
56+
, updateWithInternal'
5057
, unsafeUpdateM
5158
, insert
5259
, insertM
@@ -57,6 +64,8 @@ module Data.HashMap.Internal.Array
5764
, unsafeThaw
5865
, unsafeSameArray
5966
, run
67+
, runInternal
68+
, run2
6069
, copy
6170
, copyM
6271
, cloneM
@@ -288,10 +297,27 @@ unsafeThaw ary
288297
(# s', mary #) -> (# s', MArray mary #)
289298
{-# INLINE unsafeThaw #-}
290299

300+
data RunResA e = RunResA !Size !(Array e)
301+
302+
data RunResM s e = RunResM !Size !(MArray s e)
303+
291304
run :: (forall s . ST s (MArray s e)) -> Array e
292305
run act = runST $ act >>= unsafeFreeze
293306
{-# INLINE run #-}
294307

308+
runInternal :: (forall s . ST s (RunResM s e)) -> RunResA e
309+
runInternal act = runST $ do
310+
RunResM s mary <- act
311+
ary <- unsafeFreeze mary
312+
return (RunResA s ary)
313+
{-# INLINE runInternal #-}
314+
315+
run2 :: (forall s. ST s (MArray s e, a)) -> (Array e, a)
316+
run2 k = runST (do
317+
(marr,b) <- k
318+
arr <- unsafeFreeze marr
319+
return (arr,b))
320+
295321
-- | Unsafely copy the elements of an array. Array bounds are not checked.
296322
copy :: Array e -> Int -> MArray s e -> Int -> Int -> ST s ()
297323
copy !src !_sidx@(I# sidx#) !dst !_didx@(I# didx#) _n@(I# n#) =
@@ -360,6 +386,26 @@ updateWith' ary idx f
360386
= update ary idx $! f x
361387
{-# INLINE updateWith' #-}
362388

389+
-- | This newtype wrapper is to avoid confusion when local functions
390+
-- take more than one paramenter of 'Int' type (see 'go' in
391+
-- 'Data.HashMap.Base.unionWithKeyInternal').
392+
newtype Size = Size { unSize :: Int }
393+
deriving (Eq, Ord, Num, Integral, Enum, Real, NFData, TH.Lift)
394+
395+
-- | Helper datatype used in 'updateWithInternal''. Used when a change in
396+
-- a value's size must be returned along with the value itself (typically
397+
-- a hashmap).
398+
data Sized a = Sized {-# UNPACK #-} !Size !a
399+
400+
-- | /O(n)/ Update the element at the given position in this array, by
401+
-- applying a function to it. Evaluates the element to WHNF before
402+
-- inserting it into the array.
403+
updateWithInternal' :: Array e -> Int -> (e -> Sized e) -> RunResA e
404+
updateWithInternal' ary idx f =
405+
let Sized sz e = f (index ary idx)
406+
in RunResA sz (update ary idx e)
407+
{-# INLINE updateWithInternal' #-}
408+
363409
-- | \(O(1)\) Update the element at the given position in this array,
364410
-- without copying.
365411
unsafeUpdateM :: Array e -> Int -> e -> ST s ()

Data/HashMap/Internal/Debug.hs

Lines changed: 54 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ module Data.HashMap.Internal.Debug
2828
import Data.Bits (complement, countTrailingZeros, popCount, shiftL,
2929
unsafeShiftL, (.&.), (.|.))
3030
import Data.Hashable (Hashable)
31-
import Data.HashMap.Internal (Bitmap, Hash, HashMap (..), Leaf (..),
31+
import Data.HashMap.Internal (Bitmap, Hash, HashMap (..), Leaf (..), Tree (..),
3232
bitsPerSubkey, fullBitmap, hash,
3333
isLeafOrCollision, maxChildren, sparseIndex)
3434
import Data.Semigroup (Sum (..))
@@ -65,6 +65,7 @@ data Error k
6565
| INV8_bad_Full_size !Int
6666
| INV9_Collision_size !Int
6767
| INV10_Collision_duplicate_key k !Hash
68+
| INV11_Negative_HM_Size !Int
6869
deriving (Eq, Show)
6970

7071
-- TODO: Name this 'Index'?!
@@ -95,55 +96,60 @@ hashMatchesSubHashPath (SubHashPath ph l) h = maskToLength h l == ph
9596
maskToLength h' l' = h' .&. complement (complement 0 `shiftL` l')
9697

9798
valid :: Hashable k => HashMap k v -> Validity k
98-
valid Empty = Valid
99-
valid t = validInternal initialSubHashPath t
99+
valid (HashMap sz hm) = if sz >= 0
100+
then valid' hm
101+
else Invalid (INV11_Negative_HM_Size $ A.unSize sz) initialSubHashPath
100102
where
101-
validInternal p Empty = Invalid INV1_internal_Empty p
102-
validInternal p (Leaf h l) = validHash p h <> validLeaf p h l
103-
validInternal p (Collision h ary) = validHash p h <> validCollision p h ary
104-
validInternal p (BitmapIndexed b ary) = validBitmapIndexed p b ary
105-
validInternal p (Full ary) = validFull p ary
103+
valid' :: Hashable k => Tree k v -> Validity k
104+
valid' Empty = Valid
105+
valid' t = validInternal initialSubHashPath t
106+
where
107+
validInternal p Empty = Invalid INV1_internal_Empty p
108+
validInternal p (Leaf h l) = validHash p h <> validLeaf p h l
109+
validInternal p (Collision h ary) = validHash p h <> validCollision p h ary
110+
validInternal p (BitmapIndexed b ary) = validBitmapIndexed p b ary
111+
validInternal p (Full ary) = validFull p ary
106112

107-
validHash p h | hashMatchesSubHashPath p h = Valid
108-
| otherwise = Invalid (INV6_misplaced_hash h) p
113+
validHash p h | hashMatchesSubHashPath p h = Valid
114+
| otherwise = Invalid (INV6_misplaced_hash h) p
109115

110-
validLeaf p h (L k _) | hash k == h = Valid
111-
| otherwise = Invalid (INV7_key_hash_mismatch k h) p
116+
validLeaf p h (L k _) | hash k == h = Valid
117+
| otherwise = Invalid (INV7_key_hash_mismatch k h) p
112118

113-
validCollision p h ary = validCollisionSize <> A.foldMap (validLeaf p h) ary <> distinctKeys
114-
where
115-
n = A.length ary
116-
validCollisionSize | n < 2 = Invalid (INV9_Collision_size n) p
117-
| otherwise = Valid
118-
distinctKeys = A.foldMap (\(L k _) -> appearsOnce k) ary
119-
appearsOnce k | A.foldMap (\(L k' _) -> if k' == k then Sum @Int 1 else Sum 0) ary == 1 = Valid
120-
| otherwise = Invalid (INV10_Collision_duplicate_key k h) p
121-
122-
validBitmapIndexed p b ary = validBitmap <> validArraySize <> validSubTrees p b ary
123-
where
124-
validBitmap | b .&. complement fullBitmap == 0 = Valid
125-
| otherwise = Invalid (INV2_Bitmap_unexpected_1_bits b) p
126-
n = A.length ary
127-
validArraySize | n < 1 || n >= maxChildren = Invalid (INV3_bad_BitmapIndexed_size n) p
128-
| popCount b == n = Valid
129-
| otherwise = Invalid (INV4_bitmap_array_size_mismatch b n) p
130-
131-
validSubTrees p b ary
132-
| A.length ary == 1
133-
, isLeafOrCollision (A.index ary 0)
134-
= Invalid INV5_BitmapIndexed_invalid_single_subtree p
135-
| otherwise = go b
136-
where
137-
go 0 = Valid
138-
go b' = validInternal (addSubHash p (fromIntegral c)) (A.index ary i) <> go b''
119+
validCollision p h ary = validCollisionSize <> A.foldMap (validLeaf p h) ary <> distinctKeys
139120
where
140-
c = countTrailingZeros b'
141-
m = 1 `unsafeShiftL` c
142-
i = sparseIndex b m
143-
b'' = b' .&. complement m
144-
145-
validFull p ary = validArraySize <> validSubTrees p fullBitmap ary
146-
where
147-
n = A.length ary
148-
validArraySize | n == maxChildren = Valid
149-
| otherwise = Invalid (INV8_bad_Full_size n) p
121+
n = A.length ary
122+
validCollisionSize | n < 2 = Invalid (INV9_Collision_size n) p
123+
| otherwise = Valid
124+
distinctKeys = A.foldMap (\(L k _) -> appearsOnce k) ary
125+
appearsOnce k | A.foldMap (\(L k' _) -> if k' == k then Sum @Int 1 else Sum 0) ary == 1 = Valid
126+
| otherwise = Invalid (INV10_Collision_duplicate_key k h) p
127+
128+
validBitmapIndexed p b ary = validBitmap <> validArraySize <> validSubTrees p b ary
129+
where
130+
validBitmap | b .&. complement fullBitmap == 0 = Valid
131+
| otherwise = Invalid (INV2_Bitmap_unexpected_1_bits b) p
132+
n = A.length ary
133+
validArraySize | n < 1 || n >= maxChildren = Invalid (INV3_bad_BitmapIndexed_size n) p
134+
| popCount b == n = Valid
135+
| otherwise = Invalid (INV4_bitmap_array_size_mismatch b n) p
136+
137+
validSubTrees p b ary
138+
| A.length ary == 1
139+
, isLeafOrCollision (A.index ary 0)
140+
= Invalid INV5_BitmapIndexed_invalid_single_subtree p
141+
| otherwise = go b
142+
where
143+
go 0 = Valid
144+
go b' = validInternal (addSubHash p (fromIntegral c)) (A.index ary i) <> go b''
145+
where
146+
c = countTrailingZeros b'
147+
m = 1 `unsafeShiftL` c
148+
i = sparseIndex b m
149+
b'' = b' .&. complement m
150+
151+
validFull p ary = validArraySize <> validSubTrees p fullBitmap ary
152+
where
153+
n = A.length ary
154+
validArraySize | n == maxChildren = Valid
155+
| otherwise = Invalid (INV8_bad_Full_size n) p

0 commit comments

Comments
 (0)