tahoe-lafs · exarkun · Jan 13, 2023 · Jan 13, 2023 · Jan 18, 2023 · Jan 18, 2023
diff --git a/fec.cabal b/fec.cabal
@@ -1,6 +1,6 @@
 cabal-version:      3.4
 name:               fec
-version:            0.1.1
+version:            0.2.0
 license:            GPL-2.0-or-later
 license-file:       README.rst
 author:             Adam Langley <[email protected]>

diff --git a/haskell/Codec/FEC.hs b/haskell/Codec/FEC.hs
@@ -40,7 +40,6 @@ import Foreign.C.Types
 import Foreign.Marshal.Alloc
 import Foreign.Marshal.Array (withArray, advancePtr)
 import System.IO (withFile, IOMode(..))
-import System.IO.Unsafe (unsafePerformIO)
 
 data CFEC
 data FECParams = FECParams
@@ -82,14 +81,14 @@ isValidConfig k n
 -- | Return a FEC with the given parameters.
 fec :: Int  -- ^ the number of primary blocks
     -> Int  -- ^ the total number blocks, must be < 256
-    -> FECParams
+    -> IO FECParams
 fec k n =
   if not (isValidConfig k n)
      then error $ "Invalid FEC parameters: " ++ show k ++ " " ++ show n
-     else unsafePerformIO (do
+     else do
        cfec <- _new (fromIntegral k) (fromIntegral n)
        params <- newForeignPtr _free cfec
-       return $ FECParams params k n)
+       return $ FECParams params k n
 
 -- | Create a C array of unsigned from an input array
 uintCArray :: [Int] -> ((Ptr CUInt) -> IO a) -> IO a
@@ -129,17 +128,17 @@ createByteStringArray n size f = do
 --   @k@ primary blocks.
 encode :: FECParams
        -> [B.ByteString]  -- ^ a list of @k@ input blocks
-       -> [B.ByteString]  -- ^ (n - k) output blocks
+       -> IO [B.ByteString]  -- ^ (n - k) output blocks
 encode (FECParams params k n) inblocks
   | length inblocks /= k = error "Wrong number of blocks to FEC encode"
   | not (allByteStringsSameLength inblocks) = error "Not all inputs to FEC encode are the same length"
-  | otherwise = unsafePerformIO (do
+  | otherwise = do
       let sz = B.length $ head inblocks
       withForeignPtr params (\cfec -> do
         byteStringsToArray inblocks (\src -> do
           createByteStringArray (n - k) sz (\fecs -> do
             uintCArray [k..(n - 1)] (\block_nums -> do
-              _encode cfec src fecs block_nums (fromIntegral (n - k)) $ fromIntegral sz)))))
+              _encode cfec src fecs block_nums (fromIntegral (n - k)) $ fromIntegral sz))))
 
 -- | A sort function for tagged assoc lists
 sortTagged :: [(Int, a)] -> [(Int, a)]
@@ -161,13 +160,13 @@ reorderPrimaryBlocks n blocks = inner (sortTagged pBlocks) sBlocks [] where
 --   tagged with its number (see the module comments about block numbering)
 decode :: FECParams
        -> [(Int, B.ByteString)]  -- ^ a list of @k@ blocks and their index
-       -> [B.ByteString]  -- ^ a list the @k@ primary blocks
+       -> IO [B.ByteString]  -- ^ a list the @k@ primary blocks
 decode (FECParams params k n) inblocks
   | length (nub $ map fst inblocks) /= length (inblocks) = error "Duplicate input blocks in FEC decode"
   | any (\f -> f < 0 || f >= n) $ map fst inblocks = error "Invalid block numbers in FEC decode"
   | length inblocks /= k = error "Wrong number of blocks to FEC decode"
   | not (allByteStringsSameLength $ map snd inblocks) = error "Not all inputs to FEC decode are same length"
-  | otherwise = unsafePerformIO (do
+  | otherwise = do
       let sz = B.length $ snd $ head inblocks
           inblocks' = reorderPrimaryBlocks k inblocks
           presentBlocks = map fst inblocks'
@@ -179,7 +178,7 @@ decode (FECParams params k n) inblocks
           let blocks = [0..(n - 1)] \\ presentBlocks
               tagged = zip blocks b
               allBlocks = sortTagged $ tagged ++ inblocks'
-          return $ take k $ map snd allBlocks)))
+          return $ take k $ map snd allBlocks))
 
 -- | Break a ByteString into @n@ parts, equal in length to the original, such
 --   that all @n@ are required to reconstruct the original, but having less
@@ -219,32 +218,38 @@ secureCombine (a : rest) = B.pack $ B.zipWith xor a $ secureCombine rest
 enFEC :: Int  -- ^ the number of blocks required to reconstruct
       -> Int  -- ^ the total number of blocks
       -> B.ByteString  -- ^ the data to divide
-      -> [B.ByteString]  -- ^ the resulting blocks
-enFEC k n input = taggedPrimaryBlocks ++ taggedSecondaryBlocks where
-  taggedPrimaryBlocks = map (uncurry B.cons) $ zip [0..] primaryBlocks
-  taggedSecondaryBlocks = map (uncurry B.cons) $ zip [(fromIntegral k)..] secondaryBlocks
-  remainder = B.length input `mod` k
-  paddingLength = if remainder >= 1 then (k - remainder) else k
-  paddingBytes = (B.replicate (paddingLength - 1) 0) `B.append` (B.singleton $ fromIntegral paddingLength)
-  divide a bs
-    | B.null bs = []
-    | otherwise = (B.take a bs) : (divide a $ B.drop a bs)
-  input' = input `B.append` paddingBytes
-  blockSize = B.length input' `div` k
-  primaryBlocks = divide blockSize input'
-  secondaryBlocks = encode params primaryBlocks
-  params = fec k n
+      -> IO [B.ByteString]  -- ^ the resulting blocks
+enFEC k n input = do
+    params <- fec k n
+    secondaryBlocks <- encode params primaryBlocks
+    pure $ taggedPrimaryBlocks ++ (taggedSecondaryBlocks secondaryBlocks)
+  where
+    taggedPrimaryBlocks = map (uncurry B.cons) $ zip [0..] primaryBlocks
+    taggedSecondaryBlocks sb = map (uncurry B.cons) $ zip [(fromIntegral k)..] sb
+    remainder = B.length input `mod` k
+    paddingLength = if remainder >= 1 then (k - remainder) else k
+    paddingBytes = (B.replicate (paddingLength - 1) 0) `B.append` (B.singleton $ fromIntegral paddingLength)
+    divide a bs
+        | B.null bs = []
+        | otherwise = (B.take a bs) : (divide a $ B.drop a bs)
+    input' = input `B.append` paddingBytes
+    blockSize = B.length input' `div` k
+    primaryBlocks = divide blockSize input'
+
 
 -- | Reverses the operation of @enFEC@.
 deFEC :: Int  -- ^ the number of blocks required (matches call to @enFEC@)
       -> Int  -- ^ the total number of blocks (matches call to @enFEC@)
       -> [B.ByteString]  -- ^ a list of k, or more, blocks from @enFEC@
-      -> B.ByteString
+      -> IO B.ByteString
 deFEC k n inputs
   | length inputs < k = error "Too few inputs to deFEC"
-  | otherwise = B.take (B.length fecOutput - paddingLength) fecOutput where
-      paddingLength = fromIntegral $ B.last fecOutput
+  | otherwise =
+    let
+      paddingLength output = fromIntegral $ B.last output
       inputs' = take k inputs
       taggedInputs = map (\bs -> (fromIntegral $ B.head bs, B.tail bs)) inputs'
-      fecOutput = B.concat $ decode params taggedInputs
-      params = fec k n
+    in do
+      params <- fec k n
+      fecOutput <- B.concat <$> decode params taggedInputs
+      pure $ B.take (B.length fecOutput - paddingLength fecOutput) fecOutput
diff --git a/haskell/test/FECTest.hs b/haskell/test/FECTest.hs
@@ -4,14 +4,18 @@ module Main where
 
 import Test.Hspec
 
+import Control.Monad (replicateM_)
+import Control.Monad.IO.Class (
+    liftIO,
+ )
+
 import qualified Codec.FEC as FEC
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as BL
 import Data.Int
 import Data.List (sortOn)
 import Data.Serializer
 import Data.Word
-
 import System.IO (IOMode (..), withFile)
 import System.Random
 import Test.QuickCheck
@@ -34,11 +38,6 @@ instance Arbitrary Params where
         total <- choose (required, 255)
         return $ Params required total
 
-instance Arbitrary FEC.FECParams where
-    arbitrary = do
-      (Params required total) <- arbitrary :: Gen Params
-      return $ FEC.fec required total
-
 randomTake :: Int -> Int -> [a] -> [a]
 randomTake seed n values = map snd $ take n sortedValues
   where
@@ -48,71 +47,91 @@ randomTake seed n values = map snd $ take n sortedValues
     rnds = randoms gen
     gen = mkStdGen seed
 
--- | Any combination of the inputs blocks and the output blocks from
--- @FEC.encode@, as long as there are at least @k@ of them, can be recombined
--- using @FEC.decode@ to produce the original input blocks.
---
-testFEC
-  :: FEC.FECParams
-  -- ^ The FEC parameters to exercise.
-  -> Word16
-  -- ^ The length of the blocks to exercise.
-  -> Int
-  -- ^ A random seed to use to be able to vary the choice of which blocks to
-  -- try to decode.
-  -> Bool
-  -- ^ True if the encoded input was reconstructed by decoding, False
-  -- otherwise.
-testFEC fec len seed = FEC.decode fec someTaggedBlocks == origBlocks
-  where
-    -- Construct some blocks.  Each will just be the byte corresponding to the
-    -- block number repeated to satisfy the requested length.
-    origBlocks = B.replicate (fromIntegral len) . fromIntegral <$> [0 .. (FEC.paramK fec - 1)]
-
+{- | Any combination of the inputs blocks and the output blocks from
+ @FEC.encode@, as long as there are at least @k@ of them, can be recombined
+ using @FEC.decode@ to produce the original input blocks.
+-}
+testFEC ::
+    -- | The FEC parameters to exercise.
+    FEC.FECParams ->
+    -- | The length of the blocks to exercise.
+    Word16 ->
+    -- | A random seed to use to be able to vary the choice of which blocks to
+    -- try to decode.
+    Int ->
+    -- | True if the encoded input was reconstructed by decoding, False
+    -- otherwise.
+    Expectation
+testFEC fec len seed = do
     -- Encode the data to produce the "secondary" blocks which (might) add
     -- redundancy to the original blocks.
-    secondaryBlocks = FEC.encode fec origBlocks
+    secondaryBlocks <- FEC.encode fec origBlocks
 
-    -- Tag each block with its block number because the decode API requires
-    -- this information.
-    taggedBlocks = zip [0 ..] (origBlocks ++ secondaryBlocks)
+    let -- Tag each block with its block number because the decode API requires
+        -- this information.
+        taggedBlocks = zip [0 ..] (origBlocks ++ secondaryBlocks)
 
-    -- Choose enough of the tagged blocks (some combination of original and
-    -- secondary) to try to use for decoding.
-    someTaggedBlocks = randomTake seed (FEC.paramK fec) taggedBlocks
+        -- Choose enough of the tagged blocks (some combination of original and
+        -- secondary) to try to use for decoding.
+        someTaggedBlocks = randomTake seed (FEC.paramK fec) taggedBlocks
+
+    decoded <- FEC.decode fec someTaggedBlocks
+    decoded `shouldBe` origBlocks
+  where
+    -- Construct some blocks.  Each will just be the byte corresponding to the
+    -- block number repeated to satisfy the requested length.
+    origBlocks = B.replicate (fromIntegral len) . fromIntegral <$> [0 .. (FEC.paramK fec - 1)]
 
 -- | @FEC.secureDivide@ is the inverse of @FEC.secureCombine@.
 prop_divide :: Word16 -> Word8 -> Word8 -> Property
 prop_divide size byte divisor = monadicIO $ do
-  let input = B.replicate (fromIntegral size + 1) byte
-  parts <- run $ FEC.secureDivide (fromIntegral divisor) input
-  assert (FEC.secureCombine parts == input)
+    let input = B.replicate (fromIntegral size + 1) byte
+    parts <- run $ FEC.secureDivide (fromIntegral divisor) input
+    assert (FEC.secureCombine parts == input)
 
 -- | @FEC.encode@ is the inverse of @FEC.decode@.
-prop_decode :: FEC.FECParams -> Word16 -> Int -> Property
-prop_decode fec len seed = property $ testFEC fec len seed
+prop_decode :: Params -> Word16 -> Int -> Property
+prop_decode (Params required total) len seed =
+    monadicIO . run $ do
+        fec <- FEC.fec required total
+        testFEC fec len seed
+
+prop_primary_copies :: Params -> B.ByteString -> Property
+prop_primary_copies (Params _ total) primary = monadicIO $ do
+    fec <- run $ FEC.fec 1 total
+    secondary <- run $ FEC.encode fec [primary]
+    assert $ all (primary ==) secondary
 
 -- | @FEC.enFEC@ is the inverse of @FEC.deFEC@.
 prop_deFEC :: Params -> B.ByteString -> Property
-prop_deFEC (Params required total) testdata =
-  FEC.deFEC required total minimalShares === testdata
-  where
-    allShares = FEC.enFEC required total testdata
-    minimalShares = take required allShares
+prop_deFEC (Params required total) testdata = monadicIO $ do
+    encoded <- run $ FEC.enFEC required total testdata
+    decoded <- run $ FEC.deFEC required total (take required encoded)
+    assert $ testdata == decoded
 
 main :: IO ()
-main = hspec $ do
-    describe "secureCombine" $ do
-        -- secureDivide is insanely slow and memory hungry for large inputs,
-        -- like QuickCheck will find with it as currently defined.  Just pass
-        -- some small inputs.  It's not clear it's worth fixing (or even
-        -- keeping) thesefunctions.  They don't seem to be used by anything.
-        -- Why are they here?
-        it "is the inverse of secureDivide n" $ once $ prop_divide 1024 65 3
-
-    describe "deFEC" $ do
-        it "is the inverse of enFEC" $ (withMaxSuccess 2000 prop_deFEC)
-
-    describe "decode" $ do
-        it "is (nearly) the inverse of encode" $ (withMaxSuccess 2000 prop_decode)
-        it "works with required=255" $ property $ prop_decode (FEC.fec 255 255)
+main = hspec $
+    parallel $ do
+        describe "secureCombine" $ do
+            -- secureDivide is insanely slow and memory hungry for large inputs,
+            -- like QuickCheck will find with it as currently defined.  Just pass
+            -- some small inputs.  It's not clear it's worth fixing (or even
+            -- keeping) thesefunctions.  They don't seem to be used by anything.
+            -- Why are they here?
+            it "is the inverse of secureDivide n" $ once $ prop_divide 1024 65 3
+
+        describe "deFEC" $ do
+            it "is the inverse of enFEC" $ withMaxSuccess 2000 prop_deFEC
+
+        describe "decode" $ do
+            it "is (nearly) the inverse of encode" $ withMaxSuccess 2000 prop_decode
+            it "works with total=255" $ property $ prop_decode (Params 1 255)
+            it "works with required=255" $ property $ prop_decode (Params 255 255)
+
+        describe "encode" $ do
+            -- Since a single property won't result in parallel execution, add a
+            -- few of these.
+            replicateM_ 10 $
+                it "returns copies of the primary block for all 1 of N encodings" $
+                    property $
+                        withMaxSuccess 10000 prop_primary_copies