From 5c867db4ee42a9f599b1c934e368c618680c562e Mon Sep 17 00:00:00 2001 From: Bodigrim Date: Sun, 15 Dec 2024 20:11:34 +0000 Subject: [PATCH] Use getDirectoryContentsRecursive from directory-ospath-streaming --- .github/workflows/haskell-ci.yml | 10 ++-- .github/workflows/large-files.yml | 4 ++ Codec/Archive/Tar/Entry.hs | 1 + Codec/Archive/Tar/Pack.hs | 69 ++++------------------------ cabal.haskell-ci | 2 +- htar/htar.cabal | 2 +- tar.cabal | 3 +- test/Codec/Archive/Tar/Pack/Tests.hs | 3 +- 8 files changed, 25 insertions(+), 69 deletions(-) diff --git a/.github/workflows/haskell-ci.yml b/.github/workflows/haskell-ci.yml index 0e14cf4..56626a4 100644 --- a/.github/workflows/haskell-ci.yml +++ b/.github/workflows/haskell-ci.yml @@ -8,9 +8,9 @@ # # For more information, see https://github.com/haskell-CI/haskell-ci # -# version: 0.19.20241121 +# version: 0.19.20241202 # -# REGENDATA ("0.19.20241121",["github","cabal.project"]) +# REGENDATA ("0.19.20241202",["github","cabal.project"]) # name: Haskell-CI on: @@ -28,9 +28,9 @@ jobs: strategy: matrix: include: - - compiler: ghc-9.12.0.20241114 + - compiler: ghc-9.12.0.20241128 compilerKind: ghc - compilerVersion: 9.12.0.20241114 + compilerVersion: 9.12.0.20241128 setup-method: ghcup-prerelease allow-failure: false - compiler: ghc-9.10.1 @@ -232,7 +232,7 @@ jobs: if $HEADHACKAGE; then echo "allow-newer: $($HCPKG list --simple-output | sed -E 's/([a-zA-Z-]+)-[0-9.]+/*:\1,/g')" >> cabal.project fi - $HCPKG list --simple-output --names-only | perl -ne 'for (split /\s+/) { print "constraints: any.$_ installed\n" unless /^(bytestring|directory|filepath|htar|tar|unix)$/; }' >> cabal.project.local + $HCPKG list --simple-output --names-only | perl -ne 'for (split /\s+/) { print "constraints: any.$_ installed\n" unless /^(bytestring|directory|filepath|htar|tar|text|unix)$/; }' >> cabal.project.local cat cabal.project cat cabal.project.local - name: dump install plan diff --git a/.github/workflows/large-files.yml b/.github/workflows/large-files.yml index f3a7196..4d3be70 100644 --- a/.github/workflows/large-files.yml +++ b/.github/workflows/large-files.yml @@ -16,6 +16,10 @@ jobs: id: setup-haskell-cabal with: ghc-version: 'latest' + - name: Install system dependencies + run: | + sudo apt-get update -y + sudo apt-get install -y libbz2-dev - name: Update cabal package database run: cabal update - uses: actions/cache@v4 diff --git a/Codec/Archive/Tar/Entry.hs b/Codec/Archive/Tar/Entry.hs index 1fda874..ba5d2ca 100644 --- a/Codec/Archive/Tar/Entry.hs +++ b/Codec/Archive/Tar/Entry.hs @@ -77,3 +77,4 @@ module Codec.Archive.Tar.Entry ( import Codec.Archive.Tar.Types import Codec.Archive.Tar.Pack +import System.Directory.OsPath.Streaming (getDirectoryContentsRecursive) diff --git a/Codec/Archive/Tar/Pack.hs b/Codec/Archive/Tar/Pack.hs index 984f293..0c0d345 100644 --- a/Codec/Archive/Tar/Pack.hs +++ b/Codec/Archive/Tar/Pack.hs @@ -25,15 +25,12 @@ module Codec.Archive.Tar.Pack ( packDirectoryEntry, packSymlinkEntry, longLinkEntry, - - getDirectoryContentsRecursive, ) where import Codec.Archive.Tar.LongNames import Codec.Archive.Tar.PackAscii (filePathToOsPath, osPathToFilePath) import Codec.Archive.Tar.Types -import Control.Monad (join, when, forM, (>=>)) import Data.Bifunctor (bimap) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL @@ -42,22 +39,19 @@ import System.File.OsPath import System.OsPath ( OsPath, () ) import qualified System.OsPath as FilePath.Native - ( addTrailingPathSeparator, hasTrailingPathSeparator, splitDirectories ) + ( addTrailingPathSeparator, hasTrailingPathSeparator ) import System.Directory.OsPath - ( listDirectory, doesDirectoryExist, getModificationTime + ( doesDirectoryExist, getModificationTime , pathIsSymbolicLink, getSymbolicLinkTarget , Permissions(..), getPermissions, getFileSize ) -import System.Directory.OsPath.FileType as FT -import System.Directory.OsPath.Streaming -import Data.Time.Clock - ( UTCTime ) +import System.Directory.OsPath.Types as FT +import System.Directory.OsPath.Streaming (getDirectoryContentsRecursive) import Data.Time.Clock.POSIX ( utcTimeToPOSIXSeconds ) import System.IO ( IOMode(ReadMode), hFileSize ) import System.IO.Unsafe (unsafeInterleaveIO) import Control.Exception (throwIO, SomeException) -import Codec.Archive.Tar.Check.Internal (checkEntrySecurity) -- | Creates a tar archive from a list of directory or files. Any directories -- specified will have their contents included recursively. Paths in the @@ -106,12 +100,16 @@ preparePaths baseDir = fmap concat . interleave . map go isSymlink <- pathIsSymbolicLink abspath if isDir && not isSymlink then do entries <- getDirectoryContentsRecursive abspath - let entries' = map (relpath ) entries + let entries' = map ((relpath ) . addSeparatorIfDir) entries return $ if relpath == mempty then entries' else FilePath.Native.addTrailingPathSeparator relpath : entries' else return [relpath] + addSeparatorIfDir (fn, ty) = case ty of + FT.Directory{} -> FilePath.Native.addTrailingPathSeparator fn + _ -> fn + -- | Pack paths while accounting for overlong filepaths. packPaths :: OsPath @@ -208,8 +206,6 @@ packDirectoryEntry' filepath tarpath = do -- | Construct a tar entry based on a local symlink. -- --- This automatically checks symlink safety via 'checkEntrySecurity'. --- -- @since 0.6.0.0 packSymlinkEntry :: FilePath -- ^ Full path to find the file on the local disk @@ -225,53 +221,6 @@ packSymlinkEntry' filepath tarpath = do linkTarget <- getSymbolicLinkTarget filepath pure $ symlinkEntry tarpath linkTarget --- | This is a utility function, much like 'listDirectory'. The --- difference is that it includes the contents of subdirectories. --- --- The paths returned are all relative to the top directory. Directory paths --- are distinguishable by having a trailing path separator --- (see 'FilePath.Native.hasTrailingPathSeparator'). --- --- All directories are listed before the files that they contain. Amongst the --- contents of a directory, subdirectories are listed after normal files. The --- overall result is that files within a directory will be together in a single --- contiguous group. This tends to improve file layout and IO performance when --- creating or extracting tar archives. --- --- * This function returns results lazily. Subdirectories are not scanned --- until the files entries in the parent directory have been consumed. --- If the source directory structure changes before the result is used in full, --- the behaviour is undefined. --- -getDirectoryContentsRecursive :: OsPath -> IO [OsPath] -getDirectoryContentsRecursive base = recurseDirectories [mempty] - where - recurseDirectories :: [OsPath] -> IO [OsPath] - recurseDirectories [] = pure [] - recurseDirectories (path : paths) = do - stream <- openDirStream (base path) - recurseStream path stream paths - - recurseStream :: OsPath -> DirStream -> [OsPath] -> IO [OsPath] - recurseStream currPath currStream rest = go - where - go = unsafeInterleaveIO $ do - mfn <- readDirStream currStream - case mfn of - Nothing -> do - closeDirStream currStream - recurseDirectories rest - Just fn -> do - ty <- getFileType basePathFn - case ty of - FT.Directory -> - (FilePath.Native.addTrailingPathSeparator pathFn :) <$> - recurseStream currPath currStream (pathFn : rest) - _ -> (pathFn :) <$> go - where - pathFn = currPath fn - basePathFn = base pathFn - getModTime :: OsPath -> IO EpochTime getModTime path = do -- The directory package switched to the new time package diff --git a/cabal.haskell-ci b/cabal.haskell-ci index ce76db7..d23d87b 100644 --- a/cabal.haskell-ci +++ b/cabal.haskell-ci @@ -1 +1 @@ -installed: -directory -unix -bytestring -filepath +installed: -directory -unix -bytestring -filepath -text diff --git a/htar/htar.cabal b/htar/htar.cabal index 1ff38e1..14a8838 100644 --- a/htar/htar.cabal +++ b/htar/htar.cabal @@ -30,7 +30,7 @@ executable htar base >= 4.12 && < 5, time >= 1.1, directory >= 1.0, - filepath >= 1.0, + filepath >= 1.4.100, bytestring >= 0.9, tar >= 0.4.2, zlib >= 0.4 && < 0.8, diff --git a/tar.cabal b/tar.cabal index cbb0794..54ab040 100644 --- a/tar.cabal +++ b/tar.cabal @@ -55,7 +55,7 @@ library tar-internal containers >= 0.2 && < 0.8, deepseq >= 1.1 && < 1.6, directory >= 1.3.1 && < 1.4, - directory-ospath-streaming < 0.2, + directory-ospath-streaming >= 0.2 && < 0.3, file-io < 0.2, filepath >= 1.4.100 && < 1.6, os-string >= 2.0 && < 2.1, @@ -99,6 +99,7 @@ test-suite properties containers, deepseq, directory >= 1.2, + directory-ospath-streaming >= 0.2 && < 0.3, file-embed, filepath, QuickCheck == 2.*, diff --git a/test/Codec/Archive/Tar/Pack/Tests.hs b/test/Codec/Archive/Tar/Pack/Tests.hs index 0c6d4a8..1c04196 100644 --- a/test/Codec/Archive/Tar/Pack/Tests.hs +++ b/test/Codec/Archive/Tar/Pack/Tests.hs @@ -31,6 +31,7 @@ import qualified Data.List as L import Data.List.NonEmpty (NonEmpty(..)) import GHC.IO.Encoding import System.Directory +import System.Directory.OsPath.Streaming (getDirectoryContentsRecursive) import System.FilePath import qualified System.FilePath.Posix as Posix import qualified System.Info @@ -110,7 +111,7 @@ prop_roundtrip n' xss cnt pure $ cnt === cnt' else do -- Forcing the result, otherwise lazy IO misbehaves. - recFiles <- Pack.getDirectoryContentsRecursive (filePathToOsPath baseDir) >>= evaluate . force + recFiles <- getDirectoryContentsRecursive (filePathToOsPath baseDir) >>= evaluate . force pure $ counterexample ("File " ++ absFile ++ " does not exist; instead found\n" ++ unlines (map show recFiles)) False | otherwise = discard