44{-# LANGUAGE ScopedTypeVariables #-}
55{-# LANGUAGE RecordWildCards #-}
66{-# LANGUAGE RankNTypes #-}
7- module Network.HTTP.Download.Verified where
7+ {-# LANGUAGE GADTs #-}
8+ {-# LANGUAGE StandaloneDeriving #-}
9+ module Network.HTTP.Download.Verified
10+ ( verifiedDownload
11+ , DownloadRequest (.. )
12+ , HashCheck (.. )
13+ , LengthCheck
14+ , VerifiedDownloadException (.. )
15+ ) where
816
917import qualified Data.List as List
18+ import qualified Data.ByteString as ByteString
1019import qualified Data.ByteString.Base64 as B64
1120import qualified Data.ByteString.Char8 as BC
1221import qualified Data.Conduit.Binary as CB
22+ import qualified Data.Conduit.List as CL
1323import qualified Data.Text as Text
1424import qualified Data.Text.Encoding as Text
1525
@@ -23,6 +33,8 @@ import Crypto.Hash.Conduit (sinkHash)
2333import Data.ByteString (ByteString )
2434import Data.Conduit
2535import Data.Conduit.Binary (sourceHandle , sinkHandle )
36+ import Data.Foldable (traverse_ )
37+ import Data.Monoid
2638import Data.Typeable (Typeable )
2739import Network.HTTP.Client.Conduit
2840import Network.HTTP.Types.Header (hContentLength , hContentMD5 )
@@ -31,22 +43,30 @@ import System.FilePath((<.>))
3143import System.Directory
3244import System.IO
3345
34- -- | A request together with the hash algorithm to use
35- -- to verify the response.
36- -- The type parameter specifies the algorithm.
37- data VerifiedRequest a = VerifiedRequest
38- { vrHashAlgorithm :: a
39- , vrExpectedHexDigest :: String
40- , vrDownloadBytes :: Int
41- , vrRequest :: Request
46+ -- | A request together with some checks to perform.
47+ data DownloadRequest = DownloadRequest
48+ { drRequest :: Request
49+ , drHashChecks :: [HashCheck ]
50+ , drLengthCheck :: Maybe LengthCheck
4251 }
4352 deriving Show
4453
54+ data HashCheck = forall a . (Show a , HashAlgorithm a ) => HashCheck
55+ { hashCheckAlgorithm :: a
56+ , hashCheckHexDigest :: String
57+ }
58+ deriving instance Show HashCheck
59+
60+ type LengthCheck = Int
61+
4562-- | An exception regarding verification of a download.
4663data VerifiedDownloadException
4764 = WrongContentLength
4865 Int -- expected
4966 ByteString -- actual (as listed in the header)
67+ | WrongStreamLength
68+ Int -- expected
69+ Int -- actual
5070 | WrongDigest
5171 String -- algorithm
5272 String -- expected
@@ -65,41 +85,49 @@ instance Exception VerifyFileException
6585-- is as expected.
6686--
6787-- Throws WrongDigest (VerifiedDownloadException)
68- sinkCheckHash
69- :: forall a m . (MonadThrow m , Show a , HashAlgorithm a )
70- => a -- ^ The algorithm (e.g. MD5)
71- -> String -- ^ The expected digest, rendered as a String (hexadecimal)
88+ sinkCheckHash :: MonadThrow m
89+ => HashCheck
7290 -> Consumer ByteString m ()
73- sinkCheckHash a expectedDigestString = do
74- ( digest :: Digest a ) <- sinkHash
91+ sinkCheckHash HashCheck { .. } = do
92+ digest <- sinkHashUsing hashCheckAlgorithm
7593 let actualDigestString = show digest
76- when (actualDigestString /= expectedDigestString) $
77- throwM $ WrongDigest (show a) expectedDigestString actualDigestString
94+ when (actualDigestString /= hashCheckHexDigest) $
95+ throwM $ WrongDigest (show hashCheckAlgorithm) hashCheckHexDigest actualDigestString
96+
97+ assertLengthSink :: MonadThrow m
98+ => LengthCheck
99+ -> ZipSink ByteString m ()
100+ assertLengthSink expectedStreamLength = ZipSink $ do
101+ Sum actualStreamLength <- CL. foldMap (Sum . ByteString. length )
102+ when (actualStreamLength /= expectedStreamLength) $
103+ throwM $ WrongStreamLength expectedStreamLength actualStreamLength
78104
105+ -- | A more explicitly type-guided sinkHash.
106+ sinkHashUsing :: (Monad m , HashAlgorithm a ) => a -> Consumer ByteString m (Digest a )
107+ sinkHashUsing _ = sinkHash
108+
109+ -- | Turns a list of hash checks into a ZipSink that checks all of them.
110+ hashChecksToZipSink :: MonadThrow m => [HashCheck ] -> ZipSink ByteString m ()
111+ hashChecksToZipSink = traverse_ (ZipSink . sinkCheckHash)
79112
80113-- | Copied and extended version of Network.HTTP.Download.download.
81114--
82115-- Has the following additional features:
83116-- * Verifies that response content-length header (if present)
84117-- matches expected length
85- -- * Only downloads expected length # of bytes
118+ -- * Limits the download to (close to) the expected # of bytes
119+ -- * Verifies that the expected # bytes were downloaded (not too few)
86120-- * Verifies md5 if response includes content-md5 header
87- -- * Verifies the expected hash
88- --
89- -- Further work ideas:
90- -- * Check existing file for the given length & hash
91- -- and redownload if it doesn't match
92- -- * Check the downloaded file isn't too small.
93- -- (Currently behavior only prevents it from being too large.)
94- -- * Add a "progress" hook so that long downloads don't look like they've hung.
121+ -- * Verifies the expected hashes
95122--
96123-- Throws VerifiedDownloadException, and whatever else "download" throws.
97- verifiedDownload :: (HashAlgorithm a , Show a , MonadReader env m , HasHttpManager env , MonadIO m , MonadThrow m )
98- => VerifiedRequest a
124+ verifiedDownload :: (MonadReader env m , HasHttpManager env , MonadIO m )
125+ => DownloadRequest
99126 -> Path Abs File -- ^ destination
127+ -> Sink ByteString (ReaderT env IO ) () -- ^ custom hook to observe progress
100128 -> m Bool -- ^ Whether a download was performed
101- verifiedDownload VerifiedRequest {.. } destpath = do
102- let req = vrRequest
129+ verifiedDownload DownloadRequest {.. } destpath progressSink = do
130+ let req = drRequest
103131 env <- ask
104132 liftIO $ whenM' getShouldDownload $ do
105133 createDirectoryIfMissing True dir
@@ -130,38 +158,51 @@ verifiedDownload VerifiedRequest{..} destpath = do
130158 (checkExpectations >> return True )
131159 `catch` \ (_ :: VerifyFileException ) -> return False
132160 `catch` \ (_ :: VerifiedDownloadException ) -> return False
133- where
134- checkExpectations = bracket (openFile fp ReadMode ) hClose $ \ h -> do
135- fileSizeInteger <- hFileSize h
136- when (fileSizeInteger > toInteger (maxBound :: Int )) $
137- throwM $ WrongFileSize vrDownloadBytes fileSizeInteger
138- let fileSize = fromInteger fileSizeInteger
139- when (fileSize /= vrDownloadBytes) $
140- throwM $ WrongFileSize vrDownloadBytes fileSizeInteger
141- sourceHandle h $$ getZipSink sinkCheckGivenHash
142-
143- sinkCheckGivenHash :: MonadThrow m => ZipSink ByteString m ()
144- sinkCheckGivenHash = ZipSink $
145- sinkCheckHash vrHashAlgorithm vrExpectedHexDigest
146161
147- go h res = do
148- let headers = responseHeaders res
162+ whenJust :: Monad m => Maybe a -> (a -> m () ) -> m ()
163+ whenJust (Just a) f = f a
164+ whenJust _ _ = return ()
165+
166+ checkExpectations = bracket (openFile fp ReadMode ) hClose $ \ h -> do
167+ whenJust drLengthCheck $ checkFileSizeExpectations h
168+ sourceHandle h $$ getZipSink (hashChecksToZipSink drHashChecks)
169+
170+ -- doesn't move the handle
171+ checkFileSizeExpectations h expectedFileSize = do
172+ fileSizeInteger <- hFileSize h
173+ when (fileSizeInteger > toInteger (maxBound :: Int )) $
174+ throwM $ WrongFileSize expectedFileSize fileSizeInteger
175+ let fileSize = fromInteger fileSizeInteger
176+ when (fileSize /= expectedFileSize) $
177+ throwM $ WrongFileSize expectedFileSize fileSizeInteger
178+
179+ checkContentLengthHeader headers expectedContentLength = do
149180 case List. lookup hContentLength headers of
150181 Just lengthBS -> do
151182 let lengthText = Text. strip $ Text. decodeUtf8 lengthBS
152183 lengthStr = Text. unpack lengthText
153- when (lengthStr /= show vrDownloadBytes ) $
154- throwM $ WrongContentLength vrDownloadBytes lengthBS
184+ when (lengthStr /= show expectedContentLength ) $
185+ throwM $ WrongContentLength expectedContentLength lengthBS
155186 _ -> return ()
156- let checkHash = (case List. lookup hContentMD5 headers of
187+
188+ go h res = do
189+ let headers = responseHeaders res
190+ whenJust drLengthCheck $ checkContentLengthHeader headers
191+ let hashChecks = (case List. lookup hContentMD5 headers of
157192 Just md5BS ->
158193 let md5ExpectedHexDigest = BC. unpack (B64. decodeLenient md5BS)
159- in ZipSink (sinkCheckHash MD5 md5ExpectedHexDigest)
160- Nothing ->
161- pure ()
162- ) *> sinkCheckGivenHash
194+ in [ HashCheck
195+ { hashCheckAlgorithm = MD5
196+ , hashCheckHexDigest = md5ExpectedHexDigest
197+ }
198+ ]
199+ Nothing -> []
200+ ) ++ drHashChecks
163201
164202 responseBody res
165- $= CB. isolate vrDownloadBytes
166- -- TODO: $= progressHook
167- $$ getZipSink (checkHash *> ZipSink (sinkHandle h))
203+ $= maybe (awaitForever yield) CB. isolate drLengthCheck
204+ $$ getZipSink
205+ ( hashChecksToZipSink hashChecks
206+ *> maybe (pure () ) assertLengthSink drLengthCheck
207+ *> ZipSink (sinkHandle h)
208+ *> ZipSink progressSink)
0 commit comments