Skip to content

Commit b8ad168

Browse files
committed
Enhancements to verifiedDownload, ported Fetch and Download to use it
1 parent f5167b8 commit b8ad168

4 files changed

Lines changed: 246 additions & 139 deletions

File tree

src/Network/HTTP/Download.hs

Lines changed: 34 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,13 @@
22
{-# LANGUAGE MultiParamTypeClasses #-}
33
{-# LANGUAGE OverloadedStrings #-}
44
module Network.HTTP.Download
5-
( download
5+
( verifiedDownload
6+
, DownloadRequest(..)
7+
, HashCheck(..)
8+
, LengthCheck
9+
, VerifiedDownloadException(..)
10+
11+
, download
612
, redownload
713
, downloadJSON
814
, parseUrl
@@ -36,6 +42,7 @@ import Network.HTTP.Client.Conduit (HasHttpManager, Manager, Request,
3642
requestHeaders, responseBody,
3743
responseHeaders, responseStatus,
3844
withResponse)
45+
import Network.HTTP.Download.Verified
3946
import Network.HTTP.Types (status200, status304)
4047
import Path (Abs, File, Path, parent,
4148
toFilePath)
@@ -56,24 +63,33 @@ import System.IO (IOMode (WriteMode),
5663
download :: (MonadReader env m, HasHttpManager env, MonadIO m)
5764
=> Request
5865
-> Path Abs File -- ^ destination
59-
-> m ()
66+
-> m Bool -- ^ Was a downloaded performed (True) or did the file already exist (False)?
6067
download req destpath = do
61-
env <- ask
62-
liftIO $ unlessM (doesFileExist fp) $ do
63-
createDirectoryIfMissing True dir
64-
withBinaryFile fptmp WriteMode $ \h ->
65-
flip runReaderT env $
66-
withResponse req $ \res ->
67-
responseBody res $$ sinkHandle h
68-
renameFile fptmp fp
69-
where
70-
unlessM mp m = do
71-
p <- mp
72-
if p then return () else m
73-
74-
fp = toFilePath destpath
75-
fptmp = fp <.> "tmp"
76-
dir = toFilePath $ parent destpath
68+
let downloadReq = DownloadRequest
69+
{ drRequest = req
70+
, drHashChecks = []
71+
, drLengthCheck = Nothing
72+
}
73+
let progressHook = return ()
74+
verifiedDownload downloadReq destpath progressHook
75+
76+
-- env <- ask
77+
-- liftIO $ unlessM (doesFileExist fp) $ do
78+
-- createDirectoryIfMissing True dir
79+
-- withBinaryFile fptmp WriteMode $ \h ->
80+
-- flip runReaderT env $
81+
-- withResponse req $ \res ->
82+
-- responseBody res $$ sinkHandle h
83+
-- renameFile fptmp fp
84+
--where
85+
-- unlessM mp m = do
86+
-- p <- mp
87+
-- if p then return () else m
88+
89+
-- fp = toFilePath destpath
90+
-- fptmp = fp <.> "tmp"
91+
-- dir = toFilePath $ parent destpath
92+
7793

7894
-- | Same as 'download', but will download a file a second time if it is already present.
7995
--

src/Network/HTTP/Download/Verified.hs

Lines changed: 96 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,22 @@
44
{-# LANGUAGE ScopedTypeVariables #-}
55
{-# LANGUAGE RecordWildCards #-}
66
{-# LANGUAGE RankNTypes #-}
7-
module Network.HTTP.Download.Verified where
7+
{-# LANGUAGE GADTs #-}
8+
{-# LANGUAGE StandaloneDeriving #-}
9+
module Network.HTTP.Download.Verified
10+
( verifiedDownload
11+
, DownloadRequest(..)
12+
, HashCheck(..)
13+
, LengthCheck
14+
, VerifiedDownloadException(..)
15+
) where
816

917
import qualified Data.List as List
18+
import qualified Data.ByteString as ByteString
1019
import qualified Data.ByteString.Base64 as B64
1120
import qualified Data.ByteString.Char8 as BC
1221
import qualified Data.Conduit.Binary as CB
22+
import qualified Data.Conduit.List as CL
1323
import qualified Data.Text as Text
1424
import qualified Data.Text.Encoding as Text
1525

@@ -23,6 +33,8 @@ import Crypto.Hash.Conduit (sinkHash)
2333
import Data.ByteString (ByteString)
2434
import Data.Conduit
2535
import Data.Conduit.Binary (sourceHandle, sinkHandle)
36+
import Data.Foldable (traverse_)
37+
import Data.Monoid
2638
import Data.Typeable (Typeable)
2739
import Network.HTTP.Client.Conduit
2840
import Network.HTTP.Types.Header (hContentLength, hContentMD5)
@@ -31,22 +43,30 @@ import System.FilePath((<.>))
3143
import System.Directory
3244
import System.IO
3345

34-
-- | A request together with the hash algorithm to use
35-
-- to verify the response.
36-
-- The type parameter specifies the algorithm.
37-
data VerifiedRequest a = VerifiedRequest
38-
{ vrHashAlgorithm :: a
39-
, vrExpectedHexDigest :: String
40-
, vrDownloadBytes :: Int
41-
, vrRequest :: Request
46+
-- | A request together with some checks to perform.
47+
data DownloadRequest = DownloadRequest
48+
{ drRequest :: Request
49+
, drHashChecks :: [HashCheck]
50+
, drLengthCheck :: Maybe LengthCheck
4251
}
4352
deriving Show
4453

54+
data HashCheck = forall a. (Show a, HashAlgorithm a) => HashCheck
55+
{ hashCheckAlgorithm :: a
56+
, hashCheckHexDigest :: String
57+
}
58+
deriving instance Show HashCheck
59+
60+
type LengthCheck = Int
61+
4562
-- | An exception regarding verification of a download.
4663
data VerifiedDownloadException
4764
= WrongContentLength
4865
Int -- expected
4966
ByteString -- actual (as listed in the header)
67+
| WrongStreamLength
68+
Int -- expected
69+
Int -- actual
5070
| WrongDigest
5171
String -- algorithm
5272
String -- expected
@@ -65,41 +85,49 @@ instance Exception VerifyFileException
6585
-- is as expected.
6686
--
6787
-- Throws WrongDigest (VerifiedDownloadException)
68-
sinkCheckHash
69-
:: forall a m. (MonadThrow m, Show a, HashAlgorithm a)
70-
=> a -- ^ The algorithm (e.g. MD5)
71-
-> String -- ^ The expected digest, rendered as a String (hexadecimal)
88+
sinkCheckHash :: MonadThrow m
89+
=> HashCheck
7290
-> Consumer ByteString m ()
73-
sinkCheckHash a expectedDigestString = do
74-
(digest :: Digest a) <- sinkHash
91+
sinkCheckHash HashCheck{..} = do
92+
digest <- sinkHashUsing hashCheckAlgorithm
7593
let actualDigestString = show digest
76-
when (actualDigestString /= expectedDigestString) $
77-
throwM $ WrongDigest (show a) expectedDigestString actualDigestString
94+
when (actualDigestString /= hashCheckHexDigest) $
95+
throwM $ WrongDigest (show hashCheckAlgorithm) hashCheckHexDigest actualDigestString
96+
97+
assertLengthSink :: MonadThrow m
98+
=> LengthCheck
99+
-> ZipSink ByteString m ()
100+
assertLengthSink expectedStreamLength = ZipSink $ do
101+
Sum actualStreamLength <- CL.foldMap (Sum . ByteString.length)
102+
when (actualStreamLength /= expectedStreamLength) $
103+
throwM $ WrongStreamLength expectedStreamLength actualStreamLength
78104

105+
-- | A more explicitly type-guided sinkHash.
106+
sinkHashUsing :: (Monad m, HashAlgorithm a) => a -> Consumer ByteString m (Digest a)
107+
sinkHashUsing _ = sinkHash
108+
109+
-- | Turns a list of hash checks into a ZipSink that checks all of them.
110+
hashChecksToZipSink :: MonadThrow m => [HashCheck] -> ZipSink ByteString m ()
111+
hashChecksToZipSink = traverse_ (ZipSink . sinkCheckHash)
79112

80113
-- | Copied and extended version of Network.HTTP.Download.download.
81114
--
82115
-- Has the following additional features:
83116
-- * Verifies that response content-length header (if present)
84117
-- matches expected length
85-
-- * Only downloads expected length # of bytes
118+
-- * Limits the download to (close to) the expected # of bytes
119+
-- * Verifies that the expected # bytes were downloaded (not too few)
86120
-- * Verifies md5 if response includes content-md5 header
87-
-- * Verifies the expected hash
88-
--
89-
-- Further work ideas:
90-
-- * Check existing file for the given length & hash
91-
-- and redownload if it doesn't match
92-
-- * Check the downloaded file isn't too small.
93-
-- (Currently behavior only prevents it from being too large.)
94-
-- * Add a "progress" hook so that long downloads don't look like they've hung.
121+
-- * Verifies the expected hashes
95122
--
96123
-- Throws VerifiedDownloadException, and whatever else "download" throws.
97-
verifiedDownload :: (HashAlgorithm a, Show a, MonadReader env m, HasHttpManager env, MonadIO m, MonadThrow m)
98-
=> VerifiedRequest a
124+
verifiedDownload :: (MonadReader env m, HasHttpManager env, MonadIO m)
125+
=> DownloadRequest
99126
-> Path Abs File -- ^ destination
127+
-> Sink ByteString (ReaderT env IO) () -- ^ custom hook to observe progress
100128
-> m Bool -- ^ Whether a download was performed
101-
verifiedDownload VerifiedRequest{..} destpath = do
102-
let req = vrRequest
129+
verifiedDownload DownloadRequest{..} destpath progressSink = do
130+
let req = drRequest
103131
env <- ask
104132
liftIO $ whenM' getShouldDownload $ do
105133
createDirectoryIfMissing True dir
@@ -130,38 +158,51 @@ verifiedDownload VerifiedRequest{..} destpath = do
130158
(checkExpectations >> return True)
131159
`catch` \(_ :: VerifyFileException) -> return False
132160
`catch` \(_ :: VerifiedDownloadException) -> return False
133-
where
134-
checkExpectations = bracket (openFile fp ReadMode) hClose $ \h -> do
135-
fileSizeInteger <- hFileSize h
136-
when (fileSizeInteger > toInteger (maxBound :: Int)) $
137-
throwM $ WrongFileSize vrDownloadBytes fileSizeInteger
138-
let fileSize = fromInteger fileSizeInteger
139-
when (fileSize /= vrDownloadBytes) $
140-
throwM $ WrongFileSize vrDownloadBytes fileSizeInteger
141-
sourceHandle h $$ getZipSink sinkCheckGivenHash
142-
143-
sinkCheckGivenHash :: MonadThrow m => ZipSink ByteString m ()
144-
sinkCheckGivenHash = ZipSink $
145-
sinkCheckHash vrHashAlgorithm vrExpectedHexDigest
146161

147-
go h res = do
148-
let headers = responseHeaders res
162+
whenJust :: Monad m => Maybe a -> (a -> m ()) -> m ()
163+
whenJust (Just a) f = f a
164+
whenJust _ _ = return ()
165+
166+
checkExpectations = bracket (openFile fp ReadMode) hClose $ \h -> do
167+
whenJust drLengthCheck $ checkFileSizeExpectations h
168+
sourceHandle h $$ getZipSink (hashChecksToZipSink drHashChecks)
169+
170+
-- doesn't move the handle
171+
checkFileSizeExpectations h expectedFileSize = do
172+
fileSizeInteger <- hFileSize h
173+
when (fileSizeInteger > toInteger (maxBound :: Int)) $
174+
throwM $ WrongFileSize expectedFileSize fileSizeInteger
175+
let fileSize = fromInteger fileSizeInteger
176+
when (fileSize /= expectedFileSize) $
177+
throwM $ WrongFileSize expectedFileSize fileSizeInteger
178+
179+
checkContentLengthHeader headers expectedContentLength = do
149180
case List.lookup hContentLength headers of
150181
Just lengthBS -> do
151182
let lengthText = Text.strip $ Text.decodeUtf8 lengthBS
152183
lengthStr = Text.unpack lengthText
153-
when (lengthStr /= show vrDownloadBytes) $
154-
throwM $ WrongContentLength vrDownloadBytes lengthBS
184+
when (lengthStr /= show expectedContentLength) $
185+
throwM $ WrongContentLength expectedContentLength lengthBS
155186
_ -> return ()
156-
let checkHash = (case List.lookup hContentMD5 headers of
187+
188+
go h res = do
189+
let headers = responseHeaders res
190+
whenJust drLengthCheck $ checkContentLengthHeader headers
191+
let hashChecks = (case List.lookup hContentMD5 headers of
157192
Just md5BS ->
158193
let md5ExpectedHexDigest = BC.unpack (B64.decodeLenient md5BS)
159-
in ZipSink (sinkCheckHash MD5 md5ExpectedHexDigest)
160-
Nothing ->
161-
pure ()
162-
) *> sinkCheckGivenHash
194+
in [ HashCheck
195+
{ hashCheckAlgorithm = MD5
196+
, hashCheckHexDigest = md5ExpectedHexDigest
197+
}
198+
]
199+
Nothing -> []
200+
) ++ drHashChecks
163201

164202
responseBody res
165-
$= CB.isolate vrDownloadBytes
166-
-- TODO: $= progressHook
167-
$$ getZipSink (checkHash *> ZipSink (sinkHandle h))
203+
$= maybe (awaitForever yield) CB.isolate drLengthCheck
204+
$$ getZipSink
205+
( hashChecksToZipSink hashChecks
206+
*> maybe (pure ()) assertLengthSink drLengthCheck
207+
*> ZipSink (sinkHandle h)
208+
*> ZipSink progressSink)

0 commit comments

Comments
 (0)