Skip to content

Commit d9fc80b

Browse files
authored
cleanup(GCS+gRPC): compute MD5 hashes directly (#6959)
Avoid going through the base64 representation used in the REST protocol, as this requires weaving error handling through many layers.
1 parent e327a5a commit d9fc80b

8 files changed

Lines changed: 68 additions & 25 deletions

File tree

google/cloud/storage/hashing_options.cc

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,14 @@
1616
#include "google/cloud/storage/internal/openssl_util.h"
1717
#include "google/cloud/internal/big_endian.h"
1818
#include <crc32c/crc32c.h>
19-
#include <openssl/md5.h>
20-
#include <cstring>
2119

2220
namespace google {
2321
namespace cloud {
2422
namespace storage {
2523
inline namespace STORAGE_CLIENT_NS {
26-
std::string ComputeMD5Hash(std::string const& payload) {
27-
MD5_CTX md5;
28-
MD5_Init(&md5);
29-
MD5_Update(&md5, payload.c_str(), payload.size());
3024

31-
std::string hash(MD5_DIGEST_LENGTH, ' ');
32-
MD5_Final(reinterpret_cast<unsigned char*>(&hash[0]), &md5);
33-
return internal::Base64Encode(hash);
25+
std::string ComputeMD5Hash(std::string const& payload) {
26+
return internal::Base64Encode(internal::MD5Hash(payload));
3427
}
3528

3629
std::string ComputeCrc32cChecksum(std::string const& payload) {

google/cloud/storage/internal/grpc_client.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1762,7 +1762,7 @@ google::storage::v1::InsertObjectRequest GrpcClient::ToProto(
17621762
// TODO(#4156) - use the crc32c value in the request options.
17631763
checksums.mutable_crc32c()->set_value(crc32c::Crc32c(request.contents()));
17641764
// TODO(#4157) - use the MD5 hash value in the request options.
1765-
checksums.set_md5_hash(MD5ToProto(ComputeMD5Hash(request.contents())));
1765+
checksums.set_md5_hash(ComputeMD5Hash(request.contents()));
17661766

17671767
return r;
17681768
}
@@ -1877,6 +1877,10 @@ std::string GrpcClient::MD5ToProto(std::string const& v) {
18771877
return internal::HexEncode(binary);
18781878
}
18791879

1880+
std::string GrpcClient::ComputeMD5Hash(const std::string& payload) {
1881+
return internal::HexEncode(internal::MD5Hash(payload));
1882+
}
1883+
18801884
} // namespace internal
18811885
} // namespace STORAGE_CLIENT_NS
18821886
} // namespace storage

google/cloud/storage/internal/grpc_client.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,7 @@ class GrpcClient : public RawClient,
323323
static std::uint32_t Crc32cToProto(std::string const&);
324324
static std::string MD5FromProto(std::string const&);
325325
static std::string MD5ToProto(std::string const&);
326+
static std::string ComputeMD5Hash(std::string const& payload);
326327

327328
protected:
328329
explicit GrpcClient(Options const& opts);

google/cloud/storage/internal/grpc_client_test.cc

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -208,15 +208,21 @@ TEST(GrpcClientFromProto, Crc32cRoundtrip) {
208208
}
209209

210210
TEST(GrpcClientFromProto, MD5Roundtrip) {
211-
std::string const values[] = {
212-
"1B2M2Y8AsgTpgAmY7PhCfg==",
213-
"nhB9nTcrtoJr2B01QqQZ1g==",
214-
"96HF9K981B+JfoQuTVnyCg==",
211+
// As usual, get the values using openssl, e.g.,
212+
// TEXT="The quick brown fox jumps over the lazy dog"
213+
// /bin/echo -n $TEXT | openssl md5 -binary | openssl base64
214+
// /bin/echo -n $TEXT | openssl md5
215+
struct Test {
216+
std::string rest;
217+
std::string proto;
218+
} cases[] = {
219+
{"1B2M2Y8AsgTpgAmY7PhCfg==", "d41d8cd98f00b204e9800998ecf8427e"},
220+
{"nhB9nTcrtoJr2B01QqQZ1g==", "9e107d9d372bb6826bd81d3542a419d6"},
221+
{"96HF9K981B+JfoQuTVnyCg==", "f7a1c5f4af7cd41f897e842e4d59f20a"},
215222
};
216-
for (auto const& start : values) {
217-
auto proto_form = GrpcClient::MD5ToProto(start);
218-
auto end = GrpcClient::MD5FromProto(proto_form);
219-
EXPECT_EQ(start, end) << " proto_form=" << proto_form;
223+
for (auto const& test : cases) {
224+
EXPECT_EQ(GrpcClient::MD5FromProto(test.proto), test.rest);
225+
EXPECT_EQ(GrpcClient::MD5ToProto(test.rest), test.proto);
220226
}
221227
}
222228

google/cloud/storage/internal/grpc_object_read_source_test.cc

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,10 @@ TEST(GrpcObjectReadSource, UseSpillBufferMany) {
190190

191191
TEST(GrpcObjectReadSource, PreserveChecksums) {
192192
auto mock = absl::make_unique<MockStream>();
193-
std::string const expected_md5 = "nhB9nTcrtoJr2B01QqQZ1g==";
194-
std::string const expected_crc32c = "ImIEBA==";
193+
std::string const expected_payload =
194+
"The quick brown fox jumps over the lazy dog";
195+
auto const expected_md5 = ComputeMD5Hash(expected_payload);
196+
auto const expected_crc32c = ComputeCrc32cChecksum(expected_payload);
195197
EXPECT_CALL(*mock, Read)
196198
.WillOnce([&]() {
197199
storage_proto::GetObjectMediaResponse response;
@@ -216,14 +218,13 @@ TEST(GrpcObjectReadSource, PreserveChecksums) {
216218
})
217219
.WillOnce(Return(Status{}));
218220
GrpcObjectReadSource tested(std::move(mock));
219-
std::string const expected = "The quick brown fox jumps over the lazy dog";
220221
std::vector<char> buffer(1024);
221222
auto response = tested.Read(buffer.data(), buffer.size());
222223
ASSERT_STATUS_OK(response);
223-
EXPECT_EQ(expected.size(), response->bytes_received);
224+
EXPECT_EQ(expected_payload.size(), response->bytes_received);
224225
EXPECT_EQ(100, response->response.status_code);
225-
std::string actual(buffer.data(), expected.size());
226-
EXPECT_EQ(expected, actual);
226+
auto const actual = std::string(buffer.data(), response->bytes_received);
227+
EXPECT_EQ(expected_payload, actual);
227228
auto const& headers = response->response.headers;
228229
EXPECT_FALSE(headers.find("x-goog-hash") == headers.end());
229230
auto const values = [&headers] {

google/cloud/storage/internal/openssl_util.cc

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@
1414

1515
#include "google/cloud/storage/internal/openssl_util.h"
1616
#include "google/cloud/internal/base64_transforms.h"
17-
#include "google/cloud/internal/throw_delegate.h"
1817
#include <openssl/bio.h>
1918
#include <openssl/buffer.h>
2019
#include <openssl/evp.h>
20+
#include <openssl/md5.h>
2121
#include <openssl/opensslv.h>
2222
#include <openssl/pem.h>
2323
#include <memory>
@@ -172,6 +172,20 @@ std::vector<std::uint8_t> UrlsafeBase64Decode(std::string const& str) {
172172
return Base64Decode(b64str);
173173
}
174174

175+
std::vector<std::uint8_t> MD5Hash(std::string const& payload) {
176+
MD5_CTX md5;
177+
MD5_Init(&md5);
178+
MD5_Update(&md5, payload.c_str(), payload.size());
179+
180+
std::vector<std::uint8_t> hash(MD5_DIGEST_LENGTH, 0);
181+
// Note: MD5_Final consumes a `unsigned char*` in its first parameter, on some
182+
// platforms (PowerPC and ARM I read), the default `char` is unsigned. In
183+
// those platforms it is possible that `std::uint8_t != unsigned char` and
184+
// the `reinterpret_cast<>` is not trivial (but still safe I think).
185+
MD5_Final(reinterpret_cast<unsigned char*>(hash.data()), &md5);
186+
return hash;
187+
}
188+
175189
} // namespace internal
176190
} // namespace STORAGE_CLIENT_NS
177191
} // namespace storage

google/cloud/storage/internal/openssl_util.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ inline std::string UrlsafeBase64Encode(Collection const& bytes) {
7878
*/
7979
std::vector<std::uint8_t> UrlsafeBase64Decode(std::string const& str);
8080

81+
/// Compute the MD5 hash of @p payload
82+
std::vector<std::uint8_t> MD5Hash(std::string const& payload);
83+
8184
} // namespace internal
8285
} // namespace STORAGE_CLIENT_NS
8386
} // namespace storage

google/cloud/storage/internal/openssl_util_test.cc

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,27 @@ TEST(OpensslUtilTest, Base64DecodePadding) {
6868
EXPECT_THAT(UrlsafeBase64Decode("QUJDRA=="), ElementsAre('A', 'B', 'C', 'D'));
6969
}
7070

71+
TEST(OpensslUtilTest, MD5HashEmpty) {
72+
auto const actual = MD5Hash("");
73+
// I used this command to get the expected value:
74+
// /bin/echo -n "" | openssl md5
75+
auto const expected =
76+
std::vector<std::uint8_t>{0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04,
77+
0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e};
78+
EXPECT_THAT(actual, ElementsAreArray(expected));
79+
}
80+
81+
TEST(OpensslUtilTest, MD5HashSimple) {
82+
auto const actual = MD5Hash("The quick brown fox jumps over the lazy dog");
83+
// I used this command to get the expected value:
84+
// /bin/echo -n "The quick brown fox jumps over the lazy dog" |
85+
// openssl md5 -binary | openssl base64
86+
auto const expected =
87+
std::vector<std::uint8_t>{0x9e, 0x10, 0x7d, 0x9d, 0x37, 0x2b, 0xb6, 0x82,
88+
0x6b, 0xd8, 0x1d, 0x35, 0x42, 0xa4, 0x19, 0xd6};
89+
EXPECT_THAT(actual, ElementsAreArray(expected));
90+
}
91+
7192
} // namespace
7293
} // namespace internal
7394
} // namespace STORAGE_CLIENT_NS

0 commit comments

Comments
 (0)