XRootD
XrdClS3Factory.cc
Go to the documentation of this file.
1 /******************************************************************************/
2 /* Copyright (C) 2025, Pelican Project, Morgridge Institute for Research */
3 /* */
4 /* This file is part of the XrdClS3 client plugin for XRootD. */
5 /* */
6 /* XRootD is free software: you can redistribute it and/or modify it under */
7 /* the terms of the GNU Lesser General Public License as published by the */
8 /* Free Software Foundation, either version 3 of the License, or (at your */
9 /* option) any later version. */
10 /* */
11 /* XRootD is distributed in the hope that it will be useful, but WITHOUT */
12 /* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or */
13 /* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public */
14 /* License for more details. */
15 /* */
16 /* The copyright holder's institutional names and contributor's names may not */
17 /* be used to endorse or promote products derived from this software without */
18 /* specific prior written permission of the institution or contributor. */
19 /******************************************************************************/
20 
21 #include "XrdClS3Factory.hh"
22 #include "XrdClS3File.hh"
23 #include "XrdClS3Filesystem.hh"
24 
25 #include <openssl/evp.h>
26 #include <openssl/hmac.h>
27 #include <XrdCl/XrdClDefaultEnv.hh>
28 #include <XrdCl/XrdClLog.hh>
29 
30 #include <fcntl.h>
31 
33 
34 using namespace XrdClS3;
35 
36 std::shared_mutex Factory::m_bucket_auth_map_mutex;
37 bool Factory::m_initialized = false;
38 XrdCl::Log *Factory::m_log{nullptr};
39 std::once_flag Factory::m_init_once;
40 std::string Factory::m_endpoint = "";
41 std::string Factory::m_service = "s3";
42 std::string Factory::m_region = "";
43 std::string Factory::m_url_style = "path";
44 std::string Factory::m_mkdir_sentinel;
45 Factory::Credentials Factory::m_default_creds;
46 std::unordered_map<std::string, Factory::Credentials> Factory::m_bucket_location_map;
47 std::unordered_map<std::string, std::pair<Factory::Credentials, std::chrono::steady_clock::time_point>> Factory::m_bucket_auth_map;
48 
49 
50 namespace {
51 
// Percent-encode `input` following the AWS query API rules:
// A-Z, a-z, 0-9, '-', '_', '.' and '~' are copied unchanged; every other
// byte (including each byte of a multi-byte UTF-8 sequence) becomes %XY
// with uppercase hexadecimal digits.
//
// See
// http://docs.amazonwebservices.com/AWSEC2/2010-11-15/DeveloperGuide/using-query-api.html
std::string
AmazonURLEncode(const std::string &input) {
    static const char kUpperHex[] = "0123456789ABCDEF";

    const auto is_unreserved = [](const char ch) {
        const bool upper = ('A' <= ch && ch <= 'Z');
        const bool lower = ('a' <= ch && ch <= 'z');
        const bool digit = ('0' <= ch && ch <= '9');
        return upper || lower || digit ||
               ch == '-' || ch == '_' || ch == '.' || ch == '~';
    };

    std::string encoded;
    encoded.reserve(input.size());
    for (const char ch : input) {
        if (is_unreserved(ch)) {
            encoded.push_back(ch);
            continue;
        }
        // Work on the unsigned byte value so bytes >= 0x80 yield two digits.
        const auto byte = static_cast<unsigned char>(ch);
        encoded.push_back('%');
        encoded.push_back(kUpperHex[byte >> 4]);
        encoded.push_back(kUpperHex[byte & 0x0F]);
    }
    return encoded;
}
80 
81 }
82 
// One-time initialization: the lambda below runs at most once, guarded by
// std::call_once on m_init_once.
// NOTE(review): the signature line of the enclosing function is not visible
// in this listing -- presumably the Factory constructor; confirm.
84 std::call_once(m_init_once, [&] {
85 m_log = XrdCl::DefaultEnv::GetLog();
// Without a logger nothing else is set up and m_initialized stays false.
86 if (!m_log) {
87 return;
88 }
// Register the topic name used for this plug-in's log messages.
89 m_log->SetTopicName(kLogXrdClS3, "XrdClS3");
90 
91 auto env = XrdCl::DefaultEnv::GetEnv();
// Likewise, without a client environment m_initialized stays false.
92 if (!env) {
93 return;
94 }
// Load the S3 settings, then mark the factory as usable; CreateFile and
// CreateFileSystem return nullptr until m_initialized is true.
95 InitS3Config();
96 m_initialized = true;
97 });
98 }
99 
100 std::string
101 Factory::CanonicalizeQueryString(const std::string &url) {
102  auto loc = url.find("://");
103  if (loc == std::string::npos) {
104  return "";
105  }
106  loc += 3; // Skip the "://"
107  loc = url.find('?', loc);
108  if (loc == std::string::npos) {
109  return "";
110  }
111  std::vector<std::pair<std::string, std::string>> query_parameters;
112  auto param_end = url.find('&', loc);
113  while (loc != std::string::npos) {
114  auto param_start = loc + 1; // Skip the '?' / '&'
115  loc = url.find('=', param_start);
116  if (loc == param_start) {
117  // Empty parameter name, skip
118  }
119  else if (loc >= param_end) {
120  auto param = url.substr(param_start, param_end - param_start);
121  if (!param.empty()) {
122  // No '=' found, treat as a parameter without value
123  query_parameters.emplace_back(AmazonURLEncode(param), "");
124  }
125  } else {
126  std::string name = url.substr(param_start, loc - param_start);
127  loc++; // Move past '='
128  auto value_start = loc;
129  std::string value;
130  if (param_end == std::string::npos) {
131  value = url.substr(value_start);
132  } else {
133  value = url.substr(value_start, param_end - value_start);
134  }
135  if (!value.empty()) {
136  query_parameters.emplace_back(AmazonURLEncode(name), AmazonURLEncode(value));
137  }
138  }
139  loc = param_end;
140  if (loc != std::string::npos) {
141  param_end = url.find('&', loc + 1);
142  }
143  }
144  std::sort(query_parameters.begin(), query_parameters.end(),
145  [](const auto &a, const auto &b) { return a.first < b.first; });
146 
147  size_t string_size = 0;
148  for (const auto &param : query_parameters) {
149  string_size += param.first.size() + param.second.size() + 2;
150  }
151  std::string canonicalQueryString;
152  if (string_size) {
153  canonicalQueryString.reserve(string_size);
154  }
155  for (const auto &param : query_parameters) {
156 
157  // Step 1C: Separate parameter names from values with '='.
158  canonicalQueryString += param.first + '=' + param.second;
159 
160  // Step 1D: Separate name-value pairs with '&';
161  canonicalQueryString += '&';
162  }
163  // We'll always have a superflous trailing ampersand.
164  if (!canonicalQueryString.empty()) {
165  canonicalQueryString.erase(canonicalQueryString.end() - 1);
166  }
167  return canonicalQueryString;
168 }
169 
// Create a file plug-in object: a newly allocated File constructed with the
// shared logger.  The URL argument is ignored.  Returns nullptr when
// m_initialized is false.
// NOTE(review): the return-type line of this definition is not visible in
// this listing.
171 Factory::CreateFile(const std::string & /*url*/) {
172 if (!m_initialized) {return nullptr;}
173 return new File(m_log);
174 }
175 
// Create a file system plug-in object: a newly allocated Filesystem
// constructed with `url` and the shared logger.  Returns nullptr when
// m_initialized is false.
// NOTE(review): the return-type line of this definition is not visible in
// this listing.
177 Factory::CreateFileSystem(const std::string & url) {
178 if (!m_initialized) {return nullptr;}
179 return new Filesystem(url, m_log);
180 }
181 
182 namespace {
183 
184 void SetDefault(XrdCl::Env *env, const std::string &optName, const std::string &envName, std::string &location, const std::string &def) {
185  std::string val;
186  if (!env->GetString(optName, val) || val.empty()) {
187  env->PutString(optName, "");
188  env->ImportString(optName, envName);
189  }
190  if (env->GetString(optName, val) && !val.empty()) {
191  location = val;
192  } else {
193  location = def;
194  }
195 }
196 
197 // Trim the left side of a string_view for space
198 std::string_view ltrim_view(const std::string_view input_view) {
199  for (size_t idx = 0; idx < input_view.size(); idx++) {
200  if (!isspace(input_view[idx])) {
201  return input_view.substr(idx);
202  }
203  }
204  return "";
205 }
206 
207 bool ComputeSHA256(const std::string_view payload, std::vector<unsigned char> &messageDigest) {
208  EVP_MD_CTX *mdctx = EVP_MD_CTX_create();
209  if (mdctx == NULL) {
210  return false;
211  }
212 
213  if (!EVP_DigestInit_ex(mdctx, EVP_sha256(), NULL)) {
214  EVP_MD_CTX_destroy(mdctx);
215  return false;
216  }
217 
218  if (!EVP_DigestUpdate(mdctx, payload.data(), payload.length())) {
219  EVP_MD_CTX_destroy(mdctx);
220  return false;
221  }
222 
223  unsigned int mdLength;
224  if (!EVP_DigestFinal_ex(mdctx, messageDigest.data(), &mdLength)) {
225  EVP_MD_CTX_destroy(mdctx);
226  return false;
227  }
228  messageDigest.resize(mdLength);
229 
230  EVP_MD_CTX_destroy(mdctx);
231  return true;
232 }
233 
// Write the lowercase hexadecimal form of `messageDigest` into `hexEncoded`
// (two characters per byte).  Any previous contents of `hexEncoded` are
// replaced.
void MessageDigestAsHex(const std::vector<unsigned char> &messageDigest,
                        std::string &hexEncoded) {
    static const char kLowerHex[] = "0123456789abcdef";

    hexEncoded.clear();
    hexEncoded.reserve(messageDigest.size() * 2);
    for (const unsigned char byte : messageDigest) {
        hexEncoded.push_back(kLowerHex[byte >> 4]);
        hexEncoded.push_back(kLowerHex[byte & 0x0F]);
    }
}
242 
243 // Helper function to read a file descriptor until EOF or
244 // `nbytes` bytes have been read.
245 // Includes appropriate handling of EINTR.
// Read from `fd` into `ptr` until `nbytes` bytes have been stored or
// end-of-file is reached.  A read interrupted by a signal (EINTR) is simply
// issued again.
//
// Returns the number of bytes stored (less than `nbytes` only at EOF), or
// -1 when read() fails with any error other than EINTR.
ssize_t FullRead(int fd, void *ptr, size_t nbytes) {
    auto *cursor = static_cast<char *>(ptr);
    ssize_t remaining = nbytes;

    while (remaining > 0) {
        const ssize_t got = read(fd, cursor, remaining);
        if (got < 0) {
            if (errno == EINTR) {
                continue; // Interrupted before any data arrived; try again.
            }
            return -1;
        }
        if (got == 0) {
            break; // End of file.
        }
        remaining -= got;
        cursor += got;
    }
    return (nbytes - remaining);
}
266 
267 // Read a file into a string.
268 // If the file is larger than 32k, it will return false.
269 bool
270 ReadShortFile(const std::string &fileName, std::string &contents, std::string &err_msg) {
271  int fd = open(fileName.c_str(), O_RDONLY, 0600);
272  if (fd < 0) {
273  err_msg = "Failed to open file '" + fileName + "': " + std::string(strerror(errno));
274  return false;
275  }
276  contents.resize(32*1024);
277 
278  auto totalRead = FullRead(fd, contents.data(), contents.size());
279  close(fd);
280  if (totalRead == -1) {
281  err_msg = "Failed to read file '" + fileName + "': " + std::string(strerror(errno));
282  return false;
283  }
284  contents.resize(totalRead);
285  return true;
286 }
287 
288 } // namespace
289 
290 std::string
291 Factory::CleanObjectName(const std::string & input_obj) {
292  std::string obj = input_obj;
293  auto loc = input_obj.find('?');
294  if (loc != std::string::npos) {
295  auto query = std::string_view(input_obj).substr(loc + 1);
296  obj = obj.substr(0, loc);
297  bool added_query = false;
298  while (!query.empty()) {
299  auto next_query_loc = query.find('&');
300  auto current_query = (next_query_loc == std::string::npos) ? query : query.substr(0, next_query_loc);
301  query = (next_query_loc == std::string::npos) ? "" : query.substr(next_query_loc + 1);
302  if (current_query.empty()) {
303  continue;
304  }
305  auto equal_loc = current_query.find('=');
306  if (equal_loc != std::string::npos) {
307  auto key = current_query.substr(0, equal_loc);
308  if (key != "authz") {
309  obj += (added_query ? "&" : "?") + std::string(current_query);
310  added_query = true;
311  }
312  } else if (current_query != "authz") {
313  obj += (added_query ? "&" : "?") + std::string(current_query);
314  added_query = true;
315  }
316  }
317  } else {
318  obj = input_obj;
319  }
320  return obj;
321 }
322 
323 std::string_view
324 Factory::ExtractHostname(const std::string_view url) {
325  auto loc = url.find("://");
326  if (loc == std::string_view::npos) {
327  return {};
328  }
329  loc += 3; // Move past "://"
330  auto slash_loc = url.find('/', loc);
331  auto query_loc = url.find('?', loc);
332  if (query_loc != std::string_view::npos && (slash_loc == std::string_view::npos || query_loc < slash_loc)) {
333  slash_loc = query_loc; // If there's a query, we stop at it
334  }
335  auto authority = url.substr(loc, slash_loc - loc);
336  if (authority.empty()) {
337  return {};
338  }
339  auto at_loc = authority.find('@');
340  if (at_loc != std::string_view::npos) {
341  // If there's an '@', we have user info, so we skip it
342  authority = authority.substr(at_loc + 1);
343  }
344  // If the authority contains a port, we need to strip it
345  auto colon_loc = authority.find(':');
346  if (colon_loc != std::string_view::npos) {
347  authority = authority.substr(0, colon_loc);
348  }
349  return authority;
350 }
351 
352 void
353 Factory::InitS3Config()
354 {
355  auto env = XrdCl::DefaultEnv::GetEnv();
356  SetDefault(env, "XrdClS3MkdirSentinel", "XRDCLS3_MKDIRSENTINEL", m_mkdir_sentinel, ".xrdcls3.dirsentinel");
357  SetDefault(env, "XrdClS3Endpoint", "XRDCLS3_ENDPOINT", m_endpoint, "");
358  SetDefault(env, "XrdClS3UrlStyle", "XRDCLS3_URLSTYLE", m_url_style, "path");
359  SetDefault(env, "XrdClS3Region", "XRDCLS3_REGION", m_region, "");
360  std::string access_key;
361  SetDefault(env, "XrdClS3AccessKeyLocation", "XRDCLS3_ACCESSKEYLOCATION", access_key, "");
362  std::string secret_key;
363  SetDefault(env, "XrdClS3SecretKeyLocation", "XRDCLS3_SECRETKEYLOCATION", secret_key, "");
364  if (!access_key.empty() && !secret_key.empty()) {
365  m_default_creds = {access_key, secret_key};
366  } else if (access_key.empty() && secret_key.empty()) {
367  m_log->Info(kLogXrdClS3, "Defaulting to public bucket access");
368  } else if (access_key.empty() && !secret_key.empty()) {
369  m_log->Warning(kLogXrdClS3, "Secret key location set (%s) but access key location is empty; authorization will not work.", secret_key.c_str());
370  } else if (!access_key.empty() && secret_key.empty()) {
371  m_log->Warning(kLogXrdClS3, "Access key location set (%s) but secret key location is empty; authorization will not work.", access_key.c_str());
372  }
373 
374  // Parse the per-bucket configuration of credentials.
375  std::string bucket_configs;
376  SetDefault(env, "XrdClS3BucketConfigs", "XRDCLS3_BUCKETCONFIGS", bucket_configs, "");
377  if (!bucket_configs.empty()) {
378  std::stringstream ss(bucket_configs);
379  std::string config_name;
380  while (std::getline(ss, config_name)) {
381  auto name = TrimView(config_name);
382  auto bucket_name_key = std::string("XrdClS3") + std::string(name) + "BucketName";
383  std::string bucket_name_val;
384  if (!env->GetString(bucket_name_key, bucket_name_val) || bucket_name_val.empty()) {
385  m_log->Warning(kLogXrdClS3, "Per-bucket config includes entry '%s' but XrdClS3%sBucketName is not set", std::string(name).c_str(), std::string(name).c_str());
386  continue;
387  }
388  auto access_key_location_key = std::string("XrdClS3") + std::string(name) + "AccessKeyLocation";
389  std::string access_key_location_val;
390  auto has_access_key = env->GetString(access_key_location_key, access_key_location_val) && !access_key_location_val.empty();
391 
392  auto secret_key_location_key = std::string("XrdClS3") + std::string(name) + "SecretKeyLocation";
393  std::string secret_key_location_val;
394  auto has_secret_key = env->GetString(secret_key_location_key, secret_key_location_val) && !secret_key_location_val.empty();
395 
396  if (has_access_key && has_secret_key) {
397  m_bucket_location_map[bucket_name_val] = {access_key_location_val, secret_key_location_val};
398  } else if (!has_access_key && !has_secret_key) {
399  // If both are empty, then it is implicitly a public bucket.
400  m_bucket_location_map[bucket_name_val] = {"", ""};
401  } else if (has_access_key && !has_secret_key) {
402  m_log->Warning(kLogXrdClS3, "Per-bucket config for entry '%s' has an access key location set (%s) but no secret key", std::string(name).c_str(), access_key_location_val.c_str());
403  } else {
404  m_log->Warning(kLogXrdClS3, "Per-bucket config for entry '%s' has an secret key location set (%s) but no access key", std::string(name).c_str(), secret_key_location_val.c_str());
405  }
406  }
407  }
408 }
409 
410 bool
411 Factory::GenerateHttpUrl(const std::string &s3_url, std::string &https_url, std::string *obj_result, std::string &err_msg) {
412  if (s3_url.substr(0, 5) != "s3://") {
413  err_msg = "Provided URL does not start with s3://";
414  return false;
415  }
416  auto loc = s3_url.find('/', 5);
417  auto bucket = s3_url.substr(5, loc - 5);
418  auto at_loc = bucket.find('@');
419  if (at_loc != std::string::npos) {
420  std::string login = "";
421  login = bucket.substr(0, at_loc);
422  bucket = bucket.substr(at_loc + 1);
423  }
424  std::string endpoint = m_endpoint;
425  std::string region = m_region;
426  if ((bucket == m_endpoint) || m_endpoint.empty()) {
427  endpoint = bucket;
428  auto old_loc = loc + 1;
429  loc = s3_url.find('/', loc + 1);
430  if (loc == std::string::npos) {
431  err_msg = "Provided S3 URL does not contain a bucket in path";
432  return false;
433  }
434  bucket = s3_url.substr(old_loc, loc - old_loc);
435  } else {
436  auto authority = ExtractHostname(s3_url);
437  std::string test_endpoint = "." + endpoint;
438  if (!m_region.empty()) {
439  auto bucket_loc = authority.rfind("." + m_region + test_endpoint);
440  if (bucket_loc != std::string::npos) {
441  bucket = authority.substr(0, bucket_loc);
442  } else {
443  auto bucket_loc = authority.rfind(test_endpoint);
444  if (bucket_loc != std::string::npos) {
445  bucket = authority.substr(0, bucket_loc);
446  }
447  }
448  } else {
449  auto bucket_loc = authority.rfind(test_endpoint);
450  if (bucket_loc != std::string::npos) {
451  bucket = authority.substr(0, bucket_loc);
452  }
453  }
454  }
455  std::string obj;
456  if (loc != std::string::npos) {
457  obj = s3_url.substr(loc + 1);
458  }
459  // Strip out "authz" query parameters; those are internal to XRootD.
460  obj = CleanObjectName(obj);
461  if (obj_result) {
462  *obj_result = obj;
463  }
464  if (m_url_style == "virtual" || m_url_style.empty()) {
465  https_url = "https://" + bucket + "." + m_region + "." + endpoint + (obj_result ? "" : ("/" + obj));
466  return true;
467  } else if (m_url_style == "path") {
468  if (!m_region.empty()) {
469  https_url = "https://" + m_region + "." + endpoint + "/" + bucket + (obj_result ? "" : ("/" + obj));
470  } else {
471  https_url = "https://" + endpoint + "/" + bucket + (obj_result ? "" : ("/" + obj));
472  }
473  return true;
474  } else {
475  err_msg = "Server configuration has invalid setting for URL style";
476  return false;
477  }
478 }
479 
480 bool
481 Factory::GenerateV4Signature(const std::string &url, const std::string &verb, std::vector<std::pair<std::string, std::string>> &headers, std::string &auth_token, std::string &err_msg) {
482  auto bucket = GetBucketFromHttpsUrl(url);
483 
484  // If we're using temporary credentials, we need to add the token
485  // header here as well. We set saKey and keyID here (well before
486  // necessary) since we'll get them for free when we get the token.
487  auto [keyId, secretKey, ok] = GetCredentialsForBucket(bucket, err_msg);
488  if (!ok) {
489  return false;
490  }
491 
492  if (secretKey.empty()) {
493  auth_token = "";
494  return true;
495  }
496 
497  //
498  // Create task 1's inputs.
499  //
500 
501  auto canonicalURI = PathEncode(url);
502 
503  // The canonical query string is the alphabetically sorted list of
504  // URI-encoded parameter names '=' values, separated by '&'s.
505  auto canonicalQueryString = CanonicalizeQueryString(url);
506 
507  // The canonical headers must include the Host header, so add that
508  // now if we don't have it.
509  if (std::find_if(headers.begin(), headers.end(),
510  [](const auto &pair) { return pair.first == "Host"; }) == headers.end()) {
511  auto host = ExtractHostname(url);
512  if (host.empty()) {
513  err_msg = "Unable to extract hostname from URL: " + url;
514  return false;
515  }
516  headers.emplace_back("Host", host);
517  }
518 
519  // S3 complains if x-amz-date isn't signed, so do this early.
520  auto iter = std::find_if(headers.begin(), headers.end(),
521  [](const auto &pair) { return !strcasecmp(pair.first.c_str(), "X-Amz-Date"); });
522  std::string date_time;
523  char date_char[] = "YYYYMMDD";
524  if (iter == headers.end()) {
525  time_t now;
526  time(&now);
527  struct tm brokenDownTime;
528  gmtime_r(&now, &brokenDownTime);
529 
530  date_time = "YYYYMMDDThhmmssZ";
531  strftime(date_time.data(), date_time.size(), "%Y%m%dT%H%M%SZ", &brokenDownTime);
532  headers.emplace_back("X-Amz-Date", date_time);
533  strftime(date_char, sizeof(date_char), "%Y%m%d", &brokenDownTime);
534  } else {
535  date_time = iter->second;
536  auto loc = date_time.find('T', 0);
537  if (loc != 8) {
538  err_msg = "Invalid value for X-Amz-Date";
539  return false;
540  }
541  memcpy(date_char, date_time.c_str(), 8);
542  }
543 
544  // The canonical payload hash is the lowercase hexadecimal string of the
545  // (SHA256) hash value of the payload or "UNSIGNED-PAYLOAD" if
546  // we are not signing the payload.
547  std::string payload_hash = "UNSIGNED-PAYLOAD";
548  iter = std::find_if(headers.begin(), headers.end(),
549  [](const auto &pair) { return !strcasecmp(pair.first.c_str(), "X-Amz-Content-Sha256"); });
550  if (iter == headers.end()) {
551  headers.emplace_back("X-Amz-Content-Sha256", payload_hash);
552  } else {
553  payload_hash = iter->second;
554  }
555 
556  // The canonical list of headers is a sorted list of lowercase header
557  // names paired via ':' with the trimmed header value, each pair
558  // terminated with a newline.
559  std::vector<std::pair<std::string, std::string>> transformed_headers;
560  transformed_headers.reserve(headers.size());
561  for (const auto &info : headers) {
562  std::string header = info.first;
563  std::transform(header.begin(), header.end(), header.begin(), &tolower);
564 
565  std::string value = info.second;
566  if (value.empty()) {
567  continue;
568  }
569  auto value_trimmed = std::string(TrimView(value));
570 
571  // Convert internal runs of spaces into single spaces.
572  unsigned left = 1;
573  unsigned right = 1;
574  bool inSpaces = false;
575  while (right < value_trimmed.length()) {
576  if (!inSpaces) {
577  if (value_trimmed[right] == ' ') {
578  inSpaces = true;
579  left = right;
580  ++right;
581  } else {
582  ++right;
583  }
584  } else {
585  if (value_trimmed[right] == ' ') {
586  ++right;
587  } else {
588  inSpaces = false;
589  value_trimmed.erase(left, right - left - 1);
590  right = left + 1;
591  }
592  }
593  }
594 
595  transformed_headers.emplace_back(header, value);
596  }
597  std::sort(transformed_headers.begin(), transformed_headers.end(),
598  [](const auto &a, const auto &b) { return a.first < b.first; });
599 
600  // The canonical list of signed headers is trivial to generate while
601  // generating the list of headers.
602  std::string signedHeaders, canonicalHeaders;
603  for (const auto &info : transformed_headers) {
604  canonicalHeaders += info.first + ":" + info.second + "\n";
605  signedHeaders += info.first + ";";
606  }
607  signedHeaders.erase(signedHeaders.end() - 1);
608 
609  // Task 1: create the canonical request.
610  auto canonicalRequest =
611  verb + "\n" + canonicalURI + "\n" + canonicalQueryString + "\n" +
612  canonicalHeaders + "\n" + signedHeaders + "\n" + payload_hash;
613 
614  //
615  // Create task 2's inputs.
616  //
617 
618  // Hash the canonical request the way we did the payload.
619  std::string canonicalRequestHash;
620  std::vector<unsigned char> messageDigest;
621  messageDigest.resize(EVP_MAX_MD_SIZE);
622  if (!ComputeSHA256(canonicalRequest, messageDigest)) {
623  err_msg = "Unable to hash canonical request.";
624  return false;
625  }
626  MessageDigestAsHex(messageDigest, canonicalRequestHash);
627 
628  // Task 2: create the string to sign.
629  auto credentialScope = std::string(date_char) + "/" + m_region + "/" + m_service + "/aws4_request";
630  auto stringToSign = std::string("AWS4-HMAC-SHA256\n") + date_time + "\n" + credentialScope + "\n" + canonicalRequestHash;
631 
632  //
633  // Creating task 3's inputs was done when we checked to see if we needed
634  // to get the security token, since they come along for free when we do.
635  //
636 
637  // Task 3: calculate the signature.
638  auto saKey = std::string("AWS4") + secretKey;
639  unsigned int mdLength = 0;
640  const unsigned char *hmac =
641  HMAC(EVP_sha256(), saKey.c_str(), saKey.length(), (unsigned char *)date_char,
642  sizeof(date_char) - 1, messageDigest.data(), &mdLength);
643  if (hmac == NULL) {
644  err_msg = "Unable to calculate HMAC for date.";
645  return false;
646  }
647 
648  unsigned int md2Length = 0;
649  unsigned char messageDigest2[EVP_MAX_MD_SIZE];
650  hmac = HMAC(EVP_sha256(), messageDigest.data(), mdLength,
651  reinterpret_cast<unsigned char *>(m_region.data()), m_region.size(), messageDigest2,
652  &md2Length);
653  if (hmac == NULL) {
654  err_msg = "Unable to calculate HMAC for region.";
655  return false;
656  }
657 
658  hmac = HMAC(EVP_sha256(), messageDigest2, md2Length,
659  reinterpret_cast<unsigned char *>(m_service.data()), m_service.size(), messageDigest.data(),
660  &mdLength);
661  if (hmac == NULL) {
662  err_msg = "Unable to calculate HMAC for service.";
663  return false;
664  }
665 
666  const char request_char[] = "aws4_request";
667  hmac = HMAC(EVP_sha256(), messageDigest.data(), messageDigest.size(), reinterpret_cast<const unsigned char *>(request_char),
668  sizeof(request_char) - 1, messageDigest2, &md2Length);
669  if (hmac == NULL) {
670  err_msg = "Unable to calculate HMAC for request.";
671  return false;
672  }
673 
674  hmac = HMAC(EVP_sha256(), messageDigest2, md2Length,
675  reinterpret_cast<unsigned char *>(stringToSign.data()),
676  stringToSign.size(), messageDigest.data(), &mdLength);
677  if (hmac == NULL) {
678  err_msg = "Unable to calculate HMAC for request string.";
679  return false;
680  }
681 
682  std::string signature;
683  MessageDigestAsHex(messageDigest, signature);
684 
685  auth_token =
686  std::string("AWS4-HMAC-SHA256 Credential=") + keyId + "/" + credentialScope +
687  ",SignedHeaders=" + signedHeaders + ",Signature=" + signature;
688  return true;
689 }
690 
691 std::string
692 Factory::GetBucketFromHttpsUrl(const std::string &url) {
693  if (m_url_style == "virtual" || m_url_style.empty()) {
694  // Virtual-hosted-style URLs are of the form https://bucket.region.endpoint/object
695  auto hostname = ExtractHostname(url);
696  if (hostname.empty()) {
697  return {};
698  }
699  auto test_endpoint = "." + m_endpoint;
700  if (!m_region.empty()) test_endpoint = "." + m_region + test_endpoint;
701  auto loc = hostname.rfind(test_endpoint);
702  if (loc == std::string::npos) {
703  if (!m_region.empty()) {
704  loc = hostname.rfind("." + m_endpoint);
705  if (loc != std::string::npos) {
706  return std::string(hostname.substr(0, loc));
707  }
708  }
709  return {};
710  }
711  return std::string(hostname.substr(0, loc));
712  } else if (m_url_style == "path") {
713  // Path style URLs are of the form https://region.endpoint/bucket/object
714  auto loc = url.find("://");
715  if (loc == std::string::npos) {
716  return {};
717  }
718  loc += 3; // Move past "://"
719  auto slash_loc = url.find('/', loc);
720  if (slash_loc == std::string::npos) {
721  return {};
722  }
723  auto bucket_start = slash_loc + 1;
724  auto bucket_end = url.find('/', bucket_start);
725  if (bucket_end == std::string::npos) {
726  return url.substr(bucket_start);
727  }
728  return url.substr(bucket_start, bucket_end - bucket_start);
729  } else {
730  // Invalid URL style
731  return {};
732  }
733 }
734 
735 std::tuple<std::string, std::string, bool>
736 Factory::GetCredentialsForBucket(const std::string &bucket, std::string &err_msg)
737 {
738  auto now = std::chrono::steady_clock::now();
739  {
740  std::shared_lock lock(m_bucket_auth_map_mutex);
741  auto iter = m_bucket_auth_map.find(bucket);
742  if (iter != m_bucket_auth_map.end()) {
743  // If we have credentials for this bucket, check if they are still valid.
744  auto &creds = iter->second.first;
745  auto &expiration = iter->second.second;
746  if (now < expiration) {
747  // Credentials are still valid, return them.
748  return {creds.m_accesskey, creds.m_secretkey, true};
749  }
750  }
751  }
752 
753  std::unique_lock lock(m_bucket_auth_map_mutex);
754  auto iter = m_bucket_location_map.find(bucket);
755  std::string access_key_location, secret_key_location;
756  if (iter == m_bucket_location_map.end()) {
757  // If we don't have credentials for this bucket, use the default.
758  if (m_default_creds.m_accesskey.empty() || m_default_creds.m_secretkey.empty()) {
759  // No credentials at all, so we assume public access.
760  m_bucket_auth_map[bucket] = {{"", ""}, now + std::chrono::minutes(1)};
761  return {"", "", true};
762  }
763  access_key_location = m_default_creds.m_accesskey;
764  secret_key_location = m_default_creds.m_secretkey;
765  } else {
766  access_key_location = iter->second.m_accesskey;
767  secret_key_location = iter->second.m_secretkey;
768  }
769  if (access_key_location.empty() && secret_key_location.empty()) {
770  // If both are empty, we assume public access.
771  m_bucket_auth_map[bucket] = {{"", ""}, now + std::chrono::minutes(1)};
772  return {"", "", true};
773  }
774  if (access_key_location.empty() || secret_key_location.empty()) {
775  err_msg = "No credentials available for bucket: " + bucket;
776  m_bucket_auth_map[bucket] = {{"", ""}, now + std::chrono::seconds(10)};
777  return {"", "", false};
778  }
779 
780  std::string access_key, secret_key;
781  if (!ReadShortFile(access_key_location, access_key, err_msg)) {
782  m_bucket_auth_map[bucket] = {{"", ""}, now + std::chrono::seconds(10)};
783  return {"", "", false};
784  }
785  access_key = TrimView(access_key);
786 
787  if (!ReadShortFile(secret_key_location, secret_key, err_msg)) {
788  m_bucket_auth_map[bucket] = {{"", ""}, now + std::chrono::seconds(10)};
789  return {"", "", false};
790  }
791  secret_key = TrimView(secret_key);
792 
793  if (access_key.empty() || secret_key.empty()) {
794  err_msg = "Credentials for bucket '" + bucket + "' are empty.";
795  m_bucket_auth_map[bucket] = {{"", ""}, now + std::chrono::seconds(10)};
796  return {"", "", false};
797  }
798  m_bucket_auth_map[bucket] = {{access_key, secret_key}, now + std::chrono::minutes(1)};
799  return {access_key, secret_key, true};
800 }
801 
802 std::string
803 Factory::PathEncode(const std::string_view url) {
804  auto loc = url.find("://");
805  if (loc == std::string_view::npos) {
806  return "";
807  }
808  auto path_loc = url.find("/", loc + 3);
809  auto query_loc = url.find("?", loc + 3);
810  if (query_loc != std::string_view::npos && (path_loc == std::string_view::npos || query_loc < path_loc)) {
811  // No path, just a query string
812  return "/";
813  }
814  auto path = url.substr(path_loc, query_loc - path_loc);
815  std::string segment;
816  std::string encoded;
817 
818  size_t next = 0;
819  size_t offset = 0;
820  const auto length = path.size();
821  while (offset < length) {
822  next = strcspn(path.data() + offset, "/");
823  if (next == 0) {
824  encoded += "/";
825  offset += 1;
826  continue;
827  }
828  if (offset + next >= length) {
829  next = length - offset;
830  }
831 
832  segment = std::string(path.data() + offset, next);
833  encoded += AmazonURLEncode(segment);
834 
835  offset += next;
836  }
837  return encoded;
838 }
839 
840 // Trim left and right side of a string_view for space characters
841 std::string_view
842 Factory::TrimView(const std::string_view input_view) {
843  auto view = ltrim_view(input_view);
844  for (size_t idx = 0; idx < input_view.size(); idx++) {
845  if (!isspace(view[view.size() - 1 - idx])) {
846  return view.substr(0, view.size() - idx);
847  }
848  }
849  return "";
850 }
851 
852 extern "C"
853 {
854  void *XrdClGetPlugIn(const void*)
855  {
856  return static_cast<void*>(new Factory());
857  }
858 }
XrdVERSIONINFO(XrdClGetPlugIn, XrdClGetPlugIn) using namespace XrdClS3
void * XrdClGetPlugIn(const void *)
ssize_t read(int fildes, void *buf, size_t nbyte)
#define close(a)
Definition: XrdPosix.hh:48
#define open
Definition: XrdPosix.hh:78
XrdOucString File
void getline(uchar *buff, int blen)
virtual XrdCl::FilePlugIn * CreateFile(const std::string &url) override
Create a file plug-in for the given URL.
static std::string_view ExtractHostname(const std::string_view url)
static std::string PathEncode(const std::string_view url)
static std::string CleanObjectName(const std::string &object)
static bool GenerateHttpUrl(const std::string &s3_url, std::string &https_url, std::string *obj_result, std::string &err_msg)
static bool GenerateV4Signature(const std::string &url, const std::string &verb, std::vector< std::pair< std::string, std::string >> &headers, std::string &auth_token, std::string &err_msg)
static std::tuple< std::string, std::string, bool > GetCredentialsForBucket(const std::string &bucket, std::string &err_msg)
static std::string_view TrimView(const std::string_view str)
virtual XrdCl::FileSystemPlugIn * CreateFileSystem(const std::string &url) override
Create a file system plug-in for the given URL.
static std::string GetBucketFromHttpsUrl(const std::string &url)
static Log * GetLog()
Get default log.
static Env * GetEnv()
Get default client environment.
bool PutString(const std::string &key, const std::string &value)
Definition: XrdClEnv.cc:52
bool ImportString(const std::string &key, const std::string &shellKey)
Definition: XrdClEnv.cc:214
bool GetString(const std::string &key, std::string &value)
Definition: XrdClEnv.cc:31
An interface for file plug-ins.
An interface for file plug-ins.
Handle diagnostics.
Definition: XrdClLog.hh:101
void SetTopicName(uint64_t topic, std::string name)
Map a topic number to a string.
Definition: XrdClLog.cc:163
void Warning(uint64_t topic, const char *format,...)
Report a warning.
Definition: XrdClLog.cc:248
void Info(uint64_t topic, const char *format,...)
Print an info.
Definition: XrdClLog.cc:265
std::string_view ltrim_view(const std::string_view &input_view)
const uint64_t kLogXrdClS3