XRootD
XrdClHttpOpListdir.cc
Go to the documentation of this file.
1 /******************************************************************************/
2 /* Copyright (C) 2025, Pelican Project, Morgridge Institute for Research */
3 /* */
4 /* This file is part of the XrdClHttp client plugin for XRootD. */
5 /* */
6 /* XRootD is free software: you can redistribute it and/or modify it under */
7 /* the terms of the GNU Lesser General Public License as published by the */
8 /* Free Software Foundation, either version 3 of the License, or (at your */
9 /* option) any later version. */
10 /* */
11 /* XRootD is distributed in the hope that it will be useful, but WITHOUT */
12 /* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or */
13 /* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public */
14 /* License for more details. */
15 /* */
16 /* The copyright holder's institutional names and contributor's names may not */
17 /* be used to endorse or promote products derived from this software without */
18 /* specific prior written permission of the institution or contributor. */
19 /******************************************************************************/
20 
21 #include "XrdClHttpOps.hh"
22 #include "XrdClHttpResponses.hh"
23 #include "XrdClHttpUtil.hh"
24 
25 #include <XrdCl/XrdClLog.hh>
27 
28 #include <tinyxml.h>
29 
30 using namespace XrdClHttp;
31 
32 CurlListdirOp::CurlListdirOp(XrdCl::ResponseHandler *handler, const std::string &url, const std::string &host_addr,
33  bool set_response_info, struct timespec timeout, XrdCl::Log *logger, CreateConnCalloutType callout,
34  HeaderCallout *header_callout) :
35  CurlOperation(handler, url, timeout, logger, callout, header_callout),
36  m_response_info(set_response_info),
37  m_host_addr(host_addr)
38 {
39  m_minimum_rate = 1024.0 * 1;
40 }
41 
42 bool
44 {
45  if (!CurlOperation::Setup(curl, worker)) return false;
46  curl_easy_setopt(m_curl.get(), CURLOPT_WRITEFUNCTION, CurlListdirOp::WriteCallback);
47  curl_easy_setopt(m_curl.get(), CURLOPT_WRITEDATA, this);
48  curl_easy_setopt(m_curl.get(), CURLOPT_CUSTOMREQUEST, "PROPFIND");
49  m_headers_list.emplace_back("Depth", "1");
50 
51  return true;
52 }
53 
54 void
56 {
57  if (m_curl == nullptr) return;
58  curl_easy_setopt(m_curl.get(), CURLOPT_WRITEFUNCTION, nullptr);
59  curl_easy_setopt(m_curl.get(), CURLOPT_WRITEDATA, nullptr);
60  curl_easy_setopt(m_curl.get(), CURLOPT_CUSTOMREQUEST, nullptr);
61  curl_easy_setopt(m_curl.get(), CURLOPT_HTTPHEADER, nullptr);
63 }
64 
65 size_t
66 CurlListdirOp::WriteCallback(char *buffer, size_t size, size_t nitems, void *this_ptr)
67 {
68  auto me = static_cast<CurlListdirOp*>(this_ptr);
69  if (size * nitems + me->m_response.size() > 10'000'000) {
70  return me->FailCallback(kXR_ServerError, "Response too large for PROPFIND operation");
71  }
72  me->UpdateBytes(size * nitems);
73  me->m_response.append(buffer, size * nitems);
74  return size * nitems;
75 }
76 
77 bool CurlListdirOp::ParseProp(DavEntry &entry, TiXmlElement *prop)
78 {
79  for (auto child = prop->FirstChildElement(); child != nullptr; child = child->NextSiblingElement()) {
80  if (!strcasecmp(child->Value(), "D:resourcetype") || !strcasecmp(child->Value(), "lp1:resourcetype")) {
81  auto collection = child->FirstChildElement("D:collection");
82  entry.m_isdir = collection != nullptr;
83  if (entry.m_isdir && entry.m_size < 0) {
84  entry.m_size = 0;
85  }
86  } else if (!strcasecmp(child->Value(), "D:getcontentlength") || !strcasecmp(child->Value(), "lp1:getcontentlength")) {
87  auto size = child->GetText();
88  if (size == nullptr) {
89  return false;
90  }
91  try {
92  entry.m_size = std::stoll(size);
93  } catch (std::invalid_argument &e) {
94  return false;
95  }
96  } else if (!strcasecmp(child->Value(), "D:getlastmodified") || !strcasecmp(child->Value(), "lp1:getlastmodified")) {
97  auto lastmod = child->GetText();
98  if (lastmod == nullptr) {
99  return false;
100  }
101  struct tm tm;
102  if (strptime(lastmod, "%a, %d %b %Y %H:%M:%S", &tm) == nullptr) {
103  return false;
104  }
105  entry.m_lastmodified = timegm(&tm);
106  } else if (strcasecmp(child->Value(), "D:href") == 0) {
107  auto href = child->GetText();
108  if (href == nullptr) {
109  return false;
110  }
111  entry.m_name = href;
112  } else if (!strcasecmp(child->Value(), "D:executable") || !strcasecmp(child->Value(), "lp1:executable")) {
113  auto val = child->GetText();
114  if (val == nullptr) {
115  return false;
116  }
117  if (strcasecmp(val, "T") == 0) {
118  entry.m_isexec = true;
119  }
120  }
121  }
122  return true;
123 }
124 
125 std::pair<CurlListdirOp::DavEntry, bool>
126 CurlListdirOp::ParseResponse(TiXmlElement *response)
127 {
128  DavEntry entry;
129  bool success = false;
130  for (auto child = response->FirstChildElement(); child != nullptr; child = child->NextSiblingElement()) {
131  if (!strcasecmp(child->Value(), "D:href")) {
132  auto href = child->GetText();
133  if (href == nullptr) {
134  return {entry, false};
135  }
136  // NOTE: This is not particularly robust; it assumes that the server is only returning
137  // a depth of exactly one.
138  std::string_view href_str(href);
139  auto first_non_slash = href_str.find_last_not_of('/');
140  if (first_non_slash != std::string_view::npos) {
141  href_str = href_str.substr(0, first_non_slash + 1);
142  }
143  auto last_slash = href_str.find_last_of('/');
144  if (last_slash != std::string_view::npos) {
145  entry.m_name = href_str.substr(last_slash + 1);
146  } else {
147  entry.m_name = href;
148  }
149  continue;
150  }
151  if (strcasecmp(child->Value(), "D:propstat")) {
152  continue;
153  }
154  for (auto propstat = child->FirstChildElement(); propstat != nullptr; propstat = propstat->NextSiblingElement()) {
155  if (strcasecmp(propstat->Value(), "D:prop")) {
156  continue;
157  }
158  success = ParseProp(entry, propstat);
159  if (!success) {
160  return {entry, success};
161  }
162  }
163  }
164  return {entry, success};
165 }
166 
167 void
169 {
170  SetDone(false);
171  m_logger->Debug(kLogXrdClHttp, "CurlListdirOp::Success");
172 
173  std::unique_ptr<XrdCl::DirectoryList> dirlist(m_response_info ? new DirectoryListResponse() : new XrdCl::DirectoryList());
174 
175  TiXmlDocument doc;
176  doc.Parse(m_response.c_str());
177  if (doc.Error()) {
178  m_logger->Error(kLogXrdClHttp, "Failed to parse XML response: %s", m_response.substr(0, 1024).c_str());
179  Fail(XrdCl::errErrorResponse, kXR_FSError, "Server responded to directory listing with invalid XML");
180  return;
181  }
182 
183  auto elem = doc.RootElement();
184  if (strcasecmp(elem->Value(), "D:multistatus")) {
185  m_logger->Error(kLogXrdClHttp, "Unexpected XML response: %s", m_response.substr(0, 1024).c_str());
186  Fail(XrdCl::errErrorResponse, kXR_FSError, "Server responded to directory listing unexpected XML root");
187  return;
188  }
189  bool skip = true;
190  for (auto response = elem->FirstChildElement(); response != nullptr; response = response->NextSiblingElement()) {
191  if (strcasecmp(response->Value(), "D:response")) {
192  continue;
193  }
194 
195  auto [entry, success] = ParseResponse(response);
196  if (!success) {
197  m_logger->Error(kLogXrdClHttp, "Failed to parse response element in XML response: %s", m_response.substr(0, 1024).c_str());
198  Fail(XrdCl::errErrorResponse, kXR_FSError, "Server responded with invalid directory listing");
199  return;
200  }
201  // Skip the first entry in the response, which is the directory itself
202  if (skip) {
203  skip = false;
204  } else {
205  uint32_t flags = XrdCl::StatInfo::Flags::IsReadable;
206  if (entry.m_isdir) {
207  flags |= XrdCl::StatInfo::Flags::IsDir;
208  }
209  if (entry.m_isexec) {
210  flags |= XrdCl::StatInfo::Flags::XBitSet;
211  }
212  dirlist->Add(new XrdCl::DirectoryList::ListEntry(m_host_addr, entry.m_name, new XrdCl::StatInfo("nobody", entry.m_size, flags, entry.m_lastmodified)));
213  }
214  }
215 
216  m_logger->Debug(kLogXrdClHttp, "Successful propfind directory listing operation on %s (%u items)", m_url.c_str(), static_cast<unsigned>(dirlist->GetSize()));
217  if (m_handler == nullptr) {return;}
218 
219  if (m_response_info) {
220  static_cast<DirectoryListResponse*>(dirlist.get())->SetResponseInfo(MoveResponseInfo());
221  }
222  auto obj = new XrdCl::AnyObject();
223  obj->Set(dirlist.release());
224 
225  auto handle = m_handler;
226  m_handler = nullptr;
227  handle->HandleResponse(new XrdCl::XRootDStatus(), obj);
228 }
@ kXR_ServerError
Definition: XProtocol.hh:1044
@ kXR_FSError
Definition: XProtocol.hh:1037
static void child()
void CURL
bool Setup(CURL *curl, CurlWorker &) override
CurlListdirOp(XrdCl::ResponseHandler *handler, const std::string &url, const std::string &host_addr, bool response_info, struct timespec timeout, XrdCl::Log *logger, CreateConnCalloutType callout, HeaderCallout *header_callout)
void SetDone(bool has_failed)
int FailCallback(XErrorCode ecode, const std::string &emsg)
const std::string m_url
std::unique_ptr< CURL, void(*)(CURL *)> m_curl
virtual void Fail(uint16_t errCode, uint32_t errNum, const std::string &)
virtual void ReleaseHandle()
std::vector< std::pair< std::string, std::string > > m_headers_list
XrdCl::ResponseHandler * m_handler
std::unique_ptr< ResponseInfo > MoveResponseInfo()
virtual bool Setup(CURL *curl, CurlWorker &)
Handle diagnostics.
Definition: XrdClLog.hh:101
void Error(uint64_t topic, const char *format,...)
Report an error.
Definition: XrdClLog.cc:231
void Debug(uint64_t topic, const char *format,...)
Print a debug message.
Definition: XrdClLog.cc:282
Handle an async response.
virtual void HandleResponse(XRootDStatus *status, AnyObject *response)
Object stat info.
const uint16_t errErrorResponse
Definition: XrdClStatus.hh:105
ConnectionCallout *(*)(const std::string &, const ResponseInfo &) CreateConnCalloutType
const uint64_t kLogXrdClHttp