// libdap++ -- updated for version 3.8.2

// -*- mode: c++; c-basic-offset:4 -*-

// This file is part of libdap, A C++ implementation of the OPeNDAP Data
// Access Protocol.

// Copyright (c) 2002,2003 OPeNDAP, Inc.
// Author: James Gallagher <jgallagher@opendap.org>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 00026 00027 #include "config.h" 00028 00029 static char rcsid[] not_used = 00030 { "$Id: HTTPConnect.cc 24380 2011-03-28 21:47:15Z jimg $" 00031 }; 00032 00033 #ifdef HAVE_UNISTD_H 00034 #include <unistd.h> 00035 #endif 00036 00037 #include <sys/stat.h> 00038 00039 #ifdef WIN32 00040 #include <io.h> 00041 #endif 00042 00043 #include <string> 00044 #include <vector> 00045 #include <functional> 00046 #include <algorithm> 00047 #include <sstream> 00048 #include <iterator> 00049 #include <cstdlib> 00050 #include <cstring> 00051 00052 // #define DODS_DEBUG 00053 //#define DODS_DEBUG2 00054 //#define HTTP_TRACE 00055 //#define DODS_DEBUG 00056 00057 #undef USE_GETENV 00058 00059 00060 #include "debug.h" 00061 #include "mime_util.h" 00062 #include "GNURegex.h" 00063 #include "HTTPCache.h" 00064 #include "HTTPConnect.h" 00065 #include "RCReader.h" 00066 #include "HTTPResponse.h" 00067 #include "HTTPCacheResponse.h" 00068 00069 using namespace std; 00070 00071 namespace libdap { 00072 00073 // These global variables are not MT-Safe, but I'm leaving them as is because 00074 // they are used only for debugging (set them in a debugger like gdb or ddd). 00075 // They are not static because I think that many debuggers cannot access 00076 // static variables. 08/07/02 jhrg 00077 00078 // Set this to 1 to turn on libcurl's verbose mode (for debugging). 00079 int www_trace = 0; 00080 00081 // Keep the temporary files; useful for debugging. 
00082 int dods_keep_temps = 0; 00083 00084 #define CLIENT_ERR_MIN 400 00085 #define CLIENT_ERR_MAX 417 00086 static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN +1] = 00087 { 00088 "Bad Request:", 00089 "Unauthorized: Contact the server administrator.", 00090 "Payment Required.", 00091 "Forbidden: Contact the server administrator.", 00092 "Not Found: The data source or server could not be found.\n\ 00093 Often this means that the OPeNDAP server is missing or needs attention;\n\ 00094 Please contact the server administrator.", 00095 "Method Not Allowed.", 00096 "Not Acceptable.", 00097 "Proxy Authentication Required.", 00098 "Request Time-out.", 00099 "Conflict.", 00100 "Gone:.", 00101 "Length Required.", 00102 "Precondition Failed.", 00103 "Request Entity Too Large.", 00104 "Request URI Too Large.", 00105 "Unsupported Media Type.", 00106 "Requested Range Not Satisfiable.", 00107 "Expectation Failed." 00108 }; 00109 00110 #define SERVER_ERR_MIN 500 00111 #define SERVER_ERR_MAX 505 00112 static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] = 00113 { 00114 "Internal Server Error.", 00115 "Not Implemented.", 00116 "Bad Gateway.", 00117 "Service Unavailable.", 00118 "Gateway Time-out.", 00119 "HTTP Version Not Supported." 00120 }; 00121 00124 static string 00125 http_status_to_string(int status) 00126 { 00127 if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX) 00128 return string(http_client_errors[status - CLIENT_ERR_MIN]); 00129 else if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX) 00130 return string(http_server_errors[status - SERVER_ERR_MIN]); 00131 else 00132 return string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org."); 00133 } 00134 00139 class ParseHeader : public unary_function<const string &, void> 00140 { 00141 ObjectType type; // What type of object is in the stream? 00142 string server; // Server's version string. 
00143 string protocol; // Server's protocol version. 00144 string location; // Url returned by server 00145 00146 public: 00147 ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0") 00148 { } 00149 00150 void operator()(const string &line) 00151 { 00152 string name, value; 00153 parse_mime_header(line, name, value); 00154 if (name == "content-description") { 00155 DBG2(cerr << name << ": " << value << endl); 00156 type = get_description_type(value); 00157 } 00158 // The second test (== "dods/0.0") tests if xopendap-server has already 00159 // been seen. If so, use that header in preference to the old 00160 // XDODS-Server header. jhrg 2/7/06 00161 else if (name == "xdods-server" && server == "dods/0.0") { 00162 DBG2(cerr << name << ": " << value << endl); 00163 server = value; 00164 } 00165 else if (name == "xopendap-server") { 00166 DBG2(cerr << name << ": " << value << endl); 00167 server = value; 00168 } 00169 else if (name == "xdap") { 00170 DBG2(cerr << name << ": " << value << endl); 00171 protocol = value; 00172 } 00173 else if (server == "dods/0.0" && name == "server") { 00174 DBG2(cerr << name << ": " << value << endl); 00175 server = value; 00176 } 00177 else if (name == "location") { 00178 DBG2(cerr << name << ": " << value << endl); 00179 location = value; 00180 } 00181 else if (type == unknown_type && name == "content-type" 00182 && line.find("text/html") != string::npos) { 00183 DBG2(cerr << name << ": text/html..." << endl); 00184 type = web_error; 00185 } 00186 } 00187 00188 ObjectType get_object_type() 00189 { 00190 return type; 00191 } 00192 00193 string get_server() 00194 { 00195 return server; 00196 } 00197 00198 string get_protocol() 00199 { 00200 return protocol; 00201 } 00202 00203 string get_location() { 00204 return location; 00205 } 00206 }; 00207 00224 static size_t 00225 save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs) 00226 { 00227 DBG2(cerr << "Inside the header parser." 
<< endl); 00228 vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs); 00229 00230 // Grab the header, minus the trailing newline. Or \r\n pair. 00231 string complete_line; 00232 if (nmemb > 1 && *(static_cast<char*>(ptr) + size * (nmemb - 2)) == '\r') 00233 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 2)); 00234 else 00235 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 1)); 00236 00237 // Store all non-empty headers that are not HTTP status codes 00238 if (complete_line != "" && complete_line.find("HTTP") == string::npos) { 00239 DBG(cerr << "Header line: " << complete_line << endl); 00240 hdrs->push_back(complete_line); 00241 } 00242 00243 return size * nmemb; 00244 } 00245 00247 static int 00248 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *) 00249 { 00250 string message(msg, size); 00251 00252 switch (info) { 00253 case CURLINFO_TEXT: 00254 cerr << "Text: " << message; break; 00255 case CURLINFO_HEADER_IN: 00256 cerr << "Header in: " << message; break; 00257 case CURLINFO_HEADER_OUT: 00258 cerr << "Header out: " << message; break; 00259 case CURLINFO_DATA_IN: 00260 cerr << "Data in: " << message; break; 00261 case CURLINFO_DATA_OUT: 00262 cerr << "Data out: " << message; break; 00263 case CURLINFO_END: 00264 cerr << "End: " << message; break; 00265 #ifdef CURLINFO_SSL_DATA_IN 00266 case CURLINFO_SSL_DATA_IN: 00267 cerr << "SSL Data in: " << message; break; 00268 #endif 00269 #ifdef CURLINFO_SSL_DATA_OUT 00270 case CURLINFO_SSL_DATA_OUT: 00271 cerr << "SSL Data out: " << message; break; 00272 #endif 00273 default: 00274 cerr << "Curl info: " << message; break; 00275 } 00276 return 0; 00277 } 00278 00282 void 00283 HTTPConnect::www_lib_init() 00284 { 00285 d_curl = curl_easy_init(); 00286 if (!d_curl) 00287 throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl."); 00288 00289 // Now set options that will remain constant for the duration of this 00290 // CURL object. 
00291 00292 // Set the proxy host. 00293 if (!d_rcr->get_proxy_server_host().empty()) { 00294 DBG(cerr << "Setting up a proxy server." << endl); 00295 DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host() 00296 << endl); 00297 DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port() 00298 << endl); 00299 DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw() 00300 << endl); 00301 curl_easy_setopt(d_curl, CURLOPT_PROXY, 00302 d_rcr->get_proxy_server_host().c_str()); 00303 curl_easy_setopt(d_curl, CURLOPT_PROXYPORT, 00304 d_rcr->get_proxy_server_port()); 00305 00306 // As of 4/21/08 only NTLM, Digest and Basic work. 00307 #ifdef CURLOPT_PROXYAUTH 00308 curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY); 00309 #endif 00310 00311 // Password might not be required. 06/21/04 jhrg 00312 if (!d_rcr->get_proxy_server_userpw().empty()) 00313 curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD, 00314 d_rcr->get_proxy_server_userpw().c_str()); 00315 } 00316 00317 curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer); 00318 // We have to set FailOnError to false for any of the non-Basic 00319 // authentication schemes to work. 07/28/03 jhrg 00320 curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0); 00321 00322 // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM, 00323 // choosing the the 'safest' one supported by the server. 00324 // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg 00325 curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY); 00326 00327 curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1); 00328 curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1); 00329 curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers); 00330 // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth 00331 // param of save_raw_http_headers to a vector<string> object. 
00332 00333 // Follow 302 (redirect) responses 00334 curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1); 00335 curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5); 00336 00337 // If the user turns off SSL validation... 00338 if (!d_rcr->get_validate_ssl() == 0) { 00339 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0); 00340 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0); 00341 } 00342 00343 // Look to see if cookies are turned on in the .dodsrc file. If so, 00344 // activate here. We honor 'session cookies' (cookies without an 00345 // expiration date) here so that session-base SSO systems will work as 00346 // expected. 00347 if (!d_cookie_jar.empty()) { 00348 DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl); 00349 curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str()); 00350 curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1); 00351 } 00352 00353 if (www_trace) { 00354 cerr << "Curl version: " << curl_version() << endl; 00355 curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1); 00356 curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug); 00357 } 00358 } 00359 00363 class BuildHeaders : public unary_function<const string &, void> 00364 { 00365 struct curl_slist *d_cl; 00366 00367 public: 00368 BuildHeaders() : d_cl(0) 00369 {} 00370 00371 void operator()(const string &header) 00372 { 00373 DBG(cerr << "Adding '" << header.c_str() << "' to the header list." 00374 << endl); 00375 d_cl = curl_slist_append(d_cl, header.c_str()); 00376 } 00377 00378 struct curl_slist *get_headers() 00379 { 00380 return d_cl; 00381 } 00382 }; 00383 00398 long 00399 HTTPConnect::read_url(const string &url, FILE *stream, 00400 vector<string> *resp_hdrs, 00401 const vector<string> *headers) 00402 { 00403 curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str()); 00404 00405 #ifdef WIN32 00406 // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA) 00407 // and the CURLOPT_WRITEFUNCTION option. 
Quote: "If you are using libcurl as 00408 // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the 00409 // CURLOPT_WRITEDATA option or you will experience crashes". At the root of 00410 // this issue is that one should not pass a FILE * to a windows DLL. Close 00411 // inspection of libcurl yields that their default write function when using 00412 // the CURLOPT_WRITEDATA is just "fwrite". 00413 curl_easy_setopt(d_curl, CURLOPT_FILE, stream); 00414 curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite); 00415 #else 00416 curl_easy_setopt(d_curl, CURLOPT_FILE, stream); 00417 #endif 00418 00419 DBG(copy(d_request_headers.begin(), d_request_headers.end(), 00420 ostream_iterator<string>(cerr, "\n"))); 00421 00422 BuildHeaders req_hdrs; 00423 req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(), 00424 req_hdrs); 00425 if (headers) 00426 req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs); 00427 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers()); 00428 00429 // Turn off the proxy for this URL? 00430 bool temporary_proxy = false; 00431 if ((temporary_proxy = url_uses_no_proxy_for(url))) { 00432 DBG(cerr << "Suppress proxy for url: " << url << endl); 00433 curl_easy_setopt(d_curl, CURLOPT_PROXY, 0); 00434 } 00435 00436 string::size_type at_sign = url.find('@'); 00437 // Assume username:password present *and* assume it's an HTTP URL; it *is* 00438 // HTTPConnect, after all. 7 is position after "http://"; the second arg 00439 // to substr() is the sub string length. 00440 if (at_sign != url.npos) 00441 d_upstring = url.substr(7, at_sign - 7); 00442 00443 if (!d_upstring.empty()) 00444 curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str()); 00445 00446 // Pass save_raw_http_headers() a pointer to the vector<string> where the 00447 // response headers may be stored. Callers can use the resp_hdrs 00448 // value/result parameter to get the raw response header information . 
00449 curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs); 00450 00451 CURLcode res = curl_easy_perform(d_curl); 00452 00453 // Free the header list and null the value in d_curl. 00454 curl_slist_free_all(req_hdrs.get_headers()); 00455 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0); 00456 00457 // Reset the proxy? 00458 if (temporary_proxy && !d_rcr->get_proxy_server_host().empty()) 00459 curl_easy_setopt(d_curl, CURLOPT_PROXY, 00460 d_rcr->get_proxy_server_host().c_str()); 00461 00462 if (res != 0) 00463 throw Error(d_error_buffer); 00464 00465 long status; 00466 res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status); 00467 if (res != 0) 00468 throw Error(d_error_buffer); 00469 00470 return status; 00471 } 00472 00476 bool 00477 HTTPConnect::url_uses_proxy_for(const string &url) throw() 00478 { 00479 if (d_rcr->is_proxy_for_used()) { 00480 Regex host_regex(d_rcr->get_proxy_for_regexp().c_str()); 00481 int index = 0, matchlen; 00482 return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1; 00483 } 00484 00485 return false; 00486 } 00487 00491 bool 00492 HTTPConnect::url_uses_no_proxy_for(const string &url) throw() 00493 { 00494 return d_rcr->is_no_proxy_for_used() 00495 && url.find(d_rcr->get_no_proxy_for_host()) != string::npos; 00496 } 00497 00498 // Public methods. Mostly... 00499 00506 HTTPConnect::HTTPConnect(RCReader *rcr) : d_username(""), d_password(""), 00507 d_cookie_jar(""), 00508 d_dap_client_protocol_major(2), 00509 d_dap_client_protocol_minor(0) 00510 00511 { 00512 d_accept_deflate = rcr->get_deflate(); 00513 d_rcr = rcr; 00514 00515 // Load in the default headers to send with a request. The empty Pragma 00516 // headers overrides libcurl's default Pragma: no-cache header (which 00517 // will disable caching by Squid, et c.). The User-Agent header helps 00518 // make server logs more readable. 
05/05/03 jhrg 00519 d_request_headers.push_back(string("Pragma:")); 00520 string user_agent = string("User-Agent: ") + string(CNAME) 00521 + string("/") + string(CVER); 00522 d_request_headers.push_back(user_agent); 00523 if (d_accept_deflate) 00524 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress")); 00525 00526 // HTTPCache::instance returns a valid ptr or 0. 00527 if (d_rcr->get_use_cache()) 00528 d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(), 00529 true); 00530 else 00531 d_http_cache = 0; 00532 00533 DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec 00534 << ")" << endl); 00535 00536 if (d_http_cache) { 00537 d_http_cache->set_cache_enabled(d_rcr->get_use_cache()); 00538 d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0); 00539 d_http_cache->set_max_size(d_rcr->get_max_cache_size()); 00540 d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj()); 00541 d_http_cache->set_default_expiration(d_rcr->get_default_expires()); 00542 d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0); 00543 } 00544 00545 d_cookie_jar = rcr->get_cookie_jar(); 00546 00547 www_lib_init(); // This may throw either Error or InternalErr 00548 } 00549 00550 HTTPConnect::~HTTPConnect() 00551 { 00552 DBG2(cerr << "Entering the HTTPConnect dtor" << endl); 00553 00554 curl_easy_cleanup(d_curl); 00555 00556 DBG2(cerr << "Leaving the HTTPConnect dtor" << endl); 00557 } 00558 00571 HTTPResponse * 00572 HTTPConnect::fetch_url(const string &url) 00573 { 00574 #ifdef HTTP_TRACE 00575 cout << "GET " << url << " HTTP/1.0" << endl; 00576 #endif 00577 00578 HTTPResponse *stream; 00579 00580 if (d_http_cache && d_http_cache->is_cache_enabled()) { 00581 stream = caching_fetch_url(url); 00582 } 00583 else { 00584 stream = plain_fetch_url(url); 00585 } 00586 00587 #ifdef HTTP_TRACE 00588 stringstream ss; 00589 ss << "HTTP/1.0 " << stream->get_status() << " -" << endl; 00590 for (size_t i = 0; i < 
stream->get_headers()->size(); i++) { 00591 ss << stream->get_headers()->at(i) << endl; 00592 } 00593 cout << ss.str(); 00594 #endif 00595 00596 ParseHeader parser; 00597 00598 parser = for_each(stream->get_headers()->begin(), 00599 stream->get_headers()->end(), ParseHeader()); 00600 00601 #ifdef HTTP_TRACE 00602 cout << endl << endl; 00603 #endif 00604 00605 // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu) 00606 if (parser.get_location() != "" && 00607 url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) { 00608 delete stream; 00609 return fetch_url(parser.get_location()); 00610 } 00611 00612 stream->set_type(parser.get_object_type()); 00613 stream->set_version(parser.get_server()); 00614 stream->set_protocol(parser.get_protocol()); 00615 00616 return stream; 00617 } 00618 00619 // Look around for a reasonable place to put a temporary file. Check first 00620 // the value of the TMPDIR env var. If that does not yeild a path that's 00621 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as 00622 // defined in stdio.h. If both come up empty, then use `./'. 00623 00624 // Change this to a version that either returns a string or an open file 00625 // descriptor. Use information from https://buildsecurityin.us-cert.gov/ 00626 // (see open()) to make it more secure. Ideal solution: get deserialize() 00627 // methods to read from a stream returned by libcurl, not from a temporary 00628 // file. 9/21/07 jhrg Updated to use strings, so other misc changes. 3/22/11 00629 static string 00630 get_tempfile_template(const string &file_template) 00631 { 00632 string c; 00633 00634 // Windows has one idea of the standard name(s) for a temporary files dir 00635 #ifdef WIN32 00636 // white list for a WIN32 directory 00637 Regex directory("[-a-zA-Z0-9_:\\]*"); 00638 00639 // If we're OK to use getenv(), try it. 
00640 #ifdef USE_GETENV 00641 c = getenv("TEMP"); 00642 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0)) 00643 goto valid_temp_directory; 00644 00645 c= getenv("TMP"); 00646 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0)) 00647 goto valid_temp_directory; 00648 #endif // USE_GETENV 00649 00650 // The windows default 00651 c = "c:\tmp"; 00652 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0)) 00653 goto valid_temp_directory; 00654 00655 #else // Unix/Linux/OSX has another... 00656 // white list for a directory 00657 Regex directory("[-a-zA-Z0-9_/]*"); 00658 #ifdef USE_GETENV 00659 c = getenv("TMPDIR"); 00660 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0)) 00661 goto valid_temp_directory; 00662 #endif // USE_GETENV 00663 00664 // Unix defines this sometimes - if present, use it. 00665 #ifdef P_tmpdir 00666 if (access(P_tmpdir, W_OK | R_OK) == 0) { 00667 c = P_tmpdir; 00668 goto valid_temp_directory; 00669 } 00670 #endif 00671 00672 // The Unix default 00673 c = "/tmp"; 00674 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0)) 00675 goto valid_temp_directory; 00676 00677 #endif // WIN32 00678 00679 // If we found nothing useful, use the current directory 00680 c = "."; 00681 00682 valid_temp_directory: 00683 00684 #ifdef WIN32 00685 c += "\\" + file_template; 00686 #else 00687 c += "/" + file_template; 00688 #endif 00689 00690 return c; 00691 } 00692 00711 string 00712 get_temp_file(FILE *&stream) throw(InternalErr) 00713 { 00714 string dods_temp = get_tempfile_template((string)"dodsXXXXXX"); 00715 00716 vector<char> pathname(dods_temp.length() + 1); 00717 00718 strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length()); 00719 00720 DBG(cerr << "pathanme: " << &pathname[0] << " (" << dods_temp.length() + 1 << ")" << endl); 00721 00722 // Open truncated for update. NB: mkstemp() returns a file descriptor. 
00723 #if defined(WIN32) || defined(TEST_WIN32_TEMPS) 00724 stream = fopen(_mktemp(&pathname[0]), "w+b"); 00725 #else 00726 // Make sure that temp files are accessible only by the owner. 00727 umask(077); 00728 stream = fdopen(mkstemp(&pathname[0]), "w+"); 00729 #endif 00730 00731 if (!stream) { 00732 throw InternalErr(__FILE__, __LINE__, 00733 "Failed to open a temporary file for the data values (" 00734 + dods_temp + ")"); 00735 } 00736 00737 dods_temp = &pathname[0]; 00738 return dods_temp; 00739 } 00740 00742 void 00743 close_temp(FILE *s, const string &name) 00744 { 00745 int res = fclose(s); 00746 if (res) 00747 throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res)); 00748 00749 res = unlink(name.c_str()); 00750 if (res != 0) 00751 throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res)); 00752 } 00753 00775 HTTPResponse * 00776 HTTPConnect::caching_fetch_url(const string &url) 00777 { 00778 DBG(cerr << "Is this URL (" << url << ") in the cache?... "); 00779 00780 vector<string> *headers = new vector<string>; 00781 string file_name; 00782 FILE *s = d_http_cache->get_cached_response(url, *headers, file_name); 00783 if (!s) { 00784 // url not in cache; get it and cache it 00785 DBGN(cerr << "no; getting response and caching." << endl); 00786 delete headers; headers = 0; 00787 time_t now = time(0); 00788 HTTPResponse *rs = plain_fetch_url(url); 00789 d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream()); 00790 00791 return rs; 00792 } 00793 else { // url in cache 00794 DBGN(cerr << "yes... "); 00795 00796 if (d_http_cache->is_url_valid(url)) { // url in cache and valid 00797 DBGN(cerr << "and it's valid; using cached response." << endl); 00798 HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache); 00799 return crs; 00800 } 00801 else { // url in cache but not valid; validate 00802 DBGN(cerr << "but it's not valid; validating... 
"); 00803 00804 d_http_cache->release_cached_response(s); // This closes 's' 00805 headers->clear(); 00806 vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url); 00807 FILE *body = 0; 00808 string dods_temp = get_temp_file(body); 00809 time_t now = time(0); // When was the request made (now). 00810 long http_status; 00811 00812 try { 00813 http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs); 00814 rewind(body); 00815 } 00816 catch (Error &e) { 00817 close_temp(body, dods_temp); 00818 delete headers; 00819 throw ; 00820 } 00821 00822 switch (http_status) { 00823 case 200: { // New headers and new body 00824 DBGN(cerr << "read a new response; caching." << endl); 00825 00826 d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body); 00827 HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp); 00828 00829 return rs; 00830 } 00831 00832 case 304: { // Just new headers, use cached body 00833 DBGN(cerr << "cached response valid; updating." << endl); 00834 00835 close_temp(body, dods_temp); 00836 d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers); 00837 string file_name; 00838 FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name); 00839 HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache); 00840 return crs; 00841 } 00842 00843 default: { // Oops. 
00844 close_temp(body, dods_temp); 00845 if (http_status >= 400) { 00846 delete headers; headers = 0; 00847 string msg = "Error while reading the URL: "; 00848 msg += url; 00849 msg 00850 += ".\nThe OPeNDAP server returned the following message:\n"; 00851 msg += http_status_to_string(http_status); 00852 throw Error(msg); 00853 } 00854 else { 00855 delete headers; headers = 0; 00856 throw InternalErr(__FILE__, __LINE__, 00857 "Bad response from the HTTP server: " + long_to_string(http_status)); 00858 } 00859 } 00860 } 00861 } 00862 } 00863 00864 throw InternalErr(__FILE__, __LINE__, "Should never get here"); 00865 } 00866 00878 HTTPResponse * 00879 HTTPConnect::plain_fetch_url(const string &url) 00880 { 00881 DBG(cerr << "Getting URL: " << url << endl); 00882 FILE *stream = 0; 00883 string dods_temp = get_temp_file(stream); 00884 vector<string> *resp_hdrs = new vector<string>; 00885 00886 int status = -1; 00887 try { 00888 status = read_url(url, stream, resp_hdrs); // Throws Error. 00889 if (status >= 400) { 00890 delete resp_hdrs; 00891 string msg = "Error while reading the URL: "; 00892 msg += url; 00893 msg += ".\nThe OPeNDAP server returned the following message:\n"; 00894 msg += http_status_to_string(status); 00895 throw Error(msg); 00896 } 00897 } 00898 00899 catch (Error &e) { 00900 delete resp_hdrs; 00901 close_temp(stream, dods_temp); 00902 throw; 00903 } 00904 00905 rewind(stream); 00906 00907 return new HTTPResponse(stream, status, resp_hdrs, dods_temp); 00908 } 00909 00921 void 00922 HTTPConnect::set_accept_deflate(bool deflate) 00923 { 00924 d_accept_deflate = deflate; 00925 00926 if (d_accept_deflate) { 00927 if (find(d_request_headers.begin(), d_request_headers.end(), 00928 "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end()) 00929 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress")); 00930 DBG(copy(d_request_headers.begin(), d_request_headers.end(), 00931 ostream_iterator<string>(cerr, "\n"))); 00932 } 
00933 else { 00934 vector<string>::iterator i; 00935 i = remove_if(d_request_headers.begin(), d_request_headers.end(), 00936 bind2nd(equal_to<string>(), 00937 string("Accept-Encoding: deflate, gzip, compress"))); 00938 d_request_headers.erase(i, d_request_headers.end()); 00939 } 00940 } 00941 00943 class HeaderMatch : public unary_function<const string &, bool> { 00944 const string &d_header; 00945 public: 00946 HeaderMatch(const string &header) : d_header(header) {} 00947 bool operator()(const string &arg) { return arg.find(d_header) == 0; } 00948 }; 00949 00958 void 00959 HTTPConnect::set_xdap_protocol(int major, int minor) 00960 { 00961 // Look for, and remove if one exists, an XDAP-Accept header 00962 vector<string>::iterator i; 00963 i = find_if(d_request_headers.begin(), d_request_headers.end(), 00964 HeaderMatch("XDAP-Accept:")); 00965 if (i != d_request_headers.end()) 00966 d_request_headers.erase(i); 00967 00968 // Record and add the new header value 00969 d_dap_client_protocol_major = major; 00970 d_dap_client_protocol_minor = minor; 00971 ostringstream xdap_accept; 00972 xdap_accept << "XDAP-Accept: " << major << "." << minor; 00973 00974 d_request_headers.push_back(xdap_accept.str()); 00975 00976 DBG(copy(d_request_headers.begin(), d_request_headers.end(), 00977 ostream_iterator<string>(cerr, "\n"))); 00978 } 00979 00995 void 00996 HTTPConnect::set_credentials(const string &u, const string &p) 00997 { 00998 if (u.empty()) 00999 return; 01000 01001 // Store the credentials locally. 01002 d_username = u; 01003 d_password = p; 01004 01005 d_upstring = u + ":" + p; 01006 } 01007 01008 } // namespace libdap