BESUtil.cc

Go to the documentation of this file.
00001 // BESUtil.cc
00002 
00003 // This file is part of bes, A C++ back-end server implementation framework
00004 // for the OPeNDAP Data Access Protocol.
00005 
00006 // Copyright (c) 2004-2009 University Corporation for Atmospheric Research
00007 // Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
00008 //
00009 // This library is free software; you can redistribute it and/or
00010 // modify it under the terms of the GNU Lesser General Public
00011 // License as published by the Free Software Foundation; either
00012 // version 2.1 of the License, or (at your option) any later version.
00013 //
00014 // This library is distributed in the hope that it will be useful,
00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 // Lesser General Public License for more details.
00018 //
00019 // You should have received a copy of the GNU Lesser General Public
00020 // License along with this library; if not, write to the Free Software
00021 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // You can contact University Corporation for Atmospheric Research at
00024 // 3080 Center Green Drive, Boulder, CO 80301
00025 
00026 // (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
00027 // Please read the full copyright statement in the file COPYRIGHT_UCAR.
00028 //
00029 // Authors:
00030 //      pwest       Patrick West <pwest@ucar.edu>
00031 //      jgarcia     Jose Garcia <jgarcia@ucar.edu>
00032 
00033 #include "config.h"
00034 
00035 #include <sys/types.h>
00036 #include <sys/stat.h>
00037 
00038 #if HAVE_UNISTD_H
00039 #include <unistd.h>
00040 #endif
00041 
00042 #include <cstdio>
00043 #include <cerrno>
00044 #include <cstring>
00045 #include <cstdlib>
00046 #include <sstream>
00047 #include <iostream>
00048 
00049 using std::istringstream ;
00050 using std::cout ;
00051 using std::endl ;
00052 
00053 #include "BESUtil.h"
00054 #include "BESForbiddenError.h"
00055 #include "BESNotFoundError.h"
00056 #include "BESInternalError.h"
00057 
00058 #define CRLF "\r\n"
00059 
00064 void
00065 BESUtil::set_mime_text( ostream &strm )
00066 {
00067     strm << "HTTP/1.0 200 OK" << CRLF ;
00068     strm << "XBES-Server: " << PACKAGE_STRING << CRLF ;
00069 
00070     const time_t t = time(0);
00071     strm << "Date: " << rfc822_date(t).c_str() << CRLF ;
00072     strm << "Last-Modified: " << rfc822_date(t).c_str() << CRLF ;
00073 
00074     strm << "Content-Type: text/plain" << CRLF ;
00075     // Note that Content-Description is from RFC 2045 (MIME, pt 1), not 2616.
00076     strm << "Content-Description: unknown" << CRLF ;
00077     strm << CRLF ;
00078 }
00079 
00084 void
00085 BESUtil::set_mime_html( ostream &strm )
00086 {
00087     strm << "HTTP/1.0 200 OK" << CRLF ;
00088     strm << "XBES-Server: " << PACKAGE_STRING << CRLF ;
00089 
00090     const time_t t = time(0);
00091     strm << "Date: " << rfc822_date(t).c_str() << CRLF ;
00092     strm << "Last-Modified: " << rfc822_date(t).c_str() << CRLF ;
00093 
00094     strm << "Content-type: text/html" << CRLF ;
00095     // Note that Content-Description is from RFC 2045 (MIME, pt 1), not 2616.
00096     strm << "Content-Description: unknown" << CRLF ;
00097     strm << CRLF ;
00098 }
00099 
00100 // Return a MIME rfc-822 date. The grammar for this is:
00101 //       date-time   =  [ day "," ] date time        ; dd mm yy
00102 //                                                   ;  hh:mm:ss zzz
00103 //
00104 //       day         =  "Mon"  / "Tue" /  "Wed"  / "Thu"
00105 //                   /  "Fri"  / "Sat" /  "Sun"
00106 //
00107 //       date        =  1*2DIGIT month 2DIGIT        ; day month year
00108 //                                                   ;  e.g. 20 Jun 82
00109 //                   NB: year is 4 digit; see RFC 1123. 11/30/99 jhrg
00110 //
00111 //       month       =  "Jan"  /  "Feb" /  "Mar"  /  "Apr"
00112 //                   /  "May"  /  "Jun" /  "Jul"  /  "Aug"
00113 //                   /  "Sep"  /  "Oct" /  "Nov"  /  "Dec"
00114 //
00115 //       time        =  hour zone                    ; ANSI and Military
00116 //
00117 //       hour        =  2DIGIT ":" 2DIGIT [":" 2DIGIT]
00118 //                                                   ; 00:00:00 - 23:59:59
00119 //
00120 //       zone        =  "UT"  / "GMT"                ; Universal Time
00121 //                                                   ; North American : UT
00122 //                   /  "EST" / "EDT"                ;  Eastern:  - 5/ - 4
00123 //                   /  "CST" / "CDT"                ;  Central:  - 6/ - 5
00124 //                   /  "MST" / "MDT"                ;  Mountain: - 7/ - 6
00125 //                   /  "PST" / "PDT"                ;  Pacific:  - 8/ - 7
00126 //                   /  1ALPHA                       ; Military: Z = UT;
00127 //                                                   ;  A:-1; (J not used)
00128 //                                                   ;  M:-12; N:+1; Y:+12
00129 //                   / ( ("+" / "-") 4DIGIT )        ; Local differential
00130 //                                                   ;  hours+min. (HHMM)
00131 
00132 static const char *days[]={"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
00133 static const char *months[]={"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul",
00134                         "Aug", "Sep", "Oct", "Nov", "Dec"};
00135 
00145 string
00146 BESUtil::rfc822_date(const time_t t)
00147 {
00148     struct tm *stm = gmtime(&t);
00149     char d[256];
00150 
00151     snprintf(d, 255, "%s, %02d %s %4d %02d:%02d:%02d GMT", days[stm->tm_wday],
00152             stm->tm_mday, months[stm->tm_mon],
00153             1900 + stm->tm_year,
00154             stm->tm_hour, stm->tm_min, stm->tm_sec);
00155     d[255] = '\0';
00156     return string(d);
00157 }
00158 
00159 string
00160 BESUtil::unhexstring( string s )
00161 {
00162     int val;
00163     istringstream ss( s ) ;
00164     ss >> std::hex >> val;
00165     char tmp_str[2];
00166     tmp_str[0] = static_cast<char>(val);
00167     tmp_str[1] = '\0';
00168     return string(tmp_str);
00169 }
00170 
00171 // I modified this to mirror the version in libdap. The change allows several
00172 // escape sequences to by listed in 'except'. jhrg 2/18/09
00173 string
00174 BESUtil::www2id(const string &in, const string &escape, const string &except)
00175 {
00176     string::size_type i = 0;
00177     string res = in;
00178     while ((i = res.find_first_of(escape, i)) != string::npos) {
00179         if (except.find(res.substr(i, 3)) != string::npos) {
00180             i += 3;
00181             continue;
00182         }
00183         res.replace(i, 3, unhexstring(res.substr(i + 1, 2)));
00184     }
00185 
00186     return res;
00187 }
00188 
00189 string
00190 BESUtil::lowercase( const string &s )
00191 {
00192     string return_string = s ;
00193     for( int j = 0; j < return_string.length(); j++ )
00194     {
00195         return_string[j] = (char)tolower( return_string[j] ) ;
00196     }
00197 
00198     return return_string ;
00199 }
00200 
00201 string
00202 BESUtil::unescape( const string &s )
00203 {
00204     bool done = false ;
00205     string::size_type index = 0 ;
00206     string::size_type new_index = 0 ;
00207     string new_str ;
00208     while( !done )
00209     {
00210         string::size_type bs = s.find( '\\', index ) ;
00211         if( bs == string::npos )
00212         {
00213             new_str += s.substr( index, s.length() - index ) ;
00214             done = true ;
00215         }
00216         else
00217         {
00218             new_str += s.substr( index, bs - index ) ;
00219             new_str += s[bs+1] ;
00220             index = bs+2 ;
00221         }
00222     }
00223 
00224     return new_str ;
00225 }
00226 
00248 void
00249 BESUtil::check_path( const string &path,
00250                      const string &root,
00251                      bool follow_sym_links )
00252 {
00253     // if nothing is passed in path, then the path checks out since root is
00254     // assumed to be valid.
00255     if( path == "" )
00256         return ;
00257 
00258     // make sure there are no ../ in the directory, backing up in any way is
00259     // not allowed.
00260     string::size_type dotdot = path.find( ".." ) ;
00261     if( dotdot != string::npos )
00262     {
00263         string s = (string)"You are not allowed to access the node " + path;
00264         throw BESForbiddenError( s, __FILE__, __LINE__ ) ;
00265     }
00266 
00267     // What I want to do is to take each part of path and check to see if it
00268     // is a symbolic link and it is accessible. If everything is ok, add the
00269     // next part of the path.
00270     bool done = false ;
00271 
00272     // what is remaining to check
00273     string rem = path ;
00274     if( rem[0] == '/' )
00275         rem = rem.substr( 1, rem.length() - 1 ) ;
00276     if( rem[rem.length()-1] == '/' )
00277         rem = rem.substr( 0, rem.length() - 1 ) ;
00278 
00279     // full path of the thing to check
00280     string fullpath = root ;
00281     if( fullpath[fullpath.length()-1] == '/' )
00282     {
00283         fullpath = fullpath.substr( 0, fullpath.length() - 1 ) ;
00284     }
00285 
00286     // path checked so far
00287     string checked ;
00288 
00289     while( !done )
00290     {
00291         size_t slash = rem.find( '/' ) ;
00292         if( slash == string::npos )
00293         {
00294             fullpath = fullpath + "/" + rem ;
00295             checked = checked + "/" + rem ;
00296             done = true ;
00297         }
00298         else
00299         {
00300             fullpath = fullpath + "/" + rem.substr( 0, slash ) ;
00301             checked = checked + "/" + rem.substr( 0, slash ) ;
00302             rem = rem.substr( slash + 1, rem.length() - slash ) ;
00303         }
00304 
00305         if( !follow_sym_links )
00306         {
00307             struct stat buf;
00308             int statret = lstat( fullpath.c_str(), &buf ) ;
00309             if( statret == -1 )
00310             {
00311                 int errsv = errno ;
00312                 // stat failed, so not accessible. Get the error string,
00313                 // store in error, and throw exception
00314                 char *s_err = strerror( errsv ) ;
00315                 string error = "Unable to access node " + checked + ": " ;
00316                 if( s_err )
00317                 {
00318                     error = error + s_err ;
00319                 }
00320                 else
00321                 {
00322                     error = error + "unknow access error" ;
00323                 }
00324                 // ENOENT means that the node wasn't found. Otherise, access
00325                 // is denied for some reason
00326                 if( errsv == ENOENT )
00327                 {
00328                     throw BESNotFoundError( error, __FILE__, __LINE__ ) ;
00329                 }
00330                 else
00331                 {
00332                     throw BESForbiddenError( error, __FILE__, __LINE__ ) ;
00333                 }
00334             }
00335             else
00336             {
00337                 // lstat was successful, now check if sym link
00338                 if( S_ISLNK( buf.st_mode ) )
00339                 {
00340                     string error = "You do not have permission to access "
00341                                    + checked ;
00342                     throw BESForbiddenError( error, __FILE__, __LINE__ ) ;
00343                 }
00344             }
00345         }
00346         else
00347         {
00348             // just do a stat and see if we can access the thing. If we
00349             // can't, get the error information and throw an exception
00350             struct stat buf ;
00351             int statret = stat( fullpath.c_str(), &buf ) ;
00352             if( statret == -1 )
00353             {
00354                 int errsv = errno ;
00355                 // stat failed, so not accessible. Get the error string,
00356                 // store in error, and throw exception
00357                 char *s_err = strerror( errsv ) ;
00358                 string error = "Unable to access node " + checked + ": " ;
00359                 if( s_err )
00360                 {
00361                     error = error + s_err ;
00362                 }
00363                 else
00364                 {
00365                     error = error + "unknow access error" ;
00366                 }
00367                 // ENOENT means that the node wasn't found. Otherise, access
00368                 // is denied for some reason
00369                 if( errsv == ENOENT )
00370                 {
00371                     throw BESNotFoundError( error, __FILE__, __LINE__ ) ;
00372                 }
00373                 else
00374                 {
00375                     throw BESForbiddenError( error, __FILE__, __LINE__ ) ;
00376                 }
00377             }
00378         }
00379     }
00380 }
00381 
00382 char *
00383 BESUtil::fastpidconverter( char *buf, int base )
00384 {
00385     return fastpidconverter( getpid(), buf, base ) ;
00386 }
00387 
00388 char *
00389 BESUtil::fastpidconverter(
00390       long val,                                 /* value to be converted */
00391       char *buf,                                /* output string         */
00392       int base)                                 /* conversion base       */
00393 {
00394       ldiv_t r;                                 /* result of val / base  */
00395 
00396       if (base > 36 || base < 2)          /* no conversion if wrong base */
00397       {
00398             *buf = '\0';
00399             return buf;
00400       }
00401       if (val < 0)
00402             *buf++ = '-';
00403       r = ldiv (labs(val), base);
00404 
00405       /* output digits of val/base first */
00406 
00407       if (r.quot > 0)
00408             buf = fastpidconverter ( r.quot, buf, base);
00409       /* output last digit */
00410 
00411       *buf++ = "0123456789abcdefghijklmnopqrstuvwxyz"[(int)r.rem];
00412       *buf   = '\0';
00413       return buf;
00414 }
00415 
00416 void
00417 BESUtil::removeLeadingAndTrailingBlanks( string &key )
00418 {
00419     if( !key.empty() )
00420     {
00421         string::size_type first = key.find_first_not_of( " \t\n\r" ) ;
00422         string::size_type last = key.find_last_not_of( " \t\n\r" ) ;
00423         if( first == string::npos ) key = "" ;
00424         else
00425         {
00426             string::size_type num = last - first + 1 ;
00427             string new_key = key.substr( first, num ) ;
00428             key = new_key ;
00429         }
00430     }
00431 }
00432 
00433 string
00434 BESUtil::entity( char c )
00435 {
00436     switch( c )
00437     {
00438         case '>': return "&gt;";
00439         case '<': return "&lt;";
00440         case '&': return "&amp;";
00441         case '\'': return "&apos;";
00442         case '\"': return "&quot;";
00443     }
00444 }
00445 
00452 string
00453 BESUtil::id2xml( string in, const string &not_allowed )
00454 {
00455     string::size_type i = 0 ;
00456 
00457     while( ( i = in.find_first_of( not_allowed, i ) ) != string::npos )
00458     {
00459         in.replace( i, 1, entity( in[i] ) ) ;
00460         i++ ;
00461     }
00462 
00463     return in ;
00464 }
00465 
00471 string
00472 BESUtil::xml2id(string in)
00473 {
00474     string::size_type i = 0;
00475 
00476     while ((i = in.find("&gt;", i)) != string::npos)
00477         in.replace(i, 4, ">");
00478 
00479     i = 0;
00480     while ((i = in.find("&lt;", i)) != string::npos)
00481         in.replace(i, 4, "<");
00482 
00483     i = 0;
00484     while ((i = in.find("&amp;", i)) != string::npos)
00485         in.replace(i, 5, "&");
00486 
00487     i = 0;
00488     while ((i = in.find("&apos;", i)) != string::npos)
00489         in.replace(i, 6, "'");
00490 
00491     i = 0;
00492     while ((i = in.find("&quot;", i)) != string::npos)
00493         in.replace(i, 6, "\"");
00494 
00495     return in;
00496 }
00497 
00511 void
00512 BESUtil::explode( char delim, const string &str, list<string> &values )
00513 {
00514     std::string::size_type start = 0 ;
00515     std::string::size_type qstart = 0 ;
00516     std::string::size_type adelim = 0 ;
00517     std::string::size_type aquote = 0 ;
00518     bool done = false ;
00519     while( !done )
00520     {
00521         string aval ;
00522         if( str[start] == '"' )
00523         {
00524             bool endquote = false ;
00525             qstart = start+1 ;
00526             while( !endquote )
00527             {
00528                 aquote = str.find( '"', qstart ) ;
00529                 if( aquote == string::npos )
00530                 {
00531                     string currval = str.substr( start, str.length() - start ) ;
00532                     string err = "BESUtil::explode - No end quote after value "
00533                                  + currval ;
00534                     throw BESInternalError( err, __FILE__, __LINE__ ) ;
00535                 }
00536                 // could be an escaped escape character and an escaped
00537                 // quote, or an escaped escape character and a quote
00538                 if( str[aquote-1] == '\\' )
00539                 {
00540                     if( str[aquote-2] == '\\' )
00541                     {
00542                         endquote = true ;
00543                         qstart = aquote + 1 ;
00544                     }
00545                     else
00546                     {
00547                         qstart = aquote+1 ;
00548                     }
00549                 }
00550                 else
00551                 {
00552                     endquote = true ;
00553                     qstart = aquote + 1 ;
00554                 }
00555             }
00556             if( str[qstart] != delim && qstart != str.length() )
00557             {
00558                 string currval = str.substr( start, qstart - start ) ;
00559                 string err = "BESUtil::explode - No delim after end quote "
00560                              + currval ;
00561                 throw BESInternalError( err, __FILE__, __LINE__ ) ;
00562             }
00563             if( qstart == str.length() )
00564             {
00565                 adelim = string::npos ;
00566             }
00567             else
00568             {
00569                 adelim = qstart ;
00570             }
00571         }
00572         else
00573         {
00574             adelim = str.find( delim, start ) ;
00575         }
00576         if( adelim == string::npos )
00577         {
00578             aval = str.substr( start, str.length() - start ) ;
00579             done = true ;
00580         }
00581         else
00582         {
00583             aval = str.substr( start, adelim - start ) ;
00584         }
00585         values.push_back( aval ) ;
00586         start = adelim + 1 ;
00587         if( start == str.length() )
00588         {
00589             done = true ;
00590         }
00591     }
00592 }
00593 
00604 string
00605 BESUtil::implode( const list<string> &values, char delim )
00606 {
00607     string result ;
00608     list<string>::const_iterator i = values.begin() ;
00609     list<string>::const_iterator e = values.end() ;
00610     bool first = true ;
00611     string::size_type d; // = string::npos ;
00612     for( ; i != e; i++ )
00613     {
00614         if( !first ) result += delim ;
00615         d = (*i).find( delim ) ;
00616         if( d != string::npos && (*i)[0] != '"' )
00617         {
00618             string err = (string)"BESUtil::implode - delimiter exists in value "
00619                          + (*i) ;
00620             throw BESInternalError( err, __FILE__, __LINE__ ) ;
00621         }
00622         //d = string::npos ;
00623         result += (*i) ;
00624         first = false ;
00625     }
00626     return result ;
00627 }
00628