libdap++  Updated for version 3.8.2
util.cc
Go to the documentation of this file.
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 // (c) COPYRIGHT URI/MIT 1994-1999
27 // Please read the full copyright statement in the file COPYRIGHT_URI.
28 //
29 // Authors:
30 // jhrg,jimg James Gallagher <jgallagher@gso.uri.edu>
31 
32 // Utility functions used by the api.
33 //
34 // jhrg 9/21/94
35 
36 #include "config.h"
37 
38 static char rcsid[] not_used =
39  {"$Id: util.cc 25112 2011-12-29 21:44:54Z jimg $"
40  };
41 
42 #include <cassert>
43 #include <cstring>
44 
45 #include <ctype.h>
46 #ifndef TM_IN_SYS_TIME
47 #include <time.h>
48 #else
49 #include <sys/time.h>
50 #endif
51 
52 #ifndef WIN32
53 #include <unistd.h> // for stat
54 #else
55 #include <io.h>
56 #include <fcntl.h>
57 #include <process.h>
58 #endif
59 
60 #include <sys/types.h>
61 #include <sys/stat.h>
62 
63 #include <string>
64 #include <sstream>
65 #include <vector>
66 #include <algorithm>
67 #include <stdexcept>
68 
69 #include "BaseType.h"
70 #include "Str.h"
71 #include "Url.h"
72 #include "Sequence.h"
73 #include "Error.h"
74 #include "parser.h"
75 #include "util.h"
76 #include "GNURegex.h"
77 #include "debug.h"
78 
79 using namespace std;
80 
81 namespace libdap {
82 
// Remove spaces from the start of a URL and from the start of any constraint
// expression it contains. 4/7/98 jhrg
//
// Only two runs of spaces are removed: the one before the first non-space
// character of NAME, and the one immediately following the first `?'. Spaces
// anywhere else are left untouched.
//
// @param name The URL, optionally followed by `?' and a constraint.
// @return The pruned copy of NAME. A string made only of spaces yields the
// empty string.
std::string
prune_spaces(const std::string &name)
{
    // If the URL does not even have white space return.
    if (name.find(' ') == std::string::npos)
        return name;

    // Strip leading spaces from http://...
    // size_type (not unsigned int) so that npos is not truncated.
    const std::string::size_type first = name.find_first_not_of(' ');
    if (first == std::string::npos)
        return std::string();   // nothing but spaces

    std::string tmp_name = name.substr(first);

    // Strip leading spaces from constraint part (following `?').
    const std::string::size_type qmark = tmp_name.find('?');
    if (qmark == std::string::npos)
        return tmp_name;        // no constraint expression at all

    const std::string::size_type ce_start = qmark + 1;
    const std::string::size_type ce_first =
        tmp_name.find_first_not_of(' ', ce_start);
    if (ce_first == std::string::npos)
        tmp_name.erase(ce_start);       // only spaces follow the `?'
    else
        tmp_name.erase(ce_start, ce_first - ce_start);

    return tmp_name;
}
113 
114 // Compare elements in a list of (BaseType *)s and return true if there are
115 // no duplicate elements, otherwise return false.
116 
117 bool
118 unique_names(vector<BaseType *> l, const string &var_name,
119  const string &type_name, string &msg)
120 {
121  // copy the identifier names to a vector
122  vector<string> names(l.size());
123 
124  int nelem = 0;
125  typedef std::vector<BaseType *>::const_iterator citer ;
126  for (citer i = l.begin(); i != l.end(); i++) {
127  assert(*i);
128  names[nelem++] = (*i)->name();
129  DBG(cerr << "NAMES[" << nelem - 1 << "]=" << names[nelem-1] << endl);
130  }
131 
132  // sort the array of names
133  sort(names.begin(), names.end());
134 
135 #ifdef DODS_DEBUG2
136  cout << "unique:" << endl;
137  for (int ii = 0; ii < nelem; ++ii)
138  cout << "NAMES[" << ii << "]=" << names[ii] << endl;
139 #endif
140 
141  // sort the array of names
142  sort(names.begin(), names.end());
143 
144 #ifdef DODS_DEBUG2
145  cout << "unique:" << endl;
146  for (int ii = 0; ii < nelem; ++ii)
147  cout << "NAMES[" << ii << "]=" << names[ii] << endl;
148 #endif
149 
150  // look for any instance of consecutive names that are ==
151  for (int j = 1; j < nelem; ++j) {
152  if (names[j-1] == names[j]) {
153  ostringstream oss;
154  oss << "The variable `" << names[j]
155  << "' is used more than once in " << type_name << " `"
156  << var_name << "'";
157  msg = oss.str();
158 
159  return false;
160  }
161  }
162 
163  return true;
164 }
165 
166 const char *
168 {
169  return LIBDAP_ROOT;
170 }
171 
172 extern "C"
173  const char *
175 {
176  return PACKAGE_VERSION;
177 }
178 
179 extern "C"
180  const char *
182 {
183  return PACKAGE_NAME;
184 }
185 
186 // Since Server4 can get compressed responses using Tomcat, bail on this
187 // software (which complicates building under Win32). It can be turned on
188 // for use with Server3 in configure.ac.
189 
190 #if COMPRESSION_FOR_SERVER3
191 
192 // Return true if the program deflate exists and is executable by user, group
193 // and world. If this returns false the caller should assume that server
194 // filter programs won't be able to find the deflate program and thus won't
195 // be able to compress the return document.
196 // NB: this works because this function uses the same rules as compressor()
197 // (which follows) to look for deflate. 2/11/98 jhrg
198 
199 bool
201 {
202  DBG(cerr << "Entering deflate_exists...");
203 
204  int status = false;
205  struct stat buf;
206 
207 #ifdef WIN32
208  string deflate = (string)libdap_root() + "\\bin\\deflate";
209 #else
210  string deflate = (string)libdap_root() + "/sbin/deflate";
211 #endif
212 
213  // Check that the file exists...
214  // First look for deflate using DODS_ROOT (compile-time constant subsumed
215  // by an environment variable) and if that fails in the CWD which finds
216  // the program when it is in the same directory as the dispatch script
217  // and other server components. 2/11/98 jhrg
218  status = (stat(deflate.c_str(), &buf) == 0)
219 #ifdef WIN32
220  || (stat(".\\deflate", &buf) == 0);
221 #else
222  || (stat("./deflate", &buf) == 0);
223 #endif
224 
225  // and that it can be executed.
226 #ifdef WIN32
227  status &= (buf.st_mode & _S_IEXEC);
228 #else
229  status &= buf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH);
230 #endif
231  DBG(cerr << " returning " << (status ? "true." : "false.") << endl);
232  return (status != 0);
233 }
234 
235 FILE *
236 compressor(FILE *output, int &childpid)
237 {
238 #ifdef WIN32
239  // There is no such thing as a "fork" under win32. This makes it so that
240  // we have to juggle handles more aggressively. This code hasn't been
241  // tested and shown to work as of 07/2000.
242  int pid, data[2];
243  int hStdIn, hStdOut;
244 
245  if (_pipe(data, 512, O_BINARY | O_NOINHERIT) < 0) {
246  cerr << "Could not create IPC channel for compressor process"
247  << endl;
248  return NULL;
249  }
250 
251 
252  // This sets up for the child process, but it has to be reversed for the
253  // parent after the spawn takes place.
254 
255  // Store stdin, stdout so we have something to restore to
256  hStdIn = _dup(_fileno(stdin));
257  hStdOut = _dup(_fileno(stdout));
258 
259  // Child is to read from read end of pipe
260  if (_dup2(data[0], _fileno(stdin)) != 0) {
261  cerr << "dup of child stdin failed" << endl;
262  return NULL;
263  }
264  // Child is to write its's stdout to file
265  if (_dup2(_fileno(output), _fileno(stdout)) != 0) {
266  cerr << "dup of child stdout failed" << endl;
267  return NULL;
268  }
269 
270  // Spawn child process
271  string deflate = "deflate.exe";
272  if ((pid = _spawnlp(_P_NOWAIT, deflate.c_str(), deflate.c_str(),
273  "-c", "5", "-s", NULL)) < 0) {
274  cerr << "Could not spawn to create compressor process" << endl;
275  return NULL;
276  }
277 
278  // Restore stdin, stdout for parent and close duplicate copies
279  if (_dup2(hStdIn, _fileno(stdin)) != 0) {
280  cerr << "dup of stdin failed" << endl;
281  return NULL;
282  }
283  if (_dup2(hStdOut, _fileno(stdout)) != 0) {
284  cerr << "dup of stdout failed" << endl;
285  return NULL;
286  }
287  close(hStdIn);
288  close(hStdOut);
289 
290  // Tell the parent that it reads from the opposite end of the
291  // place where the child writes.
292  close(data[0]);
293  FILE *input = fdopen(data[1], "w");
294  setbuf(input, 0);
295  childpid = pid;
296  return input;
297 
298 #else
299  FILE *ret_file = NULL ;
300 
301  int pid, data[2];
302 
303  if (pipe(data) < 0) {
304  cerr << "Could not create IPC channel for compressor process"
305  << endl;
306  return NULL;
307  }
308 
309  if ((pid = fork()) < 0) {
310  cerr << "Could not fork to create compressor process" << endl;
311  return NULL;
312  }
313 
314  // The parent process closes the write end of the Pipe, and creates a
315  // FILE * using fdopen(). The FILE * is used by the calling program to
316  // access the read end of the Pipe.
317 
318  if (pid > 0) { // Parent, pid is that of the child
319  close(data[0]);
320  ret_file = fdopen(data[1], "w");
321  setbuf(ret_file, 0);
322  childpid = pid;
323  }
324  else { // Child
325  close(data[1]);
326  dup2(data[0], 0); // Read from the pipe...
327  dup2(fileno(output), 1); // Write to the FILE *output.
328 
329  DBG(cerr << "Opening compression stream." << endl);
330 
331  // First try to run deflate using DODS_ROOT (the value read from the
332  // DODS_ROOT environment variable takes precedence over the value set
333  // at build time. If that fails, try the CWD.
334  string deflate = (string)libdap_root() + "/sbin/deflate";
335  (void) execl(deflate.c_str(), "deflate", "-c", "5", "-s", NULL);
336  (void) execl("./deflate", "deflate", "-c", "5", "-s", NULL);
337  cerr << "Warning: Could not start compressor!" << endl;
338  cerr << "defalte should be in DODS_ROOT/etc or in the CWD!"
339  << endl;
340  _exit(127); // Only here if an error occurred.
341  }
342 
343  return ret_file ;
344 #endif
345 }
346 
347 #endif // COMPRESSION_FOR_SERVER3
348 
// This function returns the system time as a string formatted for an httpd
// log file.
351 
352 string
354 {
355  time_t TimBin;
356 
357  if (time(&TimBin) == (time_t) - 1)
358  return string("time() error");
359  else {
360  string TimStr = ctime(&TimBin);
361  return TimStr.substr(0, TimStr.size() - 2); // remove the \n
362  }
363 }
364 
// Convert, in place, every character of S to lower case using tolower().
//
// @param s The string to modify.
void
downcase(std::string &s)
{
    for (std::string::size_type i = 0; i < s.length(); ++i) {
        // tolower() is only defined for EOF and values representable as
        // unsigned char; a plain char may be negative, so convert first.
        s[i] = static_cast<char>(tolower(static_cast<unsigned char>(s[i])));
    }
}
371 
// Does S both start and end with a double quote character? The empty string
// is not quoted; a string consisting of one double quote is reported as
// quoted because its first and last character are the same quote.
//
// @param s The string to test.
// @return true if the first and last characters of S are double quotes.
bool
is_quoted(const std::string &s)
{
    if (s.empty())
        return false;

    const char quote = '\"';
    return *s.begin() == quote && *s.rbegin() == quote;
}
377 
378 string
379 remove_quotes(const string &s)
380 {
381  if (is_quoted(s))
382  return s.substr(1, s.length() - 2);
383  else
384  return s;
385 }
386 
387 #ifdef WIN32
388 // Sometimes need to buffer within an iostream under win32 when
389 // we want the output to go to a FILE *. This is because
390 // it's not possible to associate an ofstream with a FILE *
391 // under the Standard ANSI C++ Library spec. Unix systems
392 // don't follow the spec in this regard.
// Copy whatever can be read from IOS to OUT, at most 511 characters at a
// time. Each read stops early at a NUL character; copying ends as soon as a
// read extracts nothing.
//
// @param ios The stream to drain.
// @param out The FILE * that receives the characters.
void flush_stream(std::iostream ios, FILE *out)
{
    char chunk[512];

    for (;;) {
        ios.get(chunk, 512, '\0');
        const int got = ios.gcount();
        if (got <= 0)
            break;
        fwrite(chunk, 1, got, out);
    }
}
406 #endif
407 
// Jose Garcia
//
// Append the textual representation of VAL, written in base BASE, to
// STR_VAL. Negative values are written as `-' followed by the magnitude;
// digits above 9 use the upper case letters A-Z.
//
// @param val The number to convert.
// @param base The radix, in the range [2,36].
// @param str_val The string the digits are appended to. It is not modified
// when an exception is thrown.
// @exception std::invalid_argument if BASE is outside [2,36].
void
append_long_to_string(long val, int base, std::string &str_val)
{
    // The array digits contains 36 elements which are the
    // possible valid digits for our bases in the range
    // [2,36]
    static const char digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    if (base > 36 || base < 2) {
        // no conversion if wrong base
        throw std::invalid_argument("The parameter base has an invalid value.");
    }

    // Work on the magnitude as an unsigned value: negating the most
    // negative long in signed arithmetic overflows, while unsigned negation
    // is well defined.
    unsigned long magnitude = static_cast<unsigned long>(val);
    if (val < 0) {
        str_val += '-';
        magnitude = 0UL - magnitude;
    }

    // Digits come out least significant first; collect them and append
    // them to the result in reverse.
    const unsigned long radix = static_cast<unsigned long>(base);
    std::string reversed;
    do {
        reversed += digits[magnitude % radix];
        magnitude /= radix;
    } while (magnitude != 0);

    str_val.append(reversed.rbegin(), reversed.rend());
}
436 
437 // base defaults to 10
438 string
439 long_to_string(long val, int base)
440 {
441  string s;
442  append_long_to_string(val, base, s);
443  return s;
444 }
445 
// Jose Garcia
//
// Append the textual representation of NUM to STR. The number is formatted
// by an ostringstream with its precision set to 9 and otherwise default
// settings.
//
// @param num The number to convert.
// @param str The string the text is appended to.
void append_double_to_string(const double &num, std::string &str)
{
    std::ostringstream formatter;
    formatter.precision(9);
    formatter << num;

    str.append(formatter.str());
}
456 
457 string
458 double_to_string(const double &num)
459 {
460  string s;
461  append_double_to_string(num, s);
462  return s;
463 }
464 
465 // Get the version number of the core software. Defining this means that
466 // clients of the DAP don't have to rely on config.h for the version
467 // number.
468 string
470 {
471  return (string)"OPeNDAP DAP/" + libdap_version() + ": compiled on " + __DATE__ + ":" + __TIME__ ;
472 }
473 
// Given a pathname, return the file at the end of the path. This is used
// when reporting errors (maybe other times, too) to keep the server from
// revealing too much about its organization when sending error responses
// back to clients. 10/11/2000 jhrg
// MT-safe. 08/05/02 jhrg

// The separator between the components of a pathname.
#ifdef WIN32
static const char path_sep[] = "\\";
#else
static const char path_sep[] = "/";
#endif

// @param path The pathname.
// @return Everything that follows the last separator in PATH, or PATH
// itself when it contains no separator. A path that ends with a separator
// yields the empty string.
std::string
path_to_filename(std::string path)
{
    const std::string::size_type last_sep = path.rfind(path_sep);
    if (last_sep == std::string::npos)
        return path;

    return path.substr(last_sep + 1);
}
497 
// Read the entire contents of FP into a string. The stream is rewound
// first, so the result always starts at the beginning of the file no matter
// where the stream was positioned. Reading stops at end-of-file or at the
// first read error. The bytes are copied verbatim, embedded NULs included.
//
// @param fp An open stream; it must not be null.
// @return The contents of the file.
std::string
file_to_string(FILE *fp)
{
    rewind(fp);

    // Read in blocks; asking the C library for one byte at a time and
    // pushing each through a stream formatter is needlessly slow.
    std::string contents;
    char chunk[4096];
    size_t got;
    while ((got = fread(chunk, 1, sizeof chunk, fp)) > 0)
        contents.append(chunk, got);

    return contents;
}
512 
// Match STRING against the wildcard pattern WILD. In the pattern `*'
// matches any run of characters (including none) and `?' matches exactly
// one character; every other character matches itself.
//
// @param wild The pattern.
// @param string The text to test.
// @return 1 if the whole of STRING matches WILD, 0 if it does not or if
// either argument is null.
int
wildcmp(const char *wild, const char *string)
{
    // Written by Jack Handy - jakkhandy@hotmail.com

    if (wild == NULL || string == NULL)
        return 0;

    // Everything in front of the first `*' has to match position by
    // position.
    for (; *string != '\0' && *wild != '*'; ++wild, ++string) {
        if (*wild != '?' && *wild != *string)
            return 0;
    }

    // Where to resume (pattern and text) when an attempt after a `*' fails.
    const char *retry_wild = NULL;
    const char *retry_text = NULL;

    while (*string != '\0') {
        if (*wild == '*') {
            ++wild;
            if (*wild == '\0')
                return 1;       // a trailing `*' swallows the rest
            retry_wild = wild;
            retry_text = string + 1;
            continue;
        }

        if (*wild == '?' || *wild == *string) {
            ++wild;
            ++string;
            continue;
        }

        // Mismatch: let the last `*' absorb one more character and retry.
        wild = retry_wild;
        string = retry_text;
        ++retry_text;
    }

    // The text is used up; only `*'s may remain in the pattern.
    while (*wild == '*')
        ++wild;

    return (*wild == '\0') ? 1 : 0;
}
552 
#define CHECK_BIT( tab, bit ) ( tab[ (bit)/8 ] & (1<<( (bit)%8 )) )
/* bytes used for [chars] in compiled expr: one bit for each of the 256 */
/* values an unsigned char can take */
#define BITLISTSIZE 32

static void globchars( const char *s, const char *e, char *b );

/*
 * glob: match a string against a simple pattern
 *
 * Understands the following patterns:
 *
 * * any number of characters
 * ? any single character
 * [a-z] any single character in the range a-z
 * [^a-z] any single character not in the range a-z
 * \x match x
 *
 * The first character after `[' (or after `[^') is always taken as a member
 * of the class, so a `]' in that position does not close the class.
 *
 * @param c The pattern
 * @param s The string
 * @return 0 on success, -1 if the pattern is exhausted but there are
 * characters remaining in the string and a positive value if the pattern
 * does not match. The positive value is 1 when either argument is null,
 * otherwise it is the 1-based count of the pattern token, within the
 * current (possibly recursive) invocation, at which matching gave up.
 */

int
glob(const char *c, const char *s)
{
    if (!c || !s)
        return 1;

    char bitlist[BITLISTSIZE];
    int i = 0;
    for (;;) {
        ++i;
        switch (*c++) {
        case '\0':
            return *s ? -1 : 0;

        case '?':
            if (!*s++)
                return i/*1*/;
            break;

        case '[': {
            /* scan for matching ]; the first char is consumed */
            /* unconditionally so that it can be a literal ] */

            const char *here = c;
            do {
                if (!*c++)
                    return i/*1*/;
            } while (*c != ']');

            /* build character class bitlist from [here, c); c is at */
            /* the closing ], which is not a member of the class */

            globchars(here, c, bitlist);
            c++;

            /* index with the unsigned value so that chars >= 128 stay */
            /* inside the bitlist; \0 is never a member */

            if (!CHECK_BIT( bitlist, static_cast<unsigned char>(*s) ))
                return i/*1*/;
            s++;
            break;
        }

        case '*': {
            const char *here = s;

            while (*s)
                s++;

            /* Try to match the rest of the pattern in a recursive */
            /* call. If the match fails we'll back up chars, retrying. */

            while (s != here) {
                int r;

                /* A fast path for the last token in a pattern */

                r = *c ? glob(c, s) : *s ? -1 : 0;

                if (!r)
                    return 0;
                else if (r < 0)
                    return i/*1*/;

                --s;
            }
            break;
        }

        case '\\':
            /* Force literal match of next char. */

            if (!*c || *s++ != *c++)
                return i/*1*/;
            break;

        default:
            if (*s++ != c[-1])
                return i/*1*/;
            break;
        }
    }
}

/*
 * globchars() - build a bitlist to check for character group match
 *
 * s points at the first character after the `[', e at the closing `]'
 * (exclusive); b must provide BITLISTSIZE bytes. A leading ^ negates the
 * class. Characters are indexed by their unsigned char value.
 */

static void globchars(const char *s, const char *e, char *b) {
    bool negate = false;

    memset(b, '\0', BITLISTSIZE);

    if (s < e && *s == '^') {
        negate = true;
        s++;
    }

    while (s < e) {
        if (s + 2 < e && s[1] == '-') {
            /* a range lo-hi; an inverted range adds nothing */
            const int lo = static_cast<unsigned char>(s[0]);
            const int hi = static_cast<unsigned char>(s[2]);
            for (int c = lo; c <= hi; c++)
                b[c / 8] = static_cast<char>(b[c / 8] | (1 << (c % 8)));
            s += 3;
        }
        else {
            const int c = static_cast<unsigned char>(*s++);
            b[c / 8] = static_cast<char>(b[c / 8] | (1 << (c % 8)));
        }
    }

    if (negate) {
        for (int i = 0; i < BITLISTSIZE; i++)
            b[i] = static_cast<char>(b[i] ^ 0377);
    }

    /* Don't include \0 in either $[chars] or $[^chars] */

    b[0] = static_cast<char>(b[0] & 0376);
}
691 
// Recursive wildcard match. In PAT, `?' matches exactly one character and
// `*' matches any run of characters (including none); every other
// character matches itself.
//
// @param pat The pattern.
// @param s The text to test.
// @return 1 if the whole of S matches PAT, 0 if it does not or if either
// argument is null.
int wmatch(const char *pat, const char *s)
{
    if (pat == NULL || s == NULL)
        return 0;

    const char token = *pat;
    const bool text_left = (*s != '\0');

    // End of pattern: a match only if the text is used up as well.
    if (token == '\0')
        return text_left ? 0 : 1;

    if (token == '?')
        return (text_left && wmatch(pat + 1, s + 1)) ? 1 : 0;

    if (token == '*') {
        // Either the star matches nothing...
        if (wmatch(pat + 1, s))
            return 1;
        // ...or it swallows one character and stays in play.
        return (text_left && wmatch(pat, s + 1)) ? 1 : 0;
    }

    return (*s == token && wmatch(pat + 1, s + 1)) ? 1 : 0;
}
704 
707 
// Sanity check for the size of an array: is NELEM small enough that NELEM
// elements of SZ bytes each stay below UINT_MAX bytes in total?
//
// @param sz The size of one element; zero is never acceptable.
// @param nelem The number of elements.
// @return true if SZ is positive and NELEM is strictly less than
// UINT_MAX / SZ, false otherwise.
bool
size_ok(unsigned int sz, unsigned int nelem)
{
    if (sz == 0)
        return false;

    const unsigned int limit = UINT_MAX / sz;
    return nelem < limit;
}
718 
735 bool
736 pathname_ok(const string &path, bool strict)
737 {
738  if (path.length() > 255)
739  return false;
740 
741  Regex name("[-0-9A-z_./]+");
742  if (!strict)
743  name = "[:print:]+";
744 
745  string::size_type len = path.length();
746  int result = name.match(path.c_str(), len);
747  // Protect against casting too big an uint to int
748  // if LEN is bigger than the max int32, the second test can't work
749  if (len > INT_MAX || result != static_cast<int>(len))
750  return false;
751 
752  return true;
753 }
754 
756 
757 } // namespace libdap
758