i3
config_parser.c
Go to the documentation of this file.
1 #undef I3__FILE__
2 #define I3__FILE__ "config_parser.c"
3 /*
4  * vim:ts=4:sw=4:expandtab
5  *
6  * i3 - an improved dynamic tiling window manager
7  * © 2009-2012 Michael Stapelberg and contributors (see also: LICENSE)
8  *
9  * config_parser.c: hand-written parser to parse configuration directives.
10  *
11  * See also src/commands_parser.c for rationale on why we use a custom parser.
12  *
13  * This parser works VERY MUCH like src/commands_parser.c, so read that first.
14  * The differences are:
15  *
16  * 1. config_parser supports the 'number' token type (in addition to 'word' and
17  * 'string'). Numbers are referred to using &num (like $str).
18  *
19  * 2. Criteria are not executed immediately, they are just stored.
20  *
21  * 3. config_parser recognizes \n and \r as 'end' token, while commands_parser
22  * ignores them.
23  *
24  * 4. config_parser skips the current line on invalid inputs and follows the
25  * nearest <error> token.
26  *
27  */
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 
35 #include "all.h"
36 
/*
 * Macros to make the YAJL API a bit easier to use. Both operate on the JSON
 * generator stored in command_output.json_gen.
 *
 * y(x, ...) calls yajl_gen_<x>() with the given extra arguments.
 * ystr(str) emits the NUL-terminated string 'str'. The argument is
 *           evaluated twice, so it must not have side effects.
 */
#define y(x, ...) yajl_gen_ ## x (command_output.json_gen, ##__VA_ARGS__)
#define ystr(str) yajl_gen_string(command_output.json_gen, (const unsigned char *)(str), strlen(str))
40 
41 /*******************************************************************************
42  * The data structures used for parsing. Essentially the current state and a
43  * list of tokens for that state.
44  *
45  * The GENERATED_* files are generated by generate-commands-parser.pl with the
46  * input parser-specs/configs.spec.
47  ******************************************************************************/
48 
49 #include "GENERATED_config_enums.h"
50 
51 typedef struct token {
52  char *name;
53  char *identifier;
54  /* This might be __CALL */
56  union {
57  uint16_t call_identifier;
58  } extra;
59 } cmdp_token;
60 
61 typedef struct tokenptr {
63  int n;
65 
67 
68 /*******************************************************************************
69  * The (small) stack where identified literals are stored during the parsing
70  * of a single command (like $workspace).
71  ******************************************************************************/
72 
/*
 * One identified value. 'type' selects which member of 'val' is valid.
 */
struct stack_entry {
    /* Just a pointer, not dynamically allocated. */
    const char *identifier;
    enum {
        STACK_STR = 0,
        STACK_LONG = 1,
    } type;
    union {
        /* Heap-allocated; released by clear_stack(). */
        char *str;
        long num;
    } val;
};

/* 10 entries should be enough for everybody. */
static struct stack_entry stack[10];
88 
89 /*
90  * Pushes a string (identified by 'identifier') on the stack. We simply use a
91  * single array, since the number of entries we have to store is very small.
92  *
93  */
94 static void push_string(const char *identifier, const char *str) {
95  for (int c = 0; c < 10; c++) {
96  if (stack[c].identifier != NULL &&
97  strcmp(stack[c].identifier, identifier) != 0)
98  continue;
99  if (stack[c].identifier == NULL) {
100  /* Found a free slot, let’s store it here. */
102  stack[c].val.str = sstrdup(str);
103  stack[c].type = STACK_STR;
104  } else {
105  /* Append the value. */
106  char *prev = stack[c].val.str;
107  sasprintf(&(stack[c].val.str), "%s,%s", prev, str);
108  free(prev);
109  }
110  return;
111  }
112 
113  /* When we arrive here, the stack is full. This should not happen and
114  * means there’s either a bug in this parser or the specification
115  * contains a command with more than 10 identified tokens. */
116  fprintf(stderr, "BUG: commands_parser stack full. This means either a bug "
117  "in the code, or a new command which contains more than "
118  "10 identified tokens.\n");
119  exit(1);
120 }
121 
122 static void push_long(const char *identifier, long num) {
123  for (int c = 0; c < 10; c++) {
124  if (stack[c].identifier != NULL)
125  continue;
126  /* Found a free slot, let’s store it here. */
128  stack[c].val.num = num;
129  stack[c].type = STACK_LONG;
130  return;
131  }
132 
133  /* When we arrive here, the stack is full. This should not happen and
134  * means there’s either a bug in this parser or the specification
135  * contains a command with more than 10 identified tokens. */
136  fprintf(stderr, "BUG: commands_parser stack full. This means either a bug "
137  "in the code, or a new command which contains more than "
138  "10 identified tokens.\n");
139  exit(1);
140 
141 }
142 
143 static const char *get_string(const char *identifier) {
144  for (int c = 0; c < 10; c++) {
145  if (stack[c].identifier == NULL)
146  break;
147  if (strcmp(identifier, stack[c].identifier) == 0)
148  return stack[c].val.str;
149  }
150  return NULL;
151 }
152 
153 static const long get_long(const char *identifier) {
154  for (int c = 0; c < 10; c++) {
155  if (stack[c].identifier == NULL)
156  break;
157  if (strcmp(identifier, stack[c].identifier) == 0)
158  return stack[c].val.num;
159  }
160  return 0;
161 }
162 
163 static void clear_stack(void) {
164  for (int c = 0; c < 10; c++) {
165  if (stack[c].type == STACK_STR && stack[c].val.str != NULL)
166  free(stack[c].val.str);
167  stack[c].identifier = NULL;
168  stack[c].val.str = NULL;
169  stack[c].val.num = 0;
170  }
171 }
172 
173 // TODO: remove this if it turns out we don’t need it for testing.
174 #if 0
175 /*******************************************************************************
176  * A dynamically growing linked list which holds the criteria for the current
177  * command.
178  ******************************************************************************/
179 
180 typedef struct criterion {
181  char *type;
182  char *value;
183 
184  TAILQ_ENTRY(criterion) criteria;
185 } criterion;
186 
187 static TAILQ_HEAD(criteria_head, criterion) criteria =
188  TAILQ_HEAD_INITIALIZER(criteria);
189 
190 /*
191  * Stores the given type/value in the list of criteria.
192  * Accepts a pointer as first argument, since it is 'call'ed by the parser.
193  *
194  */
195 static void push_criterion(void *unused_criteria, const char *type,
196  const char *value) {
197  struct criterion *criterion = malloc(sizeof(struct criterion));
198  criterion->type = strdup(type);
199  criterion->value = strdup(value);
200  TAILQ_INSERT_TAIL(&criteria, criterion, criteria);
201 }
202 
203 /*
204  * Clears the criteria linked list.
205  * Accepts a pointer as first argument, since it is 'call'ed by the parser.
206  *
207  */
208 static void clear_criteria(void *unused_criteria) {
209  struct criterion *criterion;
210  while (!TAILQ_EMPTY(&criteria)) {
211  criterion = TAILQ_FIRST(&criteria);
212  free(criterion->type);
213  free(criterion->value);
214  TAILQ_REMOVE(&criteria, criterion, criteria);
215  free(criterion);
216  }
217 }
218 #endif
219 
220 /*******************************************************************************
221  * The parser itself.
222  ******************************************************************************/
223 
228 
229 /* A list which contains the states that lead to the current state, e.g.
230  * INITIAL, WORKSPACE_LAYOUT.
231  * When jumping back to INITIAL, statelist_idx will simply be set to 1
232  * (likewise for other states, e.g. MODE or BAR).
233  * This list is used to process the nearest error token. */
234 static cmdp_state statelist[10] = { INITIAL };
235 /* NB: statelist_idx points to where the next entry will be inserted */
236 static int statelist_idx = 1;
237 
238 #include "GENERATED_config_call.h"
239 
240 
241 static void next_state(const cmdp_token *token) {
242  cmdp_state _next_state = token->next_state;
243 
244  //printf("token = name %s identifier %s\n", token->name, token->identifier);
245  //printf("next_state = %d\n", token->next_state);
246  if (token->next_state == __CALL) {
249  _next_state = subcommand_output.next_state;
250  clear_stack();
251  }
252 
253  state = _next_state;
254  if (state == INITIAL) {
255  clear_stack();
256  }
257 
258  /* See if we are jumping back to a state in which we were in previously
259  * (statelist contains INITIAL) and just move statelist_idx accordingly. */
260  for (int i = 0; i < statelist_idx; i++) {
261  if (statelist[i] != _next_state)
262  continue;
263  statelist_idx = i+1;
264  return;
265  }
266 
267  /* Otherwise, the state is new and we add it to the list */
268  statelist[statelist_idx++] = _next_state;
269 }
270 
/*
 * Returns a pointer to the start of the line (one byte after the previous \r,
 * \n) or the start of the input, if this is the first line.
 *
 * 'walk' is the position to start searching backwards from, 'beginning' is
 * the first byte of the input. If 'walk' itself points at a line break, the
 * byte after it is returned. The bounds check is done before dereferencing,
 * so no byte in front of 'beginning' is ever read (callers may pass a 'walk'
 * which is already in front of 'beginning').
 *
 */
static const char *start_of_line(const char *walk, const char *beginning) {
    while (walk >= beginning && *walk != '\n' && *walk != '\r') {
        walk--;
    }

    return walk + 1;
}
283 
/*
 * Returns a heap-allocated copy of 'start', cut off at the first \n (if there
 * is one). A \r is not treated as a line terminator here.
 *
 * The caller has to free() the result.
 *
 */
static char *single_line(const char *start) {
    char *copy = sstrdup(start);
    char *newline = strchr(copy, '\n');
    if (newline)
        *newline = '\0';
    return copy;
}
297 
298 struct ConfigResult *parse_config(const char *input, struct context *context) {
299  /* Dump the entire config file into the debug log. We cannot just use
300  * DLOG("%s", input); because one log message must not exceed 4 KiB. */
301  const char *dumpwalk = input;
302  int linecnt = 1;
303  while (*dumpwalk != '\0') {
304  char *next_nl = strchr(dumpwalk, '\n');
305  if (next_nl != NULL) {
306  DLOG("CONFIG(line %3d): %.*s\n", linecnt, (int)(next_nl - dumpwalk), dumpwalk);
307  dumpwalk = next_nl + 1;
308  } else {
309  DLOG("CONFIG(line %3d): %s\n", linecnt, dumpwalk);
310  break;
311  }
312  linecnt++;
313  }
314  state = INITIAL;
315  statelist_idx = 1;
316 
317 /* A YAJL JSON generator used for formatting replies. */
318 #if YAJL_MAJOR >= 2
319  command_output.json_gen = yajl_gen_alloc(NULL);
320 #else
321  command_output.json_gen = yajl_gen_alloc(NULL, NULL);
322 #endif
323 
324  y(array_open);
325 
326  const char *walk = input;
327  const size_t len = strlen(input);
328  int c;
329  const cmdp_token *token;
330  bool token_handled;
331  linecnt = 1;
332 
333  // TODO: make this testable
334 #ifndef TEST_PARSER
335  cfg_criteria_init(&current_match, &subcommand_output, INITIAL);
336 #endif
337 
338  /* The "<=" operator is intentional: We also handle the terminating 0-byte
339  * explicitly by looking for an 'end' token. */
340  while ((walk - input) <= len) {
341  /* Skip whitespace before every token, newlines are relevant since they
342  * separate configuration directives. */
343  while ((*walk == ' ' || *walk == '\t') && *walk != '\0')
344  walk++;
345 
346  //printf("remaining input: %s\n", walk);
347 
348  cmdp_token_ptr *ptr = &(tokens[state]);
349  token_handled = false;
350  for (c = 0; c < ptr->n; c++) {
351  token = &(ptr->array[c]);
352 
353  /* A literal. */
354  if (token->name[0] == '\'') {
355  if (strncasecmp(walk, token->name + 1, strlen(token->name) - 1) == 0) {
356  if (token->identifier != NULL)
357  push_string(token->identifier, token->name + 1);
358  walk += strlen(token->name) - 1;
359  next_state(token);
360  token_handled = true;
361  break;
362  }
363  continue;
364  }
365 
366  if (strcmp(token->name, "number") == 0) {
367  /* Handle numbers. We only accept decimal numbers for now. */
368  char *end = NULL;
369  errno = 0;
370  long int num = strtol(walk, &end, 10);
371  if ((errno == ERANGE && (num == LONG_MIN || num == LONG_MAX)) ||
372  (errno != 0 && num == 0))
373  continue;
374 
375  /* No valid numbers found */
376  if (end == walk)
377  continue;
378 
379  if (token->identifier != NULL)
380  push_long(token->identifier, num);
381 
382  /* Set walk to the first non-number character */
383  walk = end;
384  next_state(token);
385  token_handled = true;
386  break;
387  }
388 
389  if (strcmp(token->name, "string") == 0 ||
390  strcmp(token->name, "word") == 0) {
391  const char *beginning = walk;
392  /* Handle quoted strings (or words). */
393  if (*walk == '"') {
394  beginning++;
395  walk++;
396  while (*walk != '\0' && (*walk != '"' || *(walk-1) == '\\'))
397  walk++;
398  } else {
399  if (token->name[0] == 's') {
400  while (*walk != '\0' && *walk != '\r' && *walk != '\n')
401  walk++;
402  } else {
403  /* For a word, the delimiters are white space (' ' or
404  * '\t'), closing square bracket (]), comma (,) and
405  * semicolon (;). */
406  while (*walk != ' ' && *walk != '\t' &&
407  *walk != ']' && *walk != ',' &&
408  *walk != ';' && *walk != '\r' &&
409  *walk != '\n' && *walk != '\0')
410  walk++;
411  }
412  }
413  if (walk != beginning) {
414  char *str = scalloc(walk-beginning + 1);
415  /* We copy manually to handle escaping of characters. */
416  int inpos, outpos;
417  for (inpos = 0, outpos = 0;
418  inpos < (walk-beginning);
419  inpos++, outpos++) {
420  /* We only handle escaped double quotes to not break
421  * backwards compatibility with people using \w in
422  * regular expressions etc. */
423  if (beginning[inpos] == '\\' && beginning[inpos+1] == '"')
424  inpos++;
425  str[outpos] = beginning[inpos];
426  }
427  if (token->identifier)
428  push_string(token->identifier, str);
429  free(str);
430  /* If we are at the end of a quoted string, skip the ending
431  * double quote. */
432  if (*walk == '"')
433  walk++;
434  next_state(token);
435  token_handled = true;
436  break;
437  }
438  }
439 
440  if (strcmp(token->name, "end") == 0) {
441  //printf("checking for end: *%s*\n", walk);
442  if (*walk == '\0' || *walk == '\n' || *walk == '\r') {
443  next_state(token);
444  token_handled = true;
445  /* To make sure we start with an appropriate matching
446  * datastructure for commands which do *not* specify any
447  * criteria, we re-initialize the criteria system after
448  * every command. */
449  // TODO: make this testable
450 #ifndef TEST_PARSER
451  cfg_criteria_init(&current_match, &subcommand_output, INITIAL);
452 #endif
453  linecnt++;
454  walk++;
455  break;
456  }
457  }
458  }
459 
460  if (!token_handled) {
461  /* Figure out how much memory we will need to fill in the names of
462  * all tokens afterwards. */
463  int tokenlen = 0;
464  for (c = 0; c < ptr->n; c++)
465  tokenlen += strlen(ptr->array[c].name) + strlen("'', ");
466 
467  /* Build up a decent error message. We include the problem, the
468  * full input, and underline the position where the parser
469  * currently is. */
470  char *errormessage;
471  char *possible_tokens = smalloc(tokenlen + 1);
472  char *tokenwalk = possible_tokens;
473  for (c = 0; c < ptr->n; c++) {
474  token = &(ptr->array[c]);
475  if (token->name[0] == '\'') {
476  /* A literal is copied to the error message enclosed with
477  * single quotes. */
478  *tokenwalk++ = '\'';
479  strcpy(tokenwalk, token->name + 1);
480  tokenwalk += strlen(token->name + 1);
481  *tokenwalk++ = '\'';
482  } else {
483  /* Skip error tokens in error messages, they are used
484  * internally only and might confuse users. */
485  if (strcmp(token->name, "error") == 0)
486  continue;
487  /* Any other token is copied to the error message enclosed
488  * with angle brackets. */
489  *tokenwalk++ = '<';
490  strcpy(tokenwalk, token->name);
491  tokenwalk += strlen(token->name);
492  *tokenwalk++ = '>';
493  }
494  if (c < (ptr->n - 1)) {
495  *tokenwalk++ = ',';
496  *tokenwalk++ = ' ';
497  }
498  }
499  *tokenwalk = '\0';
500  sasprintf(&errormessage, "Expected one of these tokens: %s",
501  possible_tokens);
502  free(possible_tokens);
503 
504 
505  /* Go back to the beginning of the line */
506  const char *error_line = start_of_line(walk, input);
507 
508  /* Contains the same amount of characters as 'input' has, but with
509  * the unparseable part highlighted using ^ characters. */
510  char *position = scalloc(strlen(error_line) + 1);
511  const char *copywalk;
512  for (copywalk = error_line;
513  *copywalk != '\n' && *copywalk != '\r' && *copywalk != '\0';
514  copywalk++)
515  position[(copywalk - error_line)] = (copywalk >= walk ? '^' : (*copywalk == '\t' ? '\t' : ' '));
516  position[(copywalk - error_line)] = '\0';
517 
518  ELOG("CONFIG: %s\n", errormessage);
519  ELOG("CONFIG: (in file %s)\n", context->filename);
520  char *error_copy = single_line(error_line);
521 
522  /* Print context lines *before* the error, if any. */
523  if (linecnt > 1) {
524  const char *context_p1_start = start_of_line(error_line-2, input);
525  char *context_p1_line = single_line(context_p1_start);
526  if (linecnt > 2) {
527  const char *context_p2_start = start_of_line(context_p1_start-2, input);
528  char *context_p2_line = single_line(context_p2_start);
529  ELOG("CONFIG: Line %3d: %s\n", linecnt - 2, context_p2_line);
530  free(context_p2_line);
531  }
532  ELOG("CONFIG: Line %3d: %s\n", linecnt - 1, context_p1_line);
533  free(context_p1_line);
534  }
535  ELOG("CONFIG: Line %3d: %s\n", linecnt, error_copy);
536  ELOG("CONFIG: %s\n", position);
537  free(error_copy);
538  /* Print context lines *after* the error, if any. */
539  for (int i = 0; i < 2; i++) {
540  char *error_line_end = strchr(error_line, '\n');
541  if (error_line_end != NULL && *(error_line_end + 1) != '\0') {
542  error_line = error_line_end + 1;
543  error_copy = single_line(error_line);
544  ELOG("CONFIG: Line %3d: %s\n", linecnt + i + 1, error_copy);
545  free(error_copy);
546  }
547  }
548 
549  context->has_errors = true;
550 
551  /* Format this error message as a JSON reply. */
552  y(map_open);
553  ystr("success");
554  y(bool, false);
555  /* We set parse_error to true to distinguish this from other
556  * errors. i3-nagbar is spawned upon keypresses only for parser
557  * errors. */
558  ystr("parse_error");
559  y(bool, true);
560  ystr("error");
561  ystr(errormessage);
562  ystr("input");
563  ystr(input);
564  ystr("errorposition");
565  ystr(position);
566  y(map_close);
567 
568  /* Skip the rest of this line, but continue parsing. */
569  while ((walk - input) <= len && *walk != '\n')
570  walk++;
571 
572  free(position);
573  free(errormessage);
574  clear_stack();
575 
576  /* To figure out in which state to go (e.g. MODE or INITIAL),
577  * we find the nearest state which contains an <error> token
578  * and follow that one. */
579  bool error_token_found = false;
580  for (int i = statelist_idx-1; (i >= 0) && !error_token_found; i--) {
581  cmdp_token_ptr *errptr = &(tokens[statelist[i]]);
582  for (int j = 0; j < errptr->n; j++) {
583  if (strcmp(errptr->array[j].name, "error") != 0)
584  continue;
585  next_state(&(errptr->array[j]));
586  error_token_found = true;
587  break;
588  }
589  }
590 
591  assert(error_token_found);
592  }
593  }
594 
595  y(array_close);
596 
597  return &command_output;
598 }
599 
600 /*******************************************************************************
601  * Code for building the stand-alone binary test.commands_parser which is used
602  * by t/187-commands-parser.t.
603  ******************************************************************************/
604 
605 #ifdef TEST_PARSER
606 
/*
 * Stand-in for the debug log of the stand-alone test binary: prints the
 * formatted message to stdout, prefixed with "# ".
 * This is to be called by DLOG() which includes filename/linenumber
 *
 */
void debuglog(char *fmt, ...) {
    va_list ap;

    fputs("# ", stdout);
    va_start(ap, fmt);
    vfprintf(stdout, fmt, ap);
    va_end(ap);
}
621 
/*
 * Stand-in for the error log of the stand-alone test binary: prints the
 * formatted message to stderr, without any prefix.
 *
 */
void errorlog(char *fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
}
629 
630 static int criteria_next_state;
631 
632 void cfg_criteria_init(I3_CFG, int _state) {
633  criteria_next_state = _state;
634 }
635 
636 void cfg_criteria_add(I3_CFG, const char *ctype, const char *cvalue) {
637 }
638 
639 void cfg_criteria_pop_state(I3_CFG) {
640  result->next_state = criteria_next_state;
641 }
642 
643 int main(int argc, char *argv[]) {
644  if (argc < 2) {
645  fprintf(stderr, "Syntax: %s <command>\n", argv[0]);
646  return 1;
647  }
648  struct context context;
649  context.filename = "<stdin>";
650  parse_config(argv[1], &context);
651 }
652 #endif