QJson project page QJson home page

json_scanner.cpp
1 /* This file is part of QJson
2  *
3  * Copyright (C) 2008 Flavio Castelli <flavio.castelli@gmail.com>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public License
16  * along with this library; see the file COPYING.LIB. If not, write to
17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  * Boston, MA 02110-1301, USA.
19  */
20 
21 #include "qjson_debug.h"
22 #include "json_scanner.h"
23 #include "json_parser.hh"
24 
25 #include <ctype.h>
26 
27 #include <QtCore/QDebug>
28 #include <QtCore/QRegExp>
29 
30 #include <cassert>
31 
32 bool ishexnstring(const QString& string) {
33  for (int i = 0; i < string.length(); i++) {
34  if (isxdigit(string[i] == 0))
35  return false;
36  }
37  return true;
38 }
39 
40 JSonScanner::JSonScanner(QIODevice* io)
41  : m_io (io)
42 {
43  m_quotmarkClosed = true;
44  m_quotmarkCount = 0;
45 }
46 
47 static QString unescape( const QByteArray& ba, bool* ok ) {
48  assert( ok );
49  *ok = false;
50  QString res;
51  QByteArray seg;
52  bool bs = false;
53  for ( int i = 0, size = ba.size(); i < size; ++i ) {
54  const char ch = ba[i];
55  if ( !bs ) {
56  if ( ch == '\\' )
57  bs = true;
58  else
59  seg += ch;
60  } else {
61  bs = false;
62  switch ( ch ) {
63  case 'b':
64  seg += '\b';
65  break;
66  case 'f':
67  seg += '\f';
68  break;
69  case 'n':
70  seg += '\n';
71  break;
72  case 'r':
73  seg += '\r';
74  break;
75  case 't':
76  seg += '\t';
77  break;
78  case 'u':
79  {
80  res += QString::fromUtf8( seg );
81  seg.clear();
82 
83  if ( i > size - 5 ) {
84  //error
85  return QString();
86  }
87 
88  const QString hex_digit1 = QString::fromUtf8( ba.mid( i + 1, 2 ) );
89  const QString hex_digit2 = QString::fromUtf8( ba.mid( i + 3, 2 ) );
90  i += 4;
91 
92  if ( !ishexnstring( hex_digit1 ) || !ishexnstring( hex_digit2 ) ) {
93  qCritical() << "Not an hex string:" << hex_digit1 << hex_digit2;
94  return QString();
95  }
96  bool hexOk;
97  const ushort hex_code1 = hex_digit1.toShort( &hexOk, 16 );
98  if (!hexOk) {
99  qCritical() << "error converting hex value to short:" << hex_digit1;
100  return QString();
101  }
102  const ushort hex_code2 = hex_digit2.toShort( &hexOk, 16 );
103  if (!hexOk) {
104  qCritical() << "error converting hex value to short:" << hex_digit2;
105  return QString();
106  }
107 
108  res += QChar(hex_code2, hex_code1);
109  break;
110  }
111  case '\\':
112  seg += '\\';
113  break;
114  default:
115  seg += ch;
116  break;
117  }
118  }
119  }
120  res += QString::fromUtf8( seg );
121  *ok = true;
122  return res;
123 }
124 
125 int JSonScanner::yylex(YYSTYPE* yylval, yy::location *yylloc)
126 {
127  char ch;
128 
129  if (!m_io->isOpen()) {
130  qCritical() << "JSonScanner::yylex - io device is not open";
131  return -1;
132  }
133 
134  yylloc->step();
135 
136  do {
137  bool ret;
138  if (m_io->atEnd()) {
139  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::END";
140  return yy::json_parser::token::END;
141  }
142  else
143  ret = m_io->getChar(&ch);
144 
145  if (!ret) {
146  qCritical() << "JSonScanner::yylex - error reading from io device";
147  return -1;
148  }
149 
150  qjsonDebug() << "JSonScanner::yylex - got |" << ch << "|";
151 
152  yylloc->columns();
153 
154  if (ch == '\n' || ch == '\r')
155  yylloc->lines();
156 
157  } while (m_quotmarkClosed && (isspace(ch) != 0));
158 
159  if (m_quotmarkClosed && ((ch == 't') || (ch == 'T')
160  || (ch == 'n') || (ch == 'N'))) {
161  // check true & null value
162  const QByteArray buf = m_io->peek(3).toLower();
163 
164  if (buf.length() == 3) {
165  if (buf == "rue") {
166  m_io->read (3);
167  yylloc->columns(3);
168  qjsonDebug() << "JSonScanner::yylex - TRUE_VAL";
169  return yy::json_parser::token::TRUE_VAL;
170  }
171  else if (buf == "ull") {
172  m_io->read (3);
173  yylloc->columns(3);
174  qjsonDebug() << "JSonScanner::yylex - NULL_VAL";
175  return yy::json_parser::token::NULL_VAL;
176  }
177  }
178  }
179  else if (m_quotmarkClosed && ((ch == 'f') || (ch == 'F'))) {
180  // check false value
181  const QByteArray buf = m_io->peek(4).toLower();
182  if (buf.length() == 4) {
183  if (buf == "alse") {
184  m_io->read (4);
185  yylloc->columns(4);
186  qjsonDebug() << "JSonScanner::yylex - FALSE_VAL";
187  return yy::json_parser::token::FALSE_VAL;
188  }
189  }
190  }
191  else if (m_quotmarkClosed && ((ch == 'e') || (ch == 'E'))) {
192  QByteArray ret(1, ch);
193  const QByteArray buf = m_io->peek(1);
194  if (!buf.isEmpty()) {
195  if ((buf[0] == '+' ) || (buf[0] == '-' )) {
196  ret += m_io->read (1);
197  yylloc->columns();
198  }
199  }
200  *yylval = QVariant(QString::fromUtf8(ret));
201  return yy::json_parser::token::E;
202  }
203 
204  if (ch != '"' && !m_quotmarkClosed) {
205  // we're inside a " " block
206  QByteArray raw;
207  raw += ch;
208  char prevCh = ch;
209  bool escape_on = (ch == '\\') ? true : false;
210 
211  while ( true ) {
212  char nextCh;
213  qint64 ret = m_io->peek(&nextCh, 1);
214  if (ret != 1) {
215  if (m_io->atEnd())
216  return yy::json_parser::token::END;
217  else
218  return -1;
219  } else if ( !escape_on && nextCh == '\"' ) {
220  bool ok;
221  const QString str = unescape( raw, &ok );
222  *yylval = ok ? str : QString();
223  return ok ? yy::json_parser::token::STRING : -1;
224  }
225 #if 0
226  if ( prevCh == '\\' && nextCh != '"' && nextCh != '\\' && nextCh != '/' &&
227  nextCh != 'b' && nextCh != 'f' && nextCh != 'n' &&
228  nextCh != 'r' && nextCh != 't' && nextCh != 'u') {
229  qjsonDebug() << "Just read" << nextCh;
230  qjsonDebug() << "JSonScanner::yylex - error decoding escaped sequence";
231  return -1;
232  }
233 #endif
234  m_io->read(1); // consume
235  raw += nextCh;
236  prevCh = nextCh;
237  if (escape_on)
238  escape_on = false;
239  else
240  escape_on = (prevCh == '\\') ? true : false;
241 #if 0
242  if (nextCh == '\\') {
243  char buf;
244  if (m_io->getChar (&buf)) {
245  yylloc->columns();
246  if (((buf != '"') && (buf != '\\') && (buf != '/') &&
247  (buf != 'b') && (buf != 'f') && (buf != 'n') &&
248  (buf != 'r') && (buf != 't') && (buf != 'u'))) {
249  qjsonDebug() << "Just read" << buf;
250  qjsonDebug() << "JSonScanner::yylex - error decoding escaped sequence";
251  return -1;
252  }
253  } else {
254  qCritical() << "JSonScanner::yylex - error decoding escaped sequence : io error";
255  return -1;
256  }
257  }
258 #endif
259  }
260  }
261  else if (isdigit(ch) != 0 && m_quotmarkClosed) {
262  *yylval = QVariant(QString::fromLatin1(QByteArray(&ch,1)));
263  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::DIGIT";
264  return yy::json_parser::token::DIGIT;
265  }
266  else if (isalnum(ch) != 0) {
267  *yylval = QVariant(QString(QChar::fromLatin1(ch)));
268  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::WORD ("
269  << ch << ")";
270  return yy::json_parser::token::STRING;
271  }
272  else if (ch == ':') {
273  // set yylval
274  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::COLON";
275  return yy::json_parser::token::COLON;
276  }
277  else if (ch == '"') {
278  // yy::json_parser::token::QUOTMARK (")
279 
280  // set yylval
281  m_quotmarkCount++;
282  if (m_quotmarkCount %2 == 0) {
283  m_quotmarkClosed = true;
284  m_quotmarkCount = 0;
285  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::QUOTMARKCLOSE";
286  return yy::json_parser::token::QUOTMARKCLOSE;
287  }
288  else {
289  m_quotmarkClosed = false;
290  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::QUOTMARKOPEN";
291  return yy::json_parser::token::QUOTMARKOPEN;
292  }
293  }
294  else if (ch == ',') {
295  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::COMMA";
296  return yy::json_parser::token::COMMA;
297  }
298  else if (ch == '.') {
299  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::DOT";
300  return yy::json_parser::token::DOT;
301  }
302  else if (ch == '-') {
303  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::MINUS";
304  return yy::json_parser::token::MINUS;
305  }
306  else if (ch == '[') {
307  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::SQUARE_BRACKET_OPEN";
308  return yy::json_parser::token::SQUARE_BRACKET_OPEN;
309  }
310  else if (ch == ']') {
311  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::SQUARE_BRACKET_CLOSE";
312  return yy::json_parser::token::SQUARE_BRACKET_CLOSE;
313  }
314  else if (ch == '{') {
315  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::CURLY_BRACKET_OPEN";
316  return yy::json_parser::token::CURLY_BRACKET_OPEN;
317  }
318  else if (ch == '}') {
319  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::CURLY_BRACKET_CLOSE";
320  return yy::json_parser::token::CURLY_BRACKET_CLOSE;
321  }
322 
323  //unknown char!
324  //TODO yyerror?
325  qCritical() << "JSonScanner::yylex - unknown char, returning -1";
326  return -1;
327 }
328 
329 

SourceForge Logo hosts this site. Send comments to:
QJson Developers