comparison sqlite/tokenize.c @ 1434:b6b61becdf4e trunk

[svn] - add sqlite/ directory
author nenolod
date Thu, 27 Jul 2006 22:41:31 -0700
parents
children
comparison
equal deleted inserted replaced
1433:3cbe3d14ea68 1434:b6b61becdf4e
1 /*
2 ** 2001 September 15
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 ** A tokenizer for SQL
13 **
14 ** This file contains C code that splits an SQL input string up into
15 ** individual tokens and sends those tokens one-by-one over to the
16 ** parser for analysis.
17 **
18 ** $Id: tokenize.c,v 1.118 2006/04/04 01:54:55 drh Exp $
19 */
20 #include "sqliteInt.h"
21 #include "os.h"
22 #include <ctype.h>
23 #include <stdlib.h>
24
/*
** The charMap() macro maps alphabetic characters into their
** lower-case ASCII equivalent.  On ASCII machines, this is just
** an upper-to-lower case map.  On EBCDIC machines we also need
** to adjust the encoding.  Only alphabetic characters and underscores
** need to be translated.
**
** The argument is fully parenthesized before being cast so that an
** expression argument (for example charMap(p[0]+1)) is converted to
** unsigned char as a whole, keeping the table index within 0..255.
*/
#ifdef SQLITE_ASCII
# define charMap(X) sqlite3UpperToLower[(unsigned char)(X)]
#endif
#ifdef SQLITE_EBCDIC
# define charMap(X) ebcdicToAscii[(unsigned char)(X)]
/*
** Indexed by an EBCDIC code point.  Entries for letters (both cases)
** hold the lower-case ASCII code, the entry at 0x6D holds ASCII '_'
** (95), and every other entry is 0.
*/
const unsigned char ebcdicToAscii[] = {
/* 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 1x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 2x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 3x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 4x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 5x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 95,  0,  0,  /* 6x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 7x */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* 8x */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* 9x */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ax */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Bx */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* Cx */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* Dx */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ex */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Fx */
};
#endif
57
58 /*
59 ** The sqlite3KeywordCode function looks up an identifier to determine if
60 ** it is a keyword. If it is a keyword, the token code of that keyword is
61 ** returned. If the input is not a keyword, TK_ID is returned.
62 **
63 ** The implementation of this routine was generated by a program,
64 ** mkkeywordhash.c, located in the tool subdirectory of the distribution.
65 ** The output of the mkkeywordhash.c program is written into a file
66 ** named keywordhash.h and then included into this source file by
67 ** the #include below.
68 */
69 #include "keywordhash.h"
70
71
/*
** If X is a character that can be used in an identifier then
** IdChar(X) will be true.  Otherwise it is false.
**
** For ASCII, any character with the high-order bit set is
** allowed in an identifier.  For 7-bit characters the lookup is
** sqlite3IsIdChar[X-0x20], so the table starts at code point 0x20
** and anything below 0x20 is never an identifier character.
**
** For EBCDIC, the table starts at code point 0x40 and only code
** points 0x42 and above are looked up; the end result is the same.
**
** Ticket #1066.  The SQL standard does not allow '$' in the
** middle of identifiers.  But many SQL implementations do.
** SQLite will allow '$' in identifiers for compatibility.
** But the feature is undocumented.
**
** NOTE: IdChar() stores its argument into a variable named "c" that
** must be declared (as an integer type) in the scope where the macro
** is used.  The argument is parenthesized so that any expression can
** be passed safely.
*/
#ifdef SQLITE_ASCII
const char sqlite3IsIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
};
#define IdChar(C)  (((c=(C))&0x80)!=0 || (c>0x1f && sqlite3IsIdChar[c-0x20]))
#endif
#ifdef SQLITE_EBCDIC
const char sqlite3IsIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 4x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,  /* 5x */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0,  /* 6x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,  /* 7x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0,  /* 8x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,  /* 9x */
    1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,  /* Ax */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* Bx */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Cx */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Dx */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Ex */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,  /* Fx */
};
#define IdChar(C)  (((c=(C))>=0x42 && sqlite3IsIdChar[c-0x40]))
#endif
118
119
120 /*
121 ** Return the length of the token that begins at z[0].
122 ** Store the token type in *tokenType before returning.
123 */
124 static int getToken(const unsigned char *z, int *tokenType){
125 int i, c;
126 switch( *z ){
127 case ' ': case '\t': case '\n': case '\f': case '\r': {
128 for(i=1; isspace(z[i]); i++){}
129 *tokenType = TK_SPACE;
130 return i;
131 }
132 case '-': {
133 if( z[1]=='-' ){
134 for(i=2; (c=z[i])!=0 && c!='\n'; i++){}
135 *tokenType = TK_COMMENT;
136 return i;
137 }
138 *tokenType = TK_MINUS;
139 return 1;
140 }
141 case '(': {
142 *tokenType = TK_LP;
143 return 1;
144 }
145 case ')': {
146 *tokenType = TK_RP;
147 return 1;
148 }
149 case ';': {
150 *tokenType = TK_SEMI;
151 return 1;
152 }
153 case '+': {
154 *tokenType = TK_PLUS;
155 return 1;
156 }
157 case '*': {
158 *tokenType = TK_STAR;
159 return 1;
160 }
161 case '/': {
162 if( z[1]!='*' || z[2]==0 ){
163 *tokenType = TK_SLASH;
164 return 1;
165 }
166 for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){}
167 if( c ) i++;
168 *tokenType = TK_COMMENT;
169 return i;
170 }
171 case '%': {
172 *tokenType = TK_REM;
173 return 1;
174 }
175 case '=': {
176 *tokenType = TK_EQ;
177 return 1 + (z[1]=='=');
178 }
179 case '<': {
180 if( (c=z[1])=='=' ){
181 *tokenType = TK_LE;
182 return 2;
183 }else if( c=='>' ){
184 *tokenType = TK_NE;
185 return 2;
186 }else if( c=='<' ){
187 *tokenType = TK_LSHIFT;
188 return 2;
189 }else{
190 *tokenType = TK_LT;
191 return 1;
192 }
193 }
194 case '>': {
195 if( (c=z[1])=='=' ){
196 *tokenType = TK_GE;
197 return 2;
198 }else if( c=='>' ){
199 *tokenType = TK_RSHIFT;
200 return 2;
201 }else{
202 *tokenType = TK_GT;
203 return 1;
204 }
205 }
206 case '!': {
207 if( z[1]!='=' ){
208 *tokenType = TK_ILLEGAL;
209 return 2;
210 }else{
211 *tokenType = TK_NE;
212 return 2;
213 }
214 }
215 case '|': {
216 if( z[1]!='|' ){
217 *tokenType = TK_BITOR;
218 return 1;
219 }else{
220 *tokenType = TK_CONCAT;
221 return 2;
222 }
223 }
224 case ',': {
225 *tokenType = TK_COMMA;
226 return 1;
227 }
228 case '&': {
229 *tokenType = TK_BITAND;
230 return 1;
231 }
232 case '~': {
233 *tokenType = TK_BITNOT;
234 return 1;
235 }
236 case '`':
237 case '\'':
238 case '"': {
239 int delim = z[0];
240 for(i=1; (c=z[i])!=0; i++){
241 if( c==delim ){
242 if( z[i+1]==delim ){
243 i++;
244 }else{
245 break;
246 }
247 }
248 }
249 if( c ){
250 *tokenType = TK_STRING;
251 return i+1;
252 }else{
253 *tokenType = TK_ILLEGAL;
254 return i;
255 }
256 }
257 case '.': {
258 #ifndef SQLITE_OMIT_FLOATING_POINT
259 if( !isdigit(z[1]) )
260 #endif
261 {
262 *tokenType = TK_DOT;
263 return 1;
264 }
265 /* If the next character is a digit, this is a floating point
266 ** number that begins with ".". Fall thru into the next case */
267 }
268 case '0': case '1': case '2': case '3': case '4':
269 case '5': case '6': case '7': case '8': case '9': {
270 *tokenType = TK_INTEGER;
271 for(i=0; isdigit(z[i]); i++){}
272 #ifndef SQLITE_OMIT_FLOATING_POINT
273 if( z[i]=='.' ){
274 i++;
275 while( isdigit(z[i]) ){ i++; }
276 *tokenType = TK_FLOAT;
277 }
278 if( (z[i]=='e' || z[i]=='E') &&
279 ( isdigit(z[i+1])
280 || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2]))
281 )
282 ){
283 i += 2;
284 while( isdigit(z[i]) ){ i++; }
285 *tokenType = TK_FLOAT;
286 }
287 #endif
288 return i;
289 }
290 case '[': {
291 for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
292 *tokenType = TK_ID;
293 return i;
294 }
295 case '?': {
296 *tokenType = TK_VARIABLE;
297 for(i=1; isdigit(z[i]); i++){}
298 return i;
299 }
300 case '#': {
301 for(i=1; isdigit(z[i]); i++){}
302 if( i>1 ){
303 /* Parameters of the form #NNN (where NNN is a number) are used
304 ** internally by sqlite3NestedParse. */
305 *tokenType = TK_REGISTER;
306 return i;
307 }
308 /* Fall through into the next case if the '#' is not followed by
309 ** a digit. Try to match #AAAA where AAAA is a parameter name. */
310 }
311 #ifndef SQLITE_OMIT_TCL_VARIABLE
312 case '$':
313 #endif
314 case '@': /* For compatibility with MS SQL Server */
315 case ':': {
316 int n = 0;
317 *tokenType = TK_VARIABLE;
318 for(i=1; (c=z[i])!=0; i++){
319 if( IdChar(c) ){
320 n++;
321 #ifndef SQLITE_OMIT_TCL_VARIABLE
322 }else if( c=='(' && n>0 ){
323 do{
324 i++;
325 }while( (c=z[i])!=0 && !isspace(c) && c!=')' );
326 if( c==')' ){
327 i++;
328 }else{
329 *tokenType = TK_ILLEGAL;
330 }
331 break;
332 }else if( c==':' && z[i+1]==':' ){
333 i++;
334 #endif
335 }else{
336 break;
337 }
338 }
339 if( n==0 ) *tokenType = TK_ILLEGAL;
340 return i;
341 }
342 #ifndef SQLITE_OMIT_BLOB_LITERAL
343 case 'x': case 'X': {
344 if( (c=z[1])=='\'' || c=='"' ){
345 int delim = c;
346 *tokenType = TK_BLOB;
347 for(i=2; (c=z[i])!=0; i++){
348 if( c==delim ){
349 if( i%2 ) *tokenType = TK_ILLEGAL;
350 break;
351 }
352 if( !isxdigit(c) ){
353 *tokenType = TK_ILLEGAL;
354 return i;
355 }
356 }
357 if( c ) i++;
358 return i;
359 }
360 /* Otherwise fall through to the next case */
361 }
362 #endif
363 default: {
364 if( !IdChar(*z) ){
365 break;
366 }
367 for(i=1; IdChar(z[i]); i++){}
368 *tokenType = keywordCode((char*)z, i);
369 return i;
370 }
371 }
372 *tokenType = TK_ILLEGAL;
373 return 1;
374 }
/*
** Externally visible wrapper around the file-local getToken() routine.
** Stores the token type in *tokenType and returns the value that
** getToken() returns for the token beginning at z[0].
*/
int sqlite3GetToken(const unsigned char *z, int *tokenType){
  int nToken;
  nToken = getToken(z, tokenType);
  return nToken;
}
378
379 /*
380 ** Run the parser on the given SQL string. The parser structure is
381 ** passed in. An SQLITE_ status code is returned. If an error occurs
382 ** and pzErrMsg!=NULL then an error message might be written into
383 ** memory obtained from malloc() and *pzErrMsg made to point to that
384 ** error message. Or maybe not.
385 */
386 int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
387 int nErr = 0;
388 int i;
389 void *pEngine;
390 int tokenType;
391 int lastTokenParsed = -1;
392 sqlite3 *db = pParse->db;
393 extern void *sqlite3ParserAlloc(void*(*)(int));
394 extern void sqlite3ParserFree(void*, void(*)(void*));
395 extern int sqlite3Parser(void*, int, Token, Parse*);
396
397 db->flags &= ~SQLITE_Interrupt;
398 pParse->rc = SQLITE_OK;
399 i = 0;
400 pEngine = sqlite3ParserAlloc((void*(*)(int))sqlite3MallocX);
401 if( pEngine==0 ){
402 return SQLITE_NOMEM;
403 }
404 assert( pParse->sLastToken.dyn==0 );
405 assert( pParse->pNewTable==0 );
406 assert( pParse->pNewTrigger==0 );
407 assert( pParse->nVar==0 );
408 assert( pParse->nVarExpr==0 );
409 assert( pParse->nVarExprAlloc==0 );
410 assert( pParse->apVarExpr==0 );
411 pParse->zTail = pParse->zSql = zSql;
412 while( !sqlite3MallocFailed() && zSql[i]!=0 ){
413 assert( i>=0 );
414 pParse->sLastToken.z = (u8*)&zSql[i];
415 assert( pParse->sLastToken.dyn==0 );
416 pParse->sLastToken.n = getToken((unsigned char*)&zSql[i],&tokenType);
417 i += pParse->sLastToken.n;
418 switch( tokenType ){
419 case TK_SPACE:
420 case TK_COMMENT: {
421 if( (db->flags & SQLITE_Interrupt)!=0 ){
422 pParse->rc = SQLITE_INTERRUPT;
423 sqlite3SetString(pzErrMsg, "interrupt", (char*)0);
424 goto abort_parse;
425 }
426 break;
427 }
428 case TK_ILLEGAL: {
429 if( pzErrMsg ){
430 sqliteFree(*pzErrMsg);
431 *pzErrMsg = sqlite3MPrintf("unrecognized token: \"%T\"",
432 &pParse->sLastToken);
433 }
434 nErr++;
435 goto abort_parse;
436 }
437 case TK_SEMI: {
438 pParse->zTail = &zSql[i];
439 /* Fall thru into the default case */
440 }
441 default: {
442 sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse);
443 lastTokenParsed = tokenType;
444 if( pParse->rc!=SQLITE_OK ){
445 goto abort_parse;
446 }
447 break;
448 }
449 }
450 }
451 abort_parse:
452 if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){
453 if( lastTokenParsed!=TK_SEMI ){
454 sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse);
455 pParse->zTail = &zSql[i];
456 }
457 sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse);
458 }
459 sqlite3ParserFree(pEngine, sqlite3FreeX);
460 if( sqlite3MallocFailed() ){
461 pParse->rc = SQLITE_NOMEM;
462 }
463 if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){
464 sqlite3SetString(&pParse->zErrMsg, sqlite3ErrStr(pParse->rc), (char*)0);
465 }
466 if( pParse->zErrMsg ){
467 if( pzErrMsg && *pzErrMsg==0 ){
468 *pzErrMsg = pParse->zErrMsg;
469 }else{
470 sqliteFree(pParse->zErrMsg);
471 }
472 pParse->zErrMsg = 0;
473 if( !nErr ) nErr++;
474 }
475 if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){
476 sqlite3VdbeDelete(pParse->pVdbe);
477 pParse->pVdbe = 0;
478 }
479 #ifndef SQLITE_OMIT_SHARED_CACHE
480 if( pParse->nested==0 ){
481 sqliteFree(pParse->aTableLock);
482 pParse->aTableLock = 0;
483 pParse->nTableLock = 0;
484 }
485 #endif
486 sqlite3DeleteTable(pParse->db, pParse->pNewTable);
487 sqlite3DeleteTrigger(pParse->pNewTrigger);
488 sqliteFree(pParse->apVarExpr);
489 if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){
490 pParse->rc = SQLITE_ERROR;
491 }
492 return nErr;
493 }