1 {*********************************************************}
3 { Zeos Database Objects }
4 { String tokenizing classes for PostgreSQL }
6 { Originally written by Sergey Seroukhov }
8 {*********************************************************}
10 {@********************************************************}
11 { Copyright (c) 1999-2012 Zeos Development Group }
13 { License Agreement: }
15 { This library is distributed in the hope that it will be }
16 { useful, but WITHOUT ANY WARRANTY; without even the }
17 { implied warranty of MERCHANTABILITY or FITNESS FOR }
18 { A PARTICULAR PURPOSE. See the GNU Lesser General }
19 { Public License for more details. }
21 { The source code of the ZEOS Libraries and packages are }
22 { distributed under the Library GNU General Public }
23 { License (see the file COPYING / COPYING.ZEOS) }
24 { with the following modification: }
25 { As a special exception, the copyright holders of this }
26 { library give you permission to link this library with }
27 { independent modules to produce an executable, }
28 { regardless of the license terms of these independent }
29 { modules, and to copy and distribute the resulting }
30 { executable under terms of your choice, provided that }
31 { you also meet, for each linked independent module, }
32 { the terms and conditions of the license of that module. }
33 { An independent module is a module which is not derived }
34 { from or based on this library. If you modify this }
35 { library, you may extend this exception to your version }
36 { of the library, but you are not obligated to do so. }
37 { If you do not wish to do so, delete this exception }
38 { statement from your version. }
41 { The project web site is located on: }
42 { http://zeos.firmos.at (FORUM) }
43 { http://sourceforge.net/p/zeoslib/tickets/ (BUGTRACKER)}
44 { svn://svn.code.sf.net/p/zeoslib/code-0/trunk (SVN) }
46 { http://www.sourceforge.net/projects/zeoslib. }
49 { Zeos Development Group. }
50 {********************************************************@}
52 unit ZPostgreSqlToken;
59 Classes, {$IFDEF MSEgui}mclasses,{$ENDIF} SysUtils,
60 ZTokenizer, ZGenericSqlToken, ZMySqlToken;
64 {** Implements a PostgreSQL-specific number state object. }
{ Number state derived from TZNumberState; it overrides NextToken, whose
  PostgreSQL-specific implementation lives in this unit. }
65 TZPostgreSQLNumberState = class (TZNumberState)
67 function NextToken(Stream: TStream; FirstChar: Char;
68 Tokenizer: TZTokenizer): TZToken; override;
71 {** Implements a PostgreSQL-specific quote string state object. }
{ Quote state for double-quoted names, dollar-quoted strings and
  single-quoted strings with an optional modifier prefix. }
72 TZPostgreSQLQuoteState = class (TZMySQLQuoteState)
{ Consulted by GetQuotedStringWithModifier when it decides whether
  backslashes act as escapes in strings without a modifier. }
74 FStandardConformingStrings: Boolean;
{ Helpers that read the individual literal forms from the stream. }
76 function GetModifier(Stream: TStream; FirstChar: Char; ResetPosition: Boolean = True): string;
77 function GetDollarQuotedString(Stream: TStream; QuoteChar: Char): string;
78 function GetQuotedString(Stream: TStream; QuoteChar: Char; EscapeSyntax: Boolean): String;
79 function GetQuotedStringWithModifier(Stream: TStream; FirstChar: Char): string;
{ Entry point used by the tokenizer, and the setter for the field above. }
81 function NextToken(Stream: TStream; FirstChar: Char;
82 Tokenizer: TZTokenizer): TZToken; override;
83 procedure SetStandardConformingStrings(const Value: Boolean);
87 This state will either delegate to a comment-handling
88 state, or return a token with just a slash in it.
{ Comment state derived from TZCppCommentState; both the multi-line comment
  reader and NextToken are overridden in this unit. }
90 TZPostgreSQLCommentState = class (TZCppCommentState)
92 function GetMultiLineComment(Stream: TStream): string; override;
94 function NextToken(Stream: TStream; FirstChar: Char;
95 Tokenizer: TZTokenizer): TZToken; override;
98 {** Implements a symbol state object. }
99 TZPostgreSQLSymbolState = class (TZSymbolState)
104 {** Implements a word state object. }
105 TZPostgreSQLWordState = class (TZGenericSQLWordState)
{ Tokenizer interface that extends IZTokenizer with a setter for the
  standard-conforming-strings mode. }
110 IZPostgreSQLTokenizer = interface (IZTokenizer)
111 ['{82392175-9065-4048-9974-EE1253B921B4}']
112 procedure SetStandardConformingStrings(const Value: Boolean);
115 {** Implements a default tokenizer object. }
{ PostgreSQL tokenizer; it is declared as implementing IZPostgreSQLTokenizer. }
116 TZPostgreSQLTokenizer = class (TZTokenizer, IZPostgreSQLTokenizer)
{ Overridden so that a word that is really a string modifier can be routed to
  the quote state (see the implementation of CheckEscapeState). }
118 function CheckEscapeState(const ActualState: TZTokenizerState; Stream: TStream;
119 const FirstChar: Char): TZTokenizerState; override;
{ Forwards the setting to the quote state. }
121 procedure SetStandardConformingStrings(const Value: Boolean);
{ The three characters that TZPostgreSQLTokenizer.Create maps to the quote
  state: double quote, dollar sign and single quote. }
130 NameQuoteChar = Char('"');
131 DollarQuoteChar = Char('$');
132 SingleQuoteChar = Char('''');
134 { TZPostgreSQLNumberState }
137 Return a number token from a reader.
138 @return a number token from a reader
{ Builds a number token that starts with FirstChar: digits before the decimal
  point, a fractional part and an exponent part where present. The token type
  ends up as ttFloat or ttInteger; a value consisting only of '.' is passed on
  to the tokenizer's symbol state when one is assigned. }
140 function TZPostgreSQLNumberState.NextToken(Stream: TStream; FirstChar: Char;
141 Tokenizer: TZTokenizer): TZToken;
{ Local helper: collects consecutive decimal digits from the stream into its
  result. Each character is read into LastChar, which the outer code inspects
  afterwards. }
147 function ReadDecDigits: string;
151 while Stream.Read(LastChar, SizeOf(Char)) > 0 do
153 if CharInSet(LastChar, ['0'..'9']) then
155 Result := Result + LastChar;
{ Steps back one Char - presumably to un-read the first non-digit; the
  enclosing branch is not visible in this excerpt, TODO confirm. }
160 Stream.Seek(-SizeOf(Char), soFromCurrent);
{ A leading '.' means the token starts directly with the fractional part. }
167 FloatPoint := FirstChar = '.';
168 Result.Value := FirstChar;
169 Result.TokenType := ttUnknown;
172 { Reads the first part of the number before decimal point }
173 if not FloatPoint then
175 Result.Value := Result.Value + ReadDecDigits;
{ LastChar holds the last character ReadDecDigits read; a '.' there starts
  the fractional part. }
176 FloatPoint := LastChar = '.';
{ Reads one Char and appends it to the value - presumably the '.' that was
  un-read by ReadDecDigits; the guarding condition is not visible here. }
179 Stream.Read(TempChar, SizeOf(Char));
180 Result.Value := Result.Value + TempChar;
184 { Reads the second part of the number after decimal point }
186 Result.Value := Result.Value + ReadDecDigits;
188 { Reads a power part of the number }
189 if CharInSet(LastChar, ['e','E']) then
{ NOTE(review): the return values of the Stream.Read calls below are not
  checked; confirm what happens when the stream ends right after the
  exponent letter. }
191 Stream.Read(TempChar, SizeOf(Char));
192 Result.Value := Result.Value + TempChar;
195 Stream.Read(TempChar, SizeOf(Char));
{ The exponent letter must be followed by a digit or a sign to be accepted. }
196 if CharInSet(TempChar, ['0'..'9','-','+']) then
197 Result.Value := Result.Value + TempChar + ReadDecDigits
{ Presumably the else branch (keyword not visible in this excerpt): the last
  character is removed from the value and the stream is moved back two Chars,
  which looks like undoing the exponent letter and the Char after it. }
200 Result.Value := Copy(Result.Value, 1, Length(Result.Value) - 1);
201 Stream.Seek(-2*SizeOf(Char), soFromCurrent);
205 { Prepare the result }
{ A lone '.' is not a number; the symbol state, if assigned, produces the
  token instead. }
206 if Result.Value = '.' then
208 if Tokenizer.SymbolState <> nil then
209 Result := Tokenizer.SymbolState.NextToken(Stream, FirstChar, Tokenizer);
{ Otherwise the token is typed as float or integer; the condition selecting
  between the two is not visible in this excerpt. }
214 Result.TokenType := ttFloat
215 else Result.TokenType := ttInteger;
219 { TZPostgreSQLQuoteState }
222 Retrieves string modifier from quoted string.
223 @return a string with modifier for valid quoted string with modifier
224 or empty string otherwise.
{ Looks at FirstChar and the characters that follow it in the stream to
  recognise a string modifier (E, B, X, U or U& in either case) that is
  followed by a single quote. ReadNum accumulates the byte counts returned by
  Stream.Read so that the stream can be moved back by exactly that amount
  when ResetPosition is set. }
226 function TZPostgreSQLQuoteState.GetModifier(Stream: TStream;
227 FirstChar: Char; ResetPosition: boolean = True): string;
{ Only these letters can introduce a modifier. }
234 if CharInSet(FirstChar, ['E', 'e', 'B', 'b', 'X', 'x', 'U', 'u']) then
236 Modifier := FirstChar;
237 ReadNum := Stream.Read(ReadChar, SizeOf(Char));
{ Continue only when a complete Char could be read. }
238 if ReadNum = SizeOf(Char) then
240 if (UpperCase(FirstChar) = 'U') and (ReadChar = '&') then // Check for U& modifier
242 Modifier := Modifier + ReadChar;
{ Reads the character after '&' and adds its size to the rewind amount. }
243 ReadNum := ReadNum + Stream.Read(ReadChar, SizeOf(Char));
{ The candidate counts as a modifier only when a single quote follows; the
  statement executed in that case is not visible in this excerpt. }
246 if (ReadChar = SingleQuoteChar) then
{ Un-reads everything read above so the caller sees an unchanged stream. }
249 if ResetPosition then
250 Stream.Seek(-ReadNum, soFromCurrent);
256 Returns a quoted string token from a reader. This method
257 will get the Tag from the first char up to QuoteChar and will collect
258 characters until it reaches the same Tag again.
260 @return a quoted string token from a reader
{ Reads a dollar-quoted string Char by Char. TagState takes the values 0 to 3
  and selects what happens for each character; Tag and TempTag are compared
  when TagState is 2. NOTE(review): the state transitions themselves are not
  visible in this excerpt - presumably 0 = before the opening tag is closed,
  1 = inside the body, 2 = collecting a candidate closing tag, 3 = finished;
  TODO confirm against the full source. }
262 function TZPostgreSQLQuoteState.GetDollarQuotedString(Stream: TStream; QuoteChar: Char): string;
265 Tag, TempTag: string;
{ The loop ends when the stream has no more data (further exits are not
  visible here). }
270 while Stream.Read(ReadChar, SizeOf(Char)) > 0 do
{ Each occurrence of QuoteChar is handled according to the current TagState. }
272 if (ReadChar = QuoteChar) then
274 if (TagState = 0) then
279 else if (TagState = 1) then
284 else if (TagState = 2) then
{ The candidate tag collected in TempTag is checked against the opening Tag. }
286 if TempTag = Tag then
293 Result := Result + ReadChar;
296 TempTag := TempTag + ReadChar
297 else if TagState = 3 then
303 Returns a quoted string token from a reader. This method
304 will collect characters until it sees same QuoteChar,
305 omitting doubled chars
307 @return a quoted string token from a reader
{ Reads a quoted string delimited by QuoteChar from the stream. LastChar
  remembers the previously read character so that doubled quote characters
  and, when EscapeSyntax is set, characters following a backslash can be
  recognised. NOTE(review): the statements executed in the individual
  branches are not visible in this excerpt; the notes below are limited to
  what the conditions show. }
309 function TZPostgreSQLQuoteState.GetQuotedString(Stream: TStream; QuoteChar: Char;
310 EscapeSyntax: Boolean): String;
311 const BackSlash = Char('\');
321 while Stream.Read(ReadChar, SizeOf(Char)) > 0 do
{ Occurrences of QuoteChar are detected here - presumably counted in
  QuoteCount, TODO confirm. }
323 if ReadChar = QuoteChar then
{ Previous character was a quote and the current one is not. }
326 if (LastChar = QuoteChar) and (ReadChar <> QuoteChar) then
{ An even quote count is tested here; the Seek below steps back one Char,
  presumably to un-read the character after the closing quote. }
328 if QuoteCount mod 2 = 0 then
330 Stream.Seek(-SizeOf(Char), soFromCurrent);
334 Result := Result + ReadChar;
{ Backslash handling applies only when EscapeSyntax is set. }
335 if (LastChar = BackSlash) and EscapeSyntax then
{ Two consecutive quote characters. }
337 else if (LastChar = QuoteChar) and (ReadChar = QuoteChar) then
339 else LastChar := ReadChar;
344 Returns a quoted string token with leading modifier from a reader.
346 @return a quoted string token from a reader
{ Reads a single-quoted string that may be prefixed by a modifier and returns
  the modifier followed by the text delivered by GetQuotedString. }
348 function TZPostgreSQLQuoteState.GetQuotedStringWithModifier(Stream: TStream;
349 FirstChar: Char): string;
352 EscapeSyntax: Boolean;
{ ResetPosition is False here, so whatever GetModifier read stays consumed. }
354 Modifier := GetModifier(Stream, FirstChar, False);
{ With a modifier, FirstChar was the modifier letter; the string itself is
  delimited by single quotes. }
355 if (Modifier <> '') then
356 FirstChar := SingleQuoteChar;
{ Backslashes are escapes either when no modifier is present and
  standard-conforming strings are off, or when the modifier is E or e. }
357 EscapeSyntax := (not FStandardConformingStrings and (Modifier = '')) or
358 (UpperCase(Modifier) = 'E');
359 Result := Modifier + GetQuotedString(Stream, FirstChar, EscapeSyntax);
363 Return a quoted string token from a reader. This method
364 will collect characters until it sees a match to the
365 character that the tokenizer used to switch to this state.
367 @return a quoted string token from a reader
{ Dispatches on FirstChar: a double quote yields a ttWord token, a dollar sign
  a dollar-quoted ttQuoted token, and anything else a ttQuoted token read
  with optional modifier handling. }
369 function TZPostgreSQLQuoteState.NextToken(Stream: TStream;
370 FirstChar: Char; Tokenizer: TZTokenizer): TZToken;
372 Result.Value := FirstChar;
{ Double-quoted text is a name, so it is typed as a word; backslash escapes
  are switched off for it. }
373 if FirstChar = NameQuoteChar then
375 Result.TokenType := ttWord;
376 Result.Value := GetQuotedString(Stream, FirstChar, False);
{ Dollar-quoted string. }
378 else if FirstChar = DollarQuoteChar then
380 Result.TokenType := ttQuoted;
381 Result.Value := GetDollarQuotedString(Stream, FirstChar);
{ Remaining case: single-quoted string, possibly introduced by a modifier. }
385 Result.TokenType := ttQuoted;
386 Result.Value := GetQuotedStringWithModifier(Stream, FirstChar);
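391 Sets whether string literals follow the SQL standard (standard-conforming strings)
392 @param Value True means backslashes in plain quoted strings are ordinary characters, False means they are escape characters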
{ Stores Value in FStandardConformingStrings. GetQuotedStringWithModifier
  reads that field: when it is False, strings without a modifier are read
  with backslash escape syntax. }
394 procedure TZPostgreSQLQuoteState.SetStandardConformingStrings(const Value:
397 FStandardConformingStrings := Value;
400 { TZPostgreSQLCommentState }
403 Reads everything up to the closing star and slash that ends the comment,
404 keeping track of nested comments, and collects the characters read.
405 @return the comment text that was read from the stream
{ Reads the remainder of a multi-line comment from the stream and returns the
  characters read. NestedLevel is used to deal with comments that contain
  further slash-star / star-slash pairs. }
407 function TZPostgreSQLCommentState.GetMultiLineComment(Stream: TStream): string;
409 ReadChar, LastChar: Char;
410 NestedLevel: Integer;
415 while Stream.Read(ReadChar, 1 * SizeOf(Char)) > 0 do
{ Every character is appended before it is examined, so the terminating
  star-slash is part of the result. }
417 Result := Result + ReadChar;
{ A star-slash pair was seen. NOTE(review): the statements that change
  NestedLevel are not visible in this excerpt; presumably the level is
  decreased here and the loop is left once it reaches zero - TODO confirm. }
418 if (LastChar = '*') and (ReadChar = '/') then
421 if NestedLevel = 0 then
{ A slash-star pair was seen - presumably a nested comment opens here. }
424 if (LastChar = '/') and (ReadChar = '*') then
{ Remember the current character for the pair checks of the next round. }
426 LastChar := ReadChar;
431 Gets PostgreSQL-specific comments such as -- or /* */.
432 @return a comment token, or the result of delegating to the
433 tokenizer's symbol state when no comment starts here
{ Recognises the two comment forms handled here: '--' up to the end of the
  line and slash-star multi-line comments. When FirstChar does not start a
  comment, the token is produced by the tokenizer's symbol state if one is
  assigned. }
435 function TZPostgreSQLCommentState.NextToken(Stream: TStream;
436 FirstChar: Char; Tokenizer: TZTokenizer): TZToken;
{ ttUnknown marks "no comment found yet"; it is tested again at the end. }
441 Result.TokenType := ttUnknown;
442 Result.Value := FirstChar;
{ A '-' starts a comment only when a second '-' follows. }
444 if FirstChar = '-' then
446 ReadNum := Stream.Read(ReadChar, SizeOf(Char));
447 if (ReadNum > 0) and (ReadChar = '-') then
449 Result.TokenType := ttComment;
450 Result.Value := '--' + GetSingleLineComment(Stream);
{ Steps back one Char - presumably to un-read the look-ahead character when
  it was not a second '-'; the guarding condition is not visible here. }
455 Stream.Seek(-SizeOf(Char), soFromCurrent);
{ A '/' starts a comment only when a '*' follows. }
458 else if FirstChar = '/' then
460 ReadNum := Stream.Read(ReadChar, SizeOf(Char));
461 if (ReadNum > 0) and (ReadChar = '*') then
463 Result.TokenType := ttComment;
464 Result.Value := '/*' + GetMultiLineComment(Stream);
{ Same un-read step as in the '-' branch; guarding condition not visible. }
469 Stream.Seek(-SizeOf(Char), soFromCurrent);
{ Not a comment: let the symbol state tokenize FirstChar. }
473 if (Result.TokenType = ttUnknown) and (Tokenizer.SymbolState <> nil) then
474 Result := Tokenizer.SymbolState.NextToken(Stream, FirstChar, Tokenizer);
477 { TZPostgreSQLSymbolState }
480 Creates this PostgreSQL-specific symbol state object.
482 constructor TZPostgreSQLSymbolState.Create;
495 { TZPostgreSQLWordState }
498 Constructs this PostgreSQL-specific word state object.
{ Sets up which characters belong to a word. The Boolean argument of
  SetWordChars is presumably the flag that enables or disables the range -
  verify against ZTokenizer. }
500 constructor TZPostgreSQLWordState.Create;
{ Start by passing False for #0..#191, then pass True for the ranges below. }
502 SetWordChars(#0, #191, False);
503 SetWordChars(#192, high(char), True);
504 SetWordChars('a', 'z', True);
505 SetWordChars('A', 'Z', True);
506 SetWordChars('0', '9', True);
507 SetWordChars('_', '_', True);
508 SetWordChars('$', '$', True);
512 Tells the PostgreSQL tokenizer whether standard-conforming strings are in effect.
513 @param Value True means backslashes in plain quoted strings are not escape characters
{ Forwards Value to the quote state. The "as" cast assumes QuoteState is a
  TZPostgreSQLQuoteState, which is what the constructor of this tokenizer
  assigns. }
515 procedure TZPostgreSQLTokenizer.SetStandardConformingStrings(
516 const Value: Boolean);
518 (QuoteState as TZPostgreSQLQuoteState).SetStandardConformingStrings(Value);
522 Constructs a tokenizer with a default state table (as
523 described in the class comment).
{ Creates the state objects and assigns a state to every character range. }
525 constructor TZPostgreSQLTokenizer.Create;
{ Generic escape and whitespace states first, then the PostgreSQL-specific
  states declared in this unit. }
527 EscapeState := TZEscapeState.Create;
528 WhitespaceState := TZWhitespaceState.Create;
530 SymbolState := TZPostgreSQLSymbolState.Create;
531 NumberState := TZPostgreSQLNumberState.Create;
532 QuoteState := TZPostgreSQLQuoteState.Create;
533 WordState := TZPostgreSQLWordState.Create;
534 CommentState := TZPostgreSQLCommentState.Create;
{ Base mapping by character code. The calls further down address sub-ranges
  of these - presumably a later call replaces the earlier mapping; verify
  against SetCharacterState in ZTokenizer. }
536 SetCharacterState(#0, #32, WhitespaceState);
537 SetCharacterState(#33, #191, SymbolState);
538 SetCharacterState(#192, High(Char), WordState);
{ Letters and underscore start words. }
540 SetCharacterState('a', 'z', WordState);
541 SetCharacterState('A', 'Z', WordState);
542 SetCharacterState('_', '_', WordState);
{ Digits and '.' start numbers; TZPostgreSQLNumberState.NextToken hands a
  lone '.' on to the symbol state. }
544 SetCharacterState('0', '9', NumberState);
545 SetCharacterState('.', '.', NumberState);
{ Double quote, single quote and dollar sign all enter the quote state. }
547 SetCharacterState(NameQuoteChar, NameQuoteChar, QuoteState);
548 SetCharacterState(SingleQuoteChar, SingleQuoteChar, QuoteState);
549 SetCharacterState(DollarQuoteChar, DollarQuoteChar, QuoteState);
{ '/' and '-' may start a comment; the comment state decides. }
551 SetCharacterState('/', '/', CommentState);
552 SetCharacterState('-', '-', CommentState);
556 Checks whether a word state is actually the start of a quoted string with a modifier and, if so, selects the QuoteState.
557 @param Stream the read stream that has to be checked for the next chars.
558 @param FirstChar the first char that was read and that selected the state
559 @returns either the state chosen by the inherited method or the QuoteState
{ Starts from the state chosen by the inherited method. When that is a word
  state, the stream is probed for a string modifier followed by a single
  quote; if one is found the quote state is returned instead. }
561 function TZPostgreSQLTokenizer.CheckEscapeState(const ActualState:
562 TZTokenizerState; Stream: TStream; const FirstChar: Char): TZTokenizerState;
566 Result := inherited CheckEscapeState(ActualState, Stream, FirstChar);
567 if (Result is TZWordState) then
{ GetModifier is called with its default ResetPosition = True, so it moves
  the stream back by the number of bytes it read while probing. }
569 Modifier := (QuoteState as TZPostgreSQLQuoteState).GetModifier(Stream, FirstChar);
570 if (Modifier <> '') then
571 Result := QuoteState;