zeoslib  UNKNOWN
 All Files
ZPostgreSqlToken.pas
Go to the documentation of this file.
1 {*********************************************************}
2 { }
3 { Zeos Database Objects }
4 { String tokenizing classes for PostgreSQL }
5 { }
6 { Originally written by Sergey Seroukhov }
7 { }
8 {*********************************************************}
9 
10 {@********************************************************}
11 { Copyright (c) 1999-2012 Zeos Development Group }
12 { }
13 { License Agreement: }
14 { }
15 { This library is distributed in the hope that it will be }
16 { useful, but WITHOUT ANY WARRANTY; without even the }
17 { implied warranty of MERCHANTABILITY or FITNESS FOR }
18 { A PARTICULAR PURPOSE. See the GNU Lesser General }
19 { Public License for more details. }
20 { }
21 { The source code of the ZEOS Libraries and packages are }
22 { distributed under the Library GNU General Public }
23 { License (see the file COPYING / COPYING.ZEOS) }
24 { with the following modification: }
25 { As a special exception, the copyright holders of this }
26 { library give you permission to link this library with }
27 { independent modules to produce an executable, }
28 { regardless of the license terms of these independent }
29 { modules, and to copy and distribute the resulting }
30 { executable under terms of your choice, provided that }
31 { you also meet, for each linked independent module, }
32 { the terms and conditions of the license of that module. }
33 { An independent module is a module which is not derived }
34 { from or based on this library. If you modify this }
35 { library, you may extend this exception to your version }
36 { of the library, but you are not obligated to do so. }
37 { If you do not wish to do so, delete this exception }
38 { statement from your version. }
39 { }
40 { }
41 { The project web site is located on: }
42 { http://zeos.firmos.at (FORUM) }
43 { http://sourceforge.net/p/zeoslib/tickets/ (BUGTRACKER)}
44 { svn://svn.code.sf.net/p/zeoslib/code-0/trunk (SVN) }
45 { }
46 { http://www.sourceforge.net/projects/zeoslib. }
47 { }
48 { }
49 { Zeos Development Group. }
50 {********************************************************@}
51 
52 unit ZPostgreSqlToken;
53 
54 interface
55 
56 {$I ZParseSql.inc}
57 
58 uses
59  Classes, {$IFDEF MSEgui}mclasses,{$ENDIF} SysUtils,
60  ZTokenizer, ZGenericSqlToken, ZMySqlToken;
61 
62 type
63 
64  {** Implements a PostgreSQL-specific number state object. }
65  TZPostgreSQLNumberState = class (TZNumberState)
66  public
67  function NextToken(Stream: TStream; FirstChar: Char;
68  Tokenizer: TZTokenizer): TZToken; override;
69  end;
70 
71  {** Implements a PostgreSQL-specific quote string state object. }
72  TZPostgreSQLQuoteState = class (TZMySQLQuoteState)
73  private
74  FStandardConformingStrings: Boolean;
75  protected
76  function GetModifier(Stream: TStream; FirstChar: Char; ResetPosition: Boolean = True): string;
77  function GetDollarQuotedString(Stream: TStream; QuoteChar: Char): string;
78  function GetQuotedString(Stream: TStream; QuoteChar: Char; EscapeSyntax: Boolean): String;
79  function GetQuotedStringWithModifier(Stream: TStream; FirstChar: Char): string;
80  public
81  function NextToken(Stream: TStream; FirstChar: Char;
82  Tokenizer: TZTokenizer): TZToken; override;
83  procedure SetStandardConformingStrings(const Value: Boolean);
84  end;
85 
86  {**
87  This state will either delegate to a comment-handling
88  state, or return a token with just a slash in it.
89  }
90  TZPostgreSQLCommentState = class (TZCppCommentState)
91  protected
92  function GetMultiLineComment(Stream: TStream): string; override;
93  public
94  function NextToken(Stream: TStream; FirstChar: Char;
95  Tokenizer: TZTokenizer): TZToken; override;
96  end;
97 
98  {** Implements a symbol state object. }
99  TZPostgreSQLSymbolState = class (TZSymbolState)
100  public
101  constructor Create;
102  end;
103 
104  {** Implements a word state object. }
105  TZPostgreSQLWordState = class (TZGenericSQLWordState)
106  public
107  constructor Create;
108  end;
109 
110  IZPostgreSQLTokenizer = interface (IZTokenizer)
111  ['{82392175-9065-4048-9974-EE1253B921B4}']
112  procedure SetStandardConformingStrings(const Value: Boolean);
113  end;
114 
115  {** Implements a default tokenizer object. }
116  TZPostgreSQLTokenizer = class (TZTokenizer, IZPostgreSQLTokenizer)
117  protected
118  function CheckEscapeState(const ActualState: TZTokenizerState; Stream: TStream;
119  const FirstChar: Char): TZTokenizerState; override;
120  public
121  procedure SetStandardConformingStrings(const Value: Boolean);
122  constructor Create;
123  end;
124 
125 implementation
126 
127 uses ZCompatibility;
128 
129 const
130  NameQuoteChar = Char('"');
131  DollarQuoteChar = Char('$');
132  SingleQuoteChar = Char('''');
133 
134 { TZPostgreSQLNumberState }
135 
136 {**
137  Return a number token from a reader.
138  @return a number token from a reader
139 }
140 function TZPostgreSQLNumberState.NextToken(Stream: TStream; FirstChar: Char;
141  Tokenizer: TZTokenizer): TZToken;
142 var
143  TempChar: Char;
144  FloatPoint: Boolean;
145  LastChar: Char;
146 
147  function ReadDecDigits: string;
148  begin
149  Result := '';
150  LastChar := #0;
151  while Stream.Read(LastChar, SizeOf(Char)) > 0 do
152  begin
153  if CharInSet(LastChar, ['0'..'9']) then
154  begin
155  Result := Result + LastChar;
156  LastChar := #0;
157  end
158  else
159  begin
160  Stream.Seek(-SizeOf(Char), soFromCurrent);
161  Break;
162  end;
163  end;
164  end;
165 
166 begin
167  FloatPoint := FirstChar = '.';
168  Result.Value := FirstChar;
169  Result.TokenType := ttUnknown;
170  LastChar := #0;
171 
172  { Reads the first part of the number before decimal point }
173  if not FloatPoint then
174  begin
175  Result.Value := Result.Value + ReadDecDigits;
176  FloatPoint := LastChar = '.';
177  if FloatPoint then
178  begin
179  Stream.Read(TempChar, SizeOf(Char));
180  Result.Value := Result.Value + TempChar;
181  end;
182  end;
183 
184  { Reads the second part of the number after decimal point }
185  if FloatPoint then
186  Result.Value := Result.Value + ReadDecDigits;
187 
188  { Reads a power part of the number }
189  if CharInSet(LastChar, ['e','E']) then
190  begin
191  Stream.Read(TempChar, SizeOf(Char));
192  Result.Value := Result.Value + TempChar;
193  FloatPoint := True;
194 
195  Stream.Read(TempChar, SizeOf(Char));
196  if CharInSet(TempChar, ['0'..'9','-','+']) then
197  Result.Value := Result.Value + TempChar + ReadDecDigits
198  else
199  begin
200  Result.Value := Copy(Result.Value, 1, Length(Result.Value) - 1);
201  Stream.Seek(-2*SizeOf(Char), soFromCurrent);
202  end;
203  end;
204 
205  { Prepare the result }
206  if Result.Value = '.' then
207  begin
208  if Tokenizer.SymbolState <> nil then
209  Result := Tokenizer.SymbolState.NextToken(Stream, FirstChar, Tokenizer);
210  end
211  else
212  begin
213  if FloatPoint then
214  Result.TokenType := ttFloat
215  else Result.TokenType := ttInteger;
216  end;
217 end;
218 
219 { TZPostgreSQLQuoteState }
220 
221 {**
222  Retrieves string modifier from quoted string.
223  @return a string with modifier for valid quoted string with modifier
224  or empty string otherwise.
225 }
226 function TZPostgreSQLQuoteState.GetModifier(Stream: TStream;
227  FirstChar: Char; ResetPosition: boolean = True): string;
228 var
229  ReadChar: Char;
230  Modifier: string;
231  ReadNum: Integer;
232 begin
233  Result := '';
234  if CharInSet(FirstChar, ['E', 'e', 'B', 'b', 'X', 'x', 'U', 'u']) then
235  begin
236  Modifier := FirstChar;
237  ReadNum := Stream.Read(ReadChar, SizeOf(Char));
238  if ReadNum = SizeOf(Char) then
239  begin
240  if (UpperCase(FirstChar) = 'U') and (ReadChar = '&') then // Check for U& modifier
241  begin
242  Modifier := Modifier + ReadChar;
243  ReadNum := ReadNum + Stream.Read(ReadChar, SizeOf(Char));
244  end;
245 
246  if (ReadChar = SingleQuoteChar) then
247  Result := Modifier;
248 
249  if ResetPosition then
250  Stream.Seek(-ReadNum, soFromCurrent);
251  end;
252  end;
253 end;
254 
255 {**
256  Returns a quoted string token from a reader. This method
257  will get Tag from first char to QuoteChar and will collect
258  characters until reaches same Tag.
259 
260  @return a quoted string token from a reader
261 }
262 function TZPostgreSQLQuoteState.GetDollarQuotedString(Stream: TStream; QuoteChar: Char): string;
263 var
264  ReadChar: Char;
265  Tag, TempTag: string;
266  TagState: integer;
267 begin
268  Result := QuoteChar;
269  TagState := 0;
270  while Stream.Read(ReadChar, SizeOf(Char)) > 0 do
271  begin
272  if (ReadChar = QuoteChar) then
273  begin
274  if (TagState = 0) then
275  begin
276  TagState := 1;
277  Tag := Result;
278  end
279  else if (TagState = 1) then
280  begin
281  TagState := 2;
282  TempTag := '';
283  end
284  else if (TagState = 2) then
285  begin
286  if TempTag = Tag then
287  TagState := 3
288  else
289  TempTag := '';
290  end;
291  end;
292 
293  Result := Result + ReadChar;
294 
295  if TagState = 2 then
296  TempTag := TempTag + ReadChar
297  else if TagState = 3 then
298  Break;
299  end;
300 end;
301 
302 {**
303  Returns a quoted string token from a reader. This method
304  will collect characters until it sees same QuoteChar,
305  ommitting doubled chars
306 
307  @return a quoted string token from a reader
308 }
309 function TZPostgreSQLQuoteState.GetQuotedString(Stream: TStream; QuoteChar: Char;
310  EscapeSyntax: Boolean): String;
311 const BackSlash = Char('\');
312 var
313  ReadChar: Char;
314  LastChar: Char;
315  QuoteCount: Integer;
316 begin
317  LastChar := #0;
318  Result := QuoteChar;
319  QuoteCount := 1;
320 
321  while Stream.Read(ReadChar, SizeOf(Char)) > 0 do
322  begin
323  if ReadChar = QuoteChar then
324  Inc(QuoteCount);
325 
326  if (LastChar = QuoteChar) and (ReadChar <> QuoteChar) then
327  begin
328  if QuoteCount mod 2 = 0 then
329  begin
330  Stream.Seek(-SizeOf(Char), soFromCurrent);
331  Break;
332  end;
333  end;
334  Result := Result + ReadChar;
335  if (LastChar = BackSlash) and EscapeSyntax then
336  LastChar := #0
337  else if (LastChar = QuoteChar) and (ReadChar = QuoteChar) then
338  LastChar := #0
339  else LastChar := ReadChar;
340  end;
341 end;
342 
343 {**
344  Returns a quoted string token with leading modifier from a reader.
345 
346  @return a quoted string token from a reader
347 }
348 function TZPostgreSQLQuoteState.GetQuotedStringWithModifier(Stream: TStream;
349  FirstChar: Char): string;
350 var
351  Modifier: string;
352  EscapeSyntax: Boolean;
353 begin
354  Modifier := GetModifier(Stream, FirstChar, False);
355  if (Modifier <> '') then
356  FirstChar := SingleQuoteChar;
357  EscapeSyntax := (not FStandardConformingStrings and (Modifier = '')) or
358  (UpperCase(Modifier) = 'E');
359  Result := Modifier + GetQuotedString(Stream, FirstChar, EscapeSyntax);
360 end;
361 
362 {**
363  Return a quoted string token from a reader. This method
364  will collect characters until it sees a match to the
365  character that the tokenizer used to switch to this state.
366 
367  @return a quoted string token from a reader
368 }
369 function TZPostgreSQLQuoteState.NextToken(Stream: TStream;
370  FirstChar: Char; Tokenizer: TZTokenizer): TZToken;
371 begin
372  Result.Value := FirstChar;
373  if FirstChar = NameQuoteChar then
374  begin
375  Result.TokenType := ttWord;
376  Result.Value := GetQuotedString(Stream, FirstChar, False);
377  end
378  else if FirstChar = DollarQuoteChar then
379  begin
380  Result.TokenType := ttQuoted;
381  Result.Value := GetDollarQuotedString(Stream, FirstChar);
382  end
383  else
384  begin
385  Result.TokenType := ttQuoted;
386  Result.Value := GetQuotedStringWithModifier(Stream, FirstChar);
387  end;
388 end;
389 
390 {**
391  Sets how backslashes in quoted strings are handled
392  @param True means backslashes are escape characters
393 }
394 procedure TZPostgreSQLQuoteState.SetStandardConformingStrings(const Value:
395  Boolean);
396 begin
397  FStandardConformingStrings := Value;
398 end;
399 
400 { TZPostgreSQLCommentState }
401 
402 {**
403  Ignore everything up to a last closing star and slash, and
404  then return the tokenizer's next token.
405  @return the tokenizer's next token
406 }
407 function TZPostgreSQLCommentState.GetMultiLineComment(Stream: TStream): string;
408 var
409  ReadChar, LastChar: Char;
410  NestedLevel: Integer;
411 begin
412  LastChar := #0;
413  NestedLevel := 1;
414  Result := '';
415  while Stream.Read(ReadChar, 1 * SizeOf(Char)) > 0 do
416  begin
417  Result := Result + ReadChar;
418  if (LastChar = '*') and (ReadChar = '/') then
419  begin
420  Dec(NestedLevel);
421  if NestedLevel = 0 then
422  Break;
423  end;
424  if (LastChar = '/') and (ReadChar = '*') then
425  Inc(NestedLevel);
426  LastChar := ReadChar;
427  end;
428 end;
429 
430 {**
431  Gets a PostgreSQL specific comments like -- or /* */.
432  @return either just a slash token, or the results of
433  delegating to a comment-handling state
434 }
435 function TZPostgreSQLCommentState.NextToken(Stream: TStream;
436  FirstChar: Char; Tokenizer: TZTokenizer): TZToken;
437 var
438  ReadChar: Char;
439  ReadNum: Integer;
440 begin
441  Result.TokenType := ttUnknown;
442  Result.Value := FirstChar;
443 
444  if FirstChar = '-' then
445  begin
446  ReadNum := Stream.Read(ReadChar, SizeOf(Char));
447  if (ReadNum > 0) and (ReadChar = '-') then
448  begin
449  Result.TokenType := ttComment;
450  Result.Value := '--' + GetSingleLineComment(Stream);
451  end
452  else
453  begin
454  if ReadNum > 0 then
455  Stream.Seek(-SizeOf(Char), soFromCurrent);
456  end;
457  end
458  else if FirstChar = '/' then
459  begin
460  ReadNum := Stream.Read(ReadChar, SizeOf(Char));
461  if (ReadNum > 0) and (ReadChar = '*') then
462  begin
463  Result.TokenType := ttComment;
464  Result.Value := '/*' + GetMultiLineComment(Stream);
465  end
466  else
467  begin
468  if ReadNum > 0 then
469  Stream.Seek(-SizeOf(Char), soFromCurrent);
470  end;
471  end;
472 
473  if (Result.TokenType = ttUnknown) and (Tokenizer.SymbolState <> nil) then
474  Result := Tokenizer.SymbolState.NextToken(Stream, FirstChar, Tokenizer);
475 end;
476 
477 { TZPostgreSQLSymbolState }
478 
479 {**
480  Creates this PostgreSQL-specific symbol state object.
481 }
482 constructor TZPostgreSQLSymbolState.Create;
483 begin
484  inherited Create;
485  Add('<=');
486  Add('>=');
487  Add('<>');
488  Add('<<');
489  Add('>>');
490  Add('~*');
491  Add('!~');
492  Add('!~*');
493 end;
494 
495 { TZPostgreSQLWordState }
496 
497 {**
498  Constructs this PostgreSQL-specific word state object.
499 }
500 constructor TZPostgreSQLWordState.Create;
501 begin
502  SetWordChars(#0, #191, False);
503  SetWordChars(#192, high(char), True);
504  SetWordChars('a', 'z', True);
505  SetWordChars('A', 'Z', True);
506  SetWordChars('0', '9', True);
507  SetWordChars('_', '_', True);
508  SetWordChars('$', '$', True);
509 end;
510 
511 {**
512  informs the Postgre Tokenizer '\' should be handled as Escape-char
513  @param True means backslashes are quoted strings
514 }
515 procedure TZPostgreSQLTokenizer.SetStandardConformingStrings(
516  const Value: Boolean);
517 begin
518  (QuoteState as TZPostgreSQLQuoteState).SetStandardConformingStrings(Value);
519 end;
520 
521 {**
522  Constructs a tokenizer with a default state table (as
523  described in the class comment).
524 }
525 constructor TZPostgreSQLTokenizer.Create;
526 begin
527  EscapeState := TZEscapeState.Create;
528  WhitespaceState := TZWhitespaceState.Create;
529 
530  SymbolState := TZPostgreSQLSymbolState.Create;
531  NumberState := TZPostgreSQLNumberState.Create;
532  QuoteState := TZPostgreSQLQuoteState.Create;
533  WordState := TZPostgreSQLWordState.Create;
534  CommentState := TZPostgreSQLCommentState.Create;
535 
536  SetCharacterState(#0, #32, WhitespaceState);
537  SetCharacterState(#33, #191, SymbolState);
538  SetCharacterState(#192, High(Char), WordState);
539 
540  SetCharacterState('a', 'z', WordState);
541  SetCharacterState('A', 'Z', WordState);
542  SetCharacterState('_', '_', WordState);
543 
544  SetCharacterState('0', '9', NumberState);
545  SetCharacterState('.', '.', NumberState);
546 
547  SetCharacterState(NameQuoteChar, NameQuoteChar, QuoteState);
548  SetCharacterState(SingleQuoteChar, SingleQuoteChar, QuoteState);
549  SetCharacterState(DollarQuoteChar, DollarQuoteChar, QuoteState);
550 
551  SetCharacterState('/', '/', CommentState);
552  SetCharacterState('-', '-', CommentState);
553 end;
554 
555 {**
556  Checks if WordState is QuoteState with modifier and sets QuoteState.
557  @param Stream the Read-Stream which has to checked for Next-Chars.
558  @FirstChar The FirstChar which was readed and sets the Symbolstate
559  @returns either the given SymbolState or the QuoteState
560 }
561 function TZPostgreSQLTokenizer.CheckEscapeState(const ActualState:
562  TZTokenizerState; Stream: TStream; const FirstChar: Char): TZTokenizerState;
563 var
564  Modifier: string;
565 begin
566  Result := inherited CheckEscapeState(ActualState, Stream, FirstChar);
567  if (Result is TZWordState) then
568  begin
569  Modifier := (QuoteState as TZPostgreSQLQuoteState).GetModifier(Stream, FirstChar);
570  if (Modifier <> '') then
571  Result := QuoteState;
572  end;
573 end;
574 
575 end.
576