1 {*********************************************************}
3 { Zeos Database Objects }
4 { PostgreSQL Database Connectivity Classes }
6 { Originally written by Sergey Seroukhov }
8 {*********************************************************}
10 {@********************************************************}
11 { Copyright (c) 1999-2012 Zeos Development Group }
13 { License Agreement: }
15 { This library is distributed in the hope that it will be }
16 { useful, but WITHOUT ANY WARRANTY; without even the }
17 { implied warranty of MERCHANTABILITY or FITNESS FOR }
18 { A PARTICULAR PURPOSE. See the GNU Lesser General }
19 { Public License for more details. }
21 { The source code of the ZEOS Libraries and packages are }
22 { distributed under the Library GNU General Public }
23 { License (see the file COPYING / COPYING.ZEOS) }
24 { with the following modification: }
25 { As a special exception, the copyright holders of this }
26 { library give you permission to link this library with }
27 { independent modules to produce an executable, }
28 { regardless of the license terms of these independent }
29 { modules, and to copy and distribute the resulting }
30 { executable under terms of your choice, provided that }
31 { you also meet, for each linked independent module, }
32 { the terms and conditions of the license of that module. }
33 { An independent module is a module which is not derived }
34 { from or based on this library. If you modify this }
35 { library, you may extend this exception to your version }
36 { of the library, but you are not obligated to do so. }
37 { If you do not wish to do so, delete this exception }
38 { statement from your version. }
41 { The project web site is located on: }
42 { http://zeos.firmos.at (FORUM) }
43 { http://sourceforge.net/p/zeoslib/tickets/ (BUGTRACKER)}
44 { svn://svn.code.sf.net/p/zeoslib/code-0/trunk (SVN) }
46 { http://www.sourceforge.net/projects/zeoslib. }
49 { Zeos Development Group. }
50 {********************************************************@}
59 Classes, {$IFDEF MSEgui}mclasses,{$ENDIF} Math,
60 {$IFDEF WITH_LCONVENCODING}
62 LCLVersion, LConvEncoding,
64 {$IF defined(MSWINDOWS) and not defined(WITH_UNICODEFROMLOCALECHARS)}
// Code page identifiers below 1200 (EBCDIC, MS-DOS/OEM and ANSI/OEM pages).
// Each constant holds the numeric code page named in its trailing comment.
70 //zCP_ACP = 0; {ASCII US}
71 zCP_EBC037 = 37; {IBM EBCDIC US-Canada}
72 zCP_EBC273 = 273; {EBCDIC Code Page 273/1 8-bit Austrian German}
73 zCP_EBC277 = 277; {EBCDIC Code Page 277/1 8-bit Danish}
74 zCP_EBC278 = 278; {EBCDIC Code Page 278/1 8-bit Swedish}
75 zCP_EBC280 = 280; {EBCDIC Code Page 280/1 8-bit Italian}
76 zCP_EBC284 = 284; {EBCDIC Code Page 284 8-bit Latin American/Spanish}
78 zCP_DOS437 = 437; {IBM437/MS-DOS Codepage 437 (US)}
// NOTE(review): described as EBCDIC although the name carries the DOS prefix.
79 zCP_DOS500 = 500; {IBM EBCDIC International}
80 zCP_DOS708 = 708; {Arabic (ASMO 708)}
81 zCP_DOS709 = 709; {Arabic (ASMO-449+, BCON V4)}
82 zCP_DOS710 = 710; {Arabic - Transparent Arabic}
83 zCP_DOS720 = 720; {Arabic (Transparent ASMO); Arabic (DOS)}
84 zCP_DOS737 = 737; {OEM Greek (formerly 437G); Greek (DOS)}
85 zCP_DOS775 = 775; {MS-DOS Codepage 775 (BaltRim)}
86 zCP_DOS850 = 850; {MS-DOS Codepage 850 (Multilingual Latin 1)}
87 zCP_DOS851 = 851; {MS-DOS Codepage 851 (Greece) - obsolete}
88 zCP_DOS852 = 852; {ibm852 852 east european(DOS)}
89 zCP_DOS853 = 853; {MS-DOS Codepage 853 (Multilingual Latin 3)}
90 zCP_DOS855 = 855; {MS-DOS Codepage 855 (Russia) - obsolete}
92 zCP_DOS857 = 857; {MS-DOS Codepage 857 (Multilingual Latin 5)}
93 zCP_DOS858 = 858; {MS-DOS Codepage 858 Latin I + Euro symbol}
// 895 is listed out of numeric order; the value itself is unaffected.
94 zCP_DOS895 = 895; {MS-DOS Codepage 895 (Kamenicky CS)}
95 zCP_DOS860 = 860; {MS-DOS Codepage 860 (Portugal)}
96 zCP_DOS861 = 861; {MS-DOS Codepage 861 (Iceland)}
97 zCP_DOS862 = 862; {MS-DOS Codepage 862 (Israel)}
98 zCP_DOS863 = 863; {MS-DOS Codepage 863 (Canada (French))}
99 zCP_DOS864 = 864; {MS-DOS Codepage 864 (Arabic) without BOX DRAWINGS below 20}
100 zCP_DOS865 = 865; {MS-DOS Codepage 865 (Norway)}
101 zCP_DOS866 = 866; {ibm866 866 Cyrl (DOS)}
102 zCP_DOS869 = 869; {MS-DOS Codepage 869 (Greece)}
// NOTE(review): another EBCDIC page that carries the DOS prefix.
103 zCP_DOS870 = 870; {IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2}
// NOTE(review): the "same as 28605" remark conflicts with 28605 being declared
// as ISO 8859-15 Latin 9 further down in this list - verify the description.
104 zCP_DOS874 = 874; {ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows)}
105 zCP_EBC875 = 875; {EBCDIC Codepage 875 (Greek)}
108 zCP_EBC924 = 924; {Latin 9 EBCDIC 924}
109 zCP_SHIFTJS = 932; {ANSI/OEM Japanese; Japanese (Shift-JIS)}
110 zCP_GB2312 = 936; {ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)}
111 zCP_EUCKR = 949; {ANSI/OEM Korean (Unified Hangul Code)}
112 zCP_Big5 = 950; {ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)}
113 zCP_IBM1026 = 1026; {EBCDIC Code Page 1026 8-bit Turkish}
114 zCP_IBM01047 = 1047; {IBM EBCDIC Latin 1/Open System}
115 zCP_IBM01140 = 1140; {IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)}
116 zCP_IBM01141 = 1141; {IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)}
117 zCP_IBM01142 = 1142; {IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro)}
118 zCP_IBM01143 = 1143; {IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro)}
119 zCP_IBM01144 = 1144; {IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro)}
120 zCP_IBM01145 = 1145; {IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro)}
121 zCP_IBM01146 = 1146; {IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro)}
122 zCP_IBM01147 = 1147; {IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)}
123 zCP_IBM01148 = 1148; {IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)}
124 zCP_IBM01149 = 1149; {IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)}
// UTF-16, Windows ANSI, Johab, Macintosh and UTF-32 code page identifiers.
126 zCP_UTF16 = 1200; {utf-16; Indicates the Unicode character set, Windows code page 1200}
127 zCP_UTF16BE = 1201; {Unicode UTF-16, big endian byte order; available only to managed applications}
128 zCP_WIN1250 = 1250; {Microsoft Windows Codepage 1250 (East European)}
129 zCP_WIN1251 = 1251; {Microsoft Windows Codepage 1251 (Cyrl)}
130 zCP_WIN1252 = 1252; {Microsoft Windows Codepage 1252 (ANSI), US-ASCII}
131 zCP_WIN1253 = 1253; {Microsoft Windows Codepage 1253 (Greek)}
132 zCP_WIN1254 = 1254; {Microsoft Windows Codepage 1254 (Turk)}
133 zCP_WIN1255 = 1255; {Microsoft Windows Codepage 1255 (Hebrew)}
// NOTE(review): this is the only constant in the list with a "cCP_" prefix
// instead of "zCP_". It looks like a typo, but renaming it would break any
// unit that already references cCP_WIN1256 - confirm before changing.
134 cCP_WIN1256 = 1256; {Microsoft Windows Codepage 1256 (Arab)}
135 zCP_WIN1257 = 1257; {Microsoft Windows Codepage 1257 (BaltRim)}
136 zCP_WIN1258 = 1258; {Microsoft Windows Codepage 1258 (Viet), TCVN-5712}
// Upper-case "Z" prefix; harmless because Pascal identifiers are case-insensitive.
137 ZCP_JOHAB = 1361; {Korean (Johab)}
// NOTE(review): the value is 2022 while the description mentions 50225, which
// is declared separately as zCP_iso_2022_kr - verify which value is intended.
138 zCP_KOREAN = 2022; {iso-2022-kr 50225 Korean (ISO)}
140 zCP_macintosh = 10000; {MAC Roman; Western European (Mac)}
141 zCP_x_mac_japanese = 10001; {Japanese (Mac)}
142 zCP_x_mac_chinesetrad = 10002; {MAC Traditional Chinese (Big5); Chinese Traditional (Mac)}
143 zCP_x_mac_korean = 10003; {Korean (Mac)}
144 zCP_x_mac_arabic = 10004; {Arabic (Mac)}
145 zCP_x_mac_hebrew = 10005; {Hebrew (Mac)}
146 zCP_x_mac_greek = 10006; {Greek (Mac)}
147 zCP_x_mac_cyrillic = 10007; {Cyrillic (Mac)}
148 zCP_x_mac_chinesesimp = 10008; {MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac)}
149 zCP_x_mac_romanian = 10010; {Romanian (Mac)}
150 zCP_x_mac_ukrainian = 10017; {Ukrainian (Mac)}
151 zCP_x_mac_thai = 10021; {Thai (Mac)}
152 zCP_x_mac_ce = 10029; {MAC Latin 2; Central European (Mac)}
153 zCP_x_mac_icelandic = 10079; {Icelandic (Mac)}
154 zCP_x_mac_turkish = 10081; {Turkish (Mac)}
155 zCP_x_mac_croatian = 10082; {Croatian (Mac)}
156 zCP_utf32 = 12000; {Unicode UTF-32, little endian byte order; available only to managed applications}
157 zCP_utf32BE = 12001; {Unicode UTF-32, big endian byte order; available only to managed applications}
// Code page identifiers in the 20000..38598 range: Taiwanese sets, IA5,
// 20xxx EBCDIC variants, KOI8 and the ISO 8859 family.
159 zCP_x_Chinese_CNS = 20000; {CNS Taiwan; Chinese Traditional (CNS)}
160 zCP_x_cp20001 = 20001; {TCA Taiwan}
161 zCP_x_Chinese_Eten = 20002; {Eten Taiwan; Chinese Traditional (Eten)}
162 zCP_x_cp20003 = 20003; {IBM5550 Taiwan}
163 zCP_x_cp20004 = 20004; {TeleText Taiwan}
164 zCP_x_cp20005 = 20005; {Wang Taiwan}
165 zCP_x_IA5 = 20105; {IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5)}
166 zCP_x_IA5_German = 20106; {IA5 German (7-bit)}
167 zCP_x_IA5_Swedish = 20107; {IA5 Swedish (7-bit)}
168 zCP_x_IA5_Norwegian = 20108; {IA5 Norwegian (7-bit)}
// zCP_us_ascii is the value ZCompatibleCodePages treats as compatible with every page.
169 zCP_us_ascii = 20127; {US-ASCII (7-bit)}
170 zCP_x_cp20261 = 20261; {T.61}
171 zCP_x_cp20269 = 20269; {ISO 6937 Non-Spacing Accent}
172 zCP_IBM273 = 20273; {IBM EBCDIC Germany}
173 zCP_IBM277 = 20277; {IBM EBCDIC Denmark-Norway}
174 zCP_IBM278 = 20278; {IBM EBCDIC Finland-Sweden}
175 zCP_IBM280 = 20280; {IBM EBCDIC Italy}
176 zCP_IBM284 = 20284; {IBM EBCDIC Latin America-Spain}
177 zCP_IBM285 = 20285; {IBM EBCDIC United Kingdom}
178 zCP_IBM290 = 20290; {IBM EBCDIC Japanese Katakana Extended}
179 zCP_IBM297 = 20297; {IBM EBCDIC France}
180 zCP_IBM420 = 20420; {IBM EBCDIC Arabic}
181 zCP_IBM423 = 20423; {IBM EBCDIC Greek}
182 zCP_IBM424 = 20424; {IBM EBCDIC Hebrew}
183 zCP_x_EBCDIC_KoreanExtended = 20833; {IBM EBCDIC Korean Extended}
184 zCP_IBM_Thai = 20838; {IBM EBCDIC Thai / TIS-620}
// 20866 is the KOI8 page the LConvEncoding-based converters in this unit handle.
185 zCP_KOI8R = 20866; {cskoi8r 20866 Cyrillic (KOI8-R)}
186 zCP_IBM871 = 20871; {IBM EBCDIC Icelandic}
187 zCP_IBM880 = 20880; {IBM EBCDIC Cyrillic Russian}
188 zCP_IBM905 = 20905; {IBM EBCDIC Turkish}
189 zCP_IBM00924 = 20924; {IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)}
190 zCP_EUC_JP = 20932; {Japanese (JIS 0208-1990 and 0121-1990)}
191 zCP_x_cp20936 = 20936; {Simplified Chinese (GB2312); Chinese Simplified (GB2312-80)}
192 zCP_x_cp20949 = 20949; {Korean Wansung}
193 zCP_cp1025 = 21025; {IBM EBCDIC Cyrillic Serbian-Bulgarian}
194 //21027 (deprecated)
195 zCP_KOI8U = 21866; {KOI8-U is an 8-bit character encoding, designed to cover Ukrainian, which uses the Cyrillic alphabet.}
196 zCP_L1_ISO_8859_1 = 28591; {8-bit single-byte coded graphic character sets Part 1: Latin alphabet No. 1, is part of the ISO/IEC 8859 series of ASCII-based standard character encodings}
197 zCP_L2_ISO_8859_2 = 28592; {latin2 east european (ISO), 8-bit single-byte coded graphic character sets - Part 2: Latin alphabet No. 2, is part of the ISO/IEC 8859 series of ASCII-based standard character encodings}
198 zCP_L3_ISO_8859_3 = 28593; {ISO 8859-3 Latin 3}
199 zCP_L4_ISO_8859_4 = 28594; {ISO 8859-4 Baltic}
200 zCP_L5_ISO_8859_5 = 28595; {8bit single-byte coded graphic character sets - Part 5: Latin/Cyrillic alphabet, is part of the ISO/IEC 8859 series of ASCII-based standard character encodings}
201 zCP_L6_ISO_8859_6 = 28596; {ISO 8859-6 Arabic}
202 zCP_L7_ISO_8859_7 = 28597; {ISO 8859-7 Greek}
203 zCP_L8_ISO_8859_8 = 28598; {ISO 8859-8 Hebrew; Hebrew (ISO-Visual)}
// The L5..L8 prefixes are reused below for parts 9, 10, 13 and 14; the full
// identifiers still differ because the part number is part of the name.
204 zCP_L5_ISO_8859_9 = 28599; {ISO 8859-9 Turkish}
205 zCP_L6_ISO_8859_10 = 28600; { ISO 8859-10, ECMA 144 Nordic }
206 zCP_L7_ISO_8859_13 = 28603; {ISO 8859-13 Estonian}
207 zCP_L8_ISO_8859_14 = 28604; { ISO 8859-14 Celtic }
208 zCP_L9_ISO_8859_15 = 28605; {ISO 8859-15 Latin 9}
209 zCP_L10_ISO_8859_16 = 28606; { ISO 8859-16, ASRO SR 14111 Romanian }
210 zCP_x_Europa = 29001; {Europa 3}
211 zCP_iso_8859_8_i = 38598; {ISO 8859-8 Hebrew; Hebrew (ISO-Logical)}
// Code page identifiers from 50220 upwards: ISO 2022, EBCDIC/EUC mixes,
// EUC, HZ, GB18030 and the ISCII pages.
213 zCP_iso_2022_jp = 50220; {ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)}
214 zCP_csISO2022JP = 50221; {ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)}
215 zCP_x_iso_2022_jp = 50222; {ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)}
216 zCP_iso_2022_kr = 50225; {ISO 2022 Korean}
217 zCP_x_cp50227 = 50227; {ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)}
218 zCP_EUC_TC_ISO220 = 50229; {ISO 2022 Traditional Chinese}
219 zCP_EBCDIC_euc_jpe = 50930; {EBCDIC Japanese (Katakana) Extended}
220 zCP_EBCDIC_euc_jp = 50931; {EBCDIC US-Canada and Japanese}
221 zCP_euc_jp_auto = 50932; {EUC Japanese, Indicates Japanese auto-detect (50932). }
222 zCP_EBCDIC_euc_kr = 50933; {EBCDIC Korean Extended and Korean}
223 zCP_EBCDIC_euc_cn = 50935; {EBCDIC Simplified Chinese Extended and Simplified Chinese}
224 zCP_EBCDIC_euc_sc = 50936; {EBCDIC Simplified Chinese}
225 zCP_EBCDIC_USC_TC = 50937; {EBCDIC US-Canada and Traditional Chinese}
// NOTE(review): the identifier suggests a Chinese EUC auto-detect page, but the
// description is an EBCDIC Japanese one - name and comment disagree; verify.
226 zCP_euc_cn_auto = 50939; {EBCDIC Japanese (Latin) Extended and Japanese}
227 zCP_euc_kr_auto = 50949; {EUC Korean, Indicates Korean auto-detect (50949).}
228 zCP_euc_JP_win = 51932; {EUC Japanese}
229 zCP_EUC_CN = 51936; {EUC Simplified Chinese; Chinese Simplified (EUC)}
230 zCP_euc_kr = 51949; {EUC Korean}
231 zCP_euc_tc = 51950; {EUC Traditional Chinese}
232 zCP_hz_gb_2312 = 52936; {HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ)}
233 zCP_GB18030 = 54936; {Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)}
234 zCP_x_iscii_de = 57002; {ISCII Devanagari}
235 zCP_x_iscii_be = 57003; {ISCII Bengali}
236 zCP_x_iscii_ta = 57004; {ISCII Tamil}
237 zCP_x_iscii_te = 57005; {ISCII Telugu}
238 zCP_x_iscii_as = 57006; {ISCII Assamese}
239 zCP_x_iscii_or = 57007; {ISCII Oriya}
240 zCP_x_iscii_ka = 57008; {ISCII Kannada}
241 zCP_x_iscii_ma = 57009; {ISCII Malayalam}
242 zCP_x_iscii_gu = 57010; {ISCII Gujarati}
243 zCP_x_iscii_pa = 57011; {ISCII Punjabi}
// Table of 17 code page numbers (indices 0..16) that IsLConvEncodingCodePage
// scans; it is only declared when WITH_LCONVENCODING is defined.
// Only the last entry (20866, KOI8) is part of this excerpt.
248 {$IFDEF WITH_LCONVENCODING}
250 ZLConvCodepages: array[0..16] of Word = (
267 20866 //KOI8 (Russian)
// Returns True when CP is one of the entries of ZLConvCodepages.
270 function IsLConvEncodingCodePage(const CP: Word): Boolean;
// Hands out a pair of string converters for the two given code pages
// through the out parameters PlainConvert and DbcConvert.
271 procedure SetConvertFunctions(const CTRL_CP, DB_CP: Word;
272 out PlainConvert, DbcConvert: TConvertEncodingFunction); overload;
// String <-> raw helpers. On non-UNICODE compilers the String side needs its
// own code page argument (FromCP resp. ToCP); on UNICODE compilers it is omitted.
275 function StringToAnsiEx(const s: String; const {$IFNDEF UNICODE}FromCP,{$ENDIF} ToCP: Word): RawByteString; {$IFDEF WITH_INLINE}inline;{$ENDIF}
276 function AnsiToStringEx(const s: RawByteString; const FromCP{$IFNDEF UNICODE}, ToCP{$ENDIF}: Word): String; {$IFDEF WITH_INLINE}inline;{$ENDIF}
// Core transcoders between raw bytes in code page CP and wide strings.
279 function ZRawToUnicode(const S: RawByteString; const CP: Word): ZWideString;
280 function ZUnicodeToRaw(const US: ZWideString; CP: Word): RawByteString;
// ZConvert* family: each routine takes a source string plus the code page(s)
// involved and returns the text in the target string type. The implementations
// visible in this unit transcode via ZRawToUnicode / ZUnicodeToRaw or, with
// WITH_LCONVENCODING, via the LConvEncoding helper functions.
// The *WithAutoEncode variants are implemented outside this excerpt.
282 {converter functions for the String-types}
283 function ZConvertAnsiToRaw(const Src: AnsiString; const RawCP: Word): RawByteString;
284 function ZConvertRawToAnsi(const Src: RawByteString; const RawCP: Word): AnsiString;
285 function ZConvertAnsiToUTF8(const Src: AnsiString): UTF8String;
286 function ZConvertUTF8ToAnsi(const Src: UTF8String): AnsiString;
287 function ZConvertRawToUTF8(const Src: RawByteString; const CP: Word): UTF8String;
288 function ZConvertUTF8ToRaw(Const Src: UTF8String; const CP: Word): RawByteString;
289 function ZConvertRawToString(const Src: RawByteString; const RawCP, StringCP: Word): String;
290 function ZConvertStringToRaw(const Src: String; const StringCP, RawCP: Word): RawByteString;
291 function ZConvertStringToRawWithAutoEncode(const Src: String; const StringCP, RawCP: Word): RawByteString;
292 function ZConvertUTF8ToString(const Src: UTF8String; const StringCP: Word): String;
293 function ZConvertStringToUTF8(const Src: String; const StringCP: Word): UTF8String;
294 function ZConvertStringToUTF8WithAutoEncode(const Src: String; const StringCP: Word): UTF8String;
295 function ZConvertStringToAnsi(const Src: String; const StringCP: Word): AnsiString;
296 function ZConvertStringToAnsiWithAutoEncode(const Src: String; const StringCP: Word): AnsiString;
297 function ZConvertAnsiToString(const Src: AnsiString; const StringCP: Word): String;
298 function ZConvertUnicodeToString(const Src: ZWideString; const StringCP: Word): String;
299 function ZConvertUnicodeToString_CPUTF8(const Src: ZWideString; const StringCP: Word): String;
300 function ZConvertStringToUnicode(const Src: String; const StringCP: Word): ZWideString;
301 function ZConvertString_CPUTF8ToUnicode(const Src: String; const StringCP: Word): ZWideString;
302 function ZConvertStringToUnicodeWithAutoEncode(const Src: String; const StringCP: Word): ZWideString;
// ZMove* family: same signatures as the matching ZConvert* routines.
// NOTE(review): the bodies are not part of this excerpt; presumably they copy
// the bytes without transcoding - confirm against the implementation.
303 {move functions for the String types}
304 function ZMoveAnsiToRaw(const Src: AnsiString; const RawCP: Word): RawByteString;
305 function ZMoveRawToAnsi(const Src: RawByteString; const RawCP: Word): AnsiString;
306 function ZMoveAnsiToUTF8(const Src: AnsiString): UTF8String;
307 function ZMoveUTF8ToAnsi(const Src: UTF8String): AnsiString;
308 function ZMoveRawToUTF8(const Src: RawByteString; const CP: Word): UTF8String;
309 function ZMoveUTF8ToRaw(Const Src: UTF8String; const CP: Word): RawByteString;
310 function ZMoveStringToAnsi(Const Src: String; const StringCP: Word): AnsiString;
311 function ZMoveAnsiToString(const Src: AnsiString; const StringCP: Word): String;
312 function ZMoveRawToString(const Src: RawByteString; const RawCP, StringCP: Word): String;
313 function ZMoveStringToRaw(const Src: String; const StringCP, RawCP: Word): RawByteString;
314 function ZMoveUTF8ToString(const Src: UTF8String; StringCP: Word): String;
315 function ZMoveStringToUTF8(const Src: String; const StringCP: Word): UTF8String;
// Fallbacks for text whose code page is unknown: they rely on plain compiler
// casts (the auto-encode variant first probes the input for UTF-8).
317 function ZUnknownRawToUnicode(const S: RawByteString; const CP: Word): ZWideString;
318 function ZUnknownRawToUnicodeWithAutoEncode(const S: RawByteString;
319 const CP: Word): ZWideString;
320 function ZUnicodeToUnknownRaw(const US: ZWideString; CP: Word): RawByteString;
323 Get the current system codepage of AnsiString
324 @return current system codepage of AnsiString
// The implementation picks DefaultSystemCodePage, GetACP or zCP_UTF8,
// depending on which compiler defines are active.
326 function ZDefaultSystemCodePage: Word;
329 Is the codepage equal or compatible?
330 @param CP1 word the first codepage to compare
331 @param CP2 word the second codepage to compare
332 @returns Boolean True if codepage is equal or compatible
// "Compatible" means: both values are equal, or either one is zCP_us_ascii.
334 function ZCompatibleCodePages(const CP1, CP2: Word): Boolean; {$IFDEF WITH_INLINE}inline;{$ENDIF}
337 Set the string-types conversion function in relation to the Connection-Settings.
338 The Results should be as optimal as possible to speed up the behavior
339 @param ConSettings a Pointer to the ConnectionSetting
// Marked "overload" only under WITH_LCONVENCODING, where a second
// SetConvertFunctions taking two code pages is also declared in this unit.
// NOTE(review): the body of this variant is not part of the excerpt.
341 procedure SetConvertFunctions(ConSettings: PZConSettings); {$IFDEF WITH_LCONVENCODING}overload;{$ENDIF}
344 GetValidatedTextStream validates the incoming Stream for its given Memory and
345 returns a valid UTF8/Ansi StringStream
346 @param Stream the Stream with the unknown format and data
347 @return a valid utf8 encoded string stream
// Three overloads that start from an untyped buffer of Size bytes; they differ
// in the extra argument (none, a target code page ToCP, or a WasDecoded flag).
349 function GetValidatedAnsiStringFromBuffer(const Buffer: Pointer; Size: Cardinal;
350 ConSettings: PZConSettings): RawByteString; overload;
352 function GetValidatedAnsiStringFromBuffer(const Buffer: Pointer; Size: Cardinal;
353 ConSettings: PZConSettings; ToCP: Word): RawByteString; overload;
355 function GetValidatedAnsiStringFromBuffer(const Buffer: Pointer; Size: Cardinal;
356 WasDecoded: Boolean; ConSettings: PZConSettings): RawByteString; overload;
// Two overloads that start from an already typed string (raw or wide).
// NOTE(review): FromDB presumably tells whether the text comes from the
// database or from the client side - bodies are not part of this excerpt.
358 function GetValidatedAnsiString(const Ansi: RawByteString;
359 ConSettings: PZConSettings; const FromDB: Boolean): RawByteString; overload;
361 function GetValidatedAnsiString(const Uni: ZWideString;
362 ConSettings: PZConSettings; const FromDB: Boolean): RawByteString; overload;
365 GetValidatedUnicodeStream validates the incoming Stream for its given Memory and
366 returns a valid Unicode/Widestring Stream
367 @param Buffer the pointer to the Data
368 @return a valid Unicode encoded string stream
// Two overloads returning a TStream: one starts from an untyped buffer of
// Size bytes, the other from a raw byte string.
// NOTE(review): ownership of the returned stream is not visible here;
// presumably the caller has to free it - confirm against the implementation.
370 function GetValidatedUnicodeStream(const Buffer: Pointer; Size: Cardinal;
371 ConSettings: PZConSettings; FromDB: Boolean): TStream; overload;
373 function GetValidatedUnicodeStream(const Ansi: RawByteString;
374 ConSettings: PZConSettings; FromDB: Boolean): TStream; overload;
378 uses SysUtils, Types {$IFDEF WITH_WIDESTRUTILS},WideStrUtils{$ENDIF},
379 ZSysUtils{$IFDEF WITH_STRLEN_DEPRECATED}, AnsiStrings{$ENDIF};
// Fallback for raw text whose code page is unknown: the result is produced by
// a plain ZWideString cast. CP is not referenced in the lines shown here.
384 function ZUnknownRawToUnicode(const S: RawByteString;
385 const CP: Word): ZWideString;
387 Result := ZWideString(S);
// Like ZUnknownRawToUnicode, but probes S with DetectUTF8Encoding first.
390 function ZUnknownRawToUnicodeWithAutoEncode(const S: RawByteString;
391 const CP: Word): ZWideString;
393 case DetectUTF8Encoding(S) of
// Pure ASCII and valid UTF-8 are both decoded as UTF-8.
394 etUSASCII, etUTF8: Result := UTF8ToString(S);
// Remaining branch (presumably the else case): plain compiler cast.
396 Result := ZWideString(S);
// Counterpart of ZUnknownRawToUnicode: converts the wide string with a plain
// RawByteString cast. CP is not referenced in the lines shown here.
400 function ZUnicodeToUnknownRaw(const US: ZWideString; CP: Word):
403 Result := RawByteString(US);
// Converts the raw byte string S, encoded in code page CP, to a wide string.
// Three compile-time variants are visible:
//   * WITH_LCONVENCODING: LConvEncoding helpers, going through UTF-8;
//   * MSWINDOWS / WITH_UNICODEFROMLOCALECHARS: OS conversion routines;
//   * otherwise: the FPC widestring manager or a UTF-8/cast fallback.
409 function ZRawToUnicode(const S: RawByteString; const CP: Word): ZWideString;
410 {$IFDEF WITH_LCONVENCODING}
// Each branch transcodes S to UTF-8 and then decodes that to a wide string.
// The case labels selecting the branches are not part of this excerpt.
414 Result := UTF8Decode(ISO_8859_1ToUTF8(PAnsiChar(S)));
416 Result := UTF8Decode(ISO_8859_2ToUTF8(PAnsiChar(S)));
418 Result := UTF8Decode(CP1250ToUTF8(PAnsiChar(S)));
420 Result := UTF8Decode(CP1251ToUTF8(PAnsiChar(S)));
422 Result := UTF8Decode(CP1252ToUTF8(PAnsiChar(S)));
424 Result := UTF8Decode(CP1253ToUTF8(PAnsiChar(S)));
426 Result := UTF8Decode(CP1254ToUTF8(PAnsiChar(S)));
428 Result := UTF8Decode(CP1255ToUTF8(PAnsiChar(S)));
430 Result := UTF8Decode(CP1256ToUTF8(PAnsiChar(S)));
432 Result := UTF8Decode(CP1257ToUTF8(PAnsiChar(S)));
434 Result := UTF8Decode(CP1258ToUTF8(PAnsiChar(S)));
436 Result := UTF8Decode(CP437ToUTF8(PAnsiChar(S)));
438 Result := UTF8Decode(CP850ToUTF8(PAnsiChar(S)));
439 {$IFDEF LCONVENCODING_HAS_CP852_FUNCTIONS}
441 Result := UTF8Decode(CP852ToUTF8(PAnsiChar(S)));
444 Result := UTF8Decode(CP866ToUTF8(PAnsiChar(S)));
446 Result := UTF8Decode(CP874ToUTF8(PAnsiChar(S)));
447 20866: //KOI8 (Russian)
448 Result := UTF8Decode(KOI8ToUTF8(PAnsiChar(S)));
// Input that needs no LConvEncoding step (presumably the UTF-8 label): decode only.
450 Result := UTF8Decode(PAnsiChar(s));
452 Result := ZWideString(S); //random success!
456 {$IF defined(MSWINDOWS) or defined(WITH_UNICODEFROMLOCALECHARS)}
458 {$IFDEF WITH_UNICODEFROMLOCALECHARS}
// Unknown code page: defer to the cast-based fallback.
467 if CP = zCP_NONE then
468 Result := ZUnknownRawToUnicode(s, CP)
470 {$IF defined(MSWINDOWS) or defined(WITH_UNICODEFROMLOCALECHARS)}
472 {$IFDEF WITH_UNICODEFROMLOCALECHARS}
// Two-pass conversion: the first call measures, the second one fills Result.
474 wlen := UnicodeFromLocaleChars(cp, 0, PAnsiChar(S), ulen, NIL, 0); // wlen is the number of UCS2 without NULL terminater.
475 if wlen = 0 then exit;
476 SetLength(result, wlen);
477 UnicodeFromLocaleChars(cp, 0, PAnsiChar(S), ulen, PWideChar(Result), wlen);
// Windows API variant; length -1 makes the API count the terminating #0,
// hence the "l - 1" when sizing the target.
// NOTE(review): @s[1] requires a non-empty S; an empty-string guard is
// presumably in lines not shown here - confirm.
479 l := MultiByteToWideChar(CP, 0, PAnsiChar(@s[1]), - 1, nil, 0); //Checkout the Result-Lengh
481 SetLength(US, l - 1); //Set Result-Length
482 MultiByteToWideChar(CP, 0, PAnsiChar(@s[1]),
483 - 1, PWideChar(@US[1]), l - 1); //Convert Ansi to Wide with supported Chars
488 {$IFDEF FPC_HAS_BUILTIN_WIDESTR_MANAGER} //FPC2.7+
489 WidestringManager.Ansi2WideMoveProc(PAnsiChar(s), CP, Result, Length(s));
// Last-resort fallback. Raw UTF-8 bytes must be DECODED into a wide string;
// the previous UTF8Encode call went in the wrong direction and mangled
// every non-ASCII character.
491 if ZCompatibleCodePages(CP, zCP_UTF8) then
492 Result := UTF8Decode(s)
494 Result := ZWideString(s);
// Converts the wide string US to raw bytes in code page CP.
// Mirrors ZRawToUnicode: LConvEncoding helpers, OS routines, or a fallback,
// selected by compiler defines.
500 function ZUnicodeToRaw(const US: ZWideString; CP: Word): RawByteString;
501 {$IFDEF WITH_LCONVENCODING}
// Each branch encodes US as UTF-8 and lets LConvEncoding produce the target
// code page. The selecting case labels are not part of this excerpt.
505 Result := UTF8ToISO_8859_1(UTF8Encode(US));
507 Result := UTF8ToISO_8859_2(UTF8Encode(US));
509 Result := UTF8ToCP1250(UTF8Encode(US));
511 Result := UTF8ToCP1251(UTF8Encode(US));
513 Result := UTF8ToCP1252(UTF8Encode(US));
515 Result := UTF8ToCP1253(UTF8Encode(US));
517 Result := UTF8ToCP1254(UTF8Encode(US));
519 Result := UTF8ToCP1255(UTF8Encode(US));
521 Result := UTF8ToCP1256(UTF8Encode(US));
523 Result := UTF8ToCP1257(UTF8Encode(US));
525 Result := UTF8ToCP1258(UTF8Encode(US));
527 Result := UTF8ToCP437(UTF8Encode(US));
529 Result := UTF8ToCP850(UTF8Encode(US));
530 {$IFDEF LCONVENCODING_HAS_CP852_FUNCTIONS}
532 Result := UTF8ToCP852(UTF8Encode(US));
535 Result := UTF8ToCP866(UTF8Encode(US));
537 Result := UTF8ToCP874(UTF8Encode(US));
538 20866: //KOI8 (Russian)
539 Result := UTF8ToKOI8(UTF8Encode(US));
// Target that needs no LConvEncoding step (presumably the UTF-8 label).
541 Result := UTF8Encode(US);
543 Result := RawByteString(US); //random success!
547 {$IF defined(MSWINDOWS) or defined(WITH_UNICODEFROMLOCALECHARS)}
549 {$IFDEF WITH_UNICODEFROMLOCALECHARS}
// Unknown code page: plain compiler cast.
558 if CP = zCP_NONE then
559 Result := RawByteString(US) //random success
561 {$IF defined(MSWINDOWS) or defined(WITH_UNICODEFROMLOCALECHARS)}
563 {$IFDEF WITH_UNICODEFROMLOCALECHARS}
// Two-pass conversion: measure the byte length first, then fill Result.
565 ulen := LocaleCharsFromUnicode(CP, 0, PWideChar(US), wlen, NIL, 0, NIL, NIL);
566 setlength(Result, ulen);
567 LocaleCharsFromUnicode(CP, 0, PWideChar(US), wlen, PAnsiChar(Result), ulen, NIL, NIL);
// Windows API variant; length -1 includes the terminating #0, hence "l - 1".
// NOTE(review): reads from WS, presumably a local copy of US declared in
// lines not shown here; @WS[1] also requires a non-empty string - confirm.
570 l := WideCharToMultiByte(CP,0, @WS[1], - 1, nil, 0, nil, nil); //Checkout the result length
572 SetLength(Result, l - 1); //SetResult Length
573 WideCharToMultiByte(CP,0, @WS[1], - 1, @Result[1], l - 1, nil, nil); // Convert Wide down to Ansi
577 {$IFDEF FPC_HAS_BUILTIN_WIDESTR_MANAGER} //FPC2.7+
578 WidestringManager.Unicode2AnsiMoveProc(PWideChar(US), Result, CP, Length(US));
// Last-resort fallback: UTF-8 compatible targets are encoded, others are cast.
580 if ZCompatibleCodePages(CP, zCP_UTF8) then
581 Result := UTF8Encode(US)
583 Result := RawByteString(US); //random success
// AnsiToStringEx: converts raw bytes in FromCP to the compiler's String type.
// Only compiled when WITH_LCONVENCODING is not defined.
589 {$IFNDEF WITH_LCONVENCODING}
590 function AnsiToStringEx(const s: RawByteString;
591 const FromCP{$IFNDEF UNICODE}, ToCP{$ENDIF}: Word): String;
// Shortcut test: unknown source page, or (Ansi compilers) identical pages.
// The statement executed for this case is not part of this excerpt.
596 if ( FromCP = zCP_NONE ) {$IFNDEF UNICODE} or ( FromCP = ToCP ){$ENDIF}then
// Unicode compilers: String is wide, so decoding the raw bytes is enough.
600 if FromCP = zCP_UTF8 then
601 result := UTF8ToString(s)
603 Result := ZRawToUnicode(s, FromCP);
604 {$ELSE} //Ansi-Compiler
// Ansi compilers: round trip raw -> wide -> raw in the target code page.
605 Result := ZUnicodeToRaw(ZRawToUnicode(s, FromCP), ToCP);
// StringToAnsiEx: converts the compiler's String type to raw bytes in ToCP.
609 function StringToAnsiEx(const s: String; const {$IFNDEF UNICODE}FromCP, {$ENDIF} ToCP: Word): RawByteString;
// Unknown target page, or (Ansi compilers) identical pages: plain cast.
614 if ( ToCP = zCP_NONE ) {$IFNDEF UNICODE} or ( FromCP = ToCP ){$ENDIF}then
615 Result := RawByteString(s)
// Unicode compilers: String is wide, so encoding is enough.
618 if ToCP = zCP_UTF8 then
619 result := UTF8Encode(s)
621 Result := ZUnicodeToRaw(s, ToCP);
622 {$ELSE} //Ansi-Compiler
// Ansi compilers: round trip raw -> wide -> raw in the target code page.
623 Result := ZUnicodeToRaw(ZRawToUnicode(s, FromCP), ToCP);
// Returns True when CP equals one of the entries of ZLConvCodepages.
629 function IsLConvEncodingCodePage(const CP: Word): Boolean;
// Linear scan; Result holds the comparison of the entry inspected last.
633 for i := 0 to High(ZLConvCodepages) do
635 Result := CP = ZLConvCodepages[i];
// Stop at the first match so Result stays True.
636 if Result then Break;
// Converter that SetConvertFunctions installs when both code pages are equal
// and in its last branch. The body is not part of this excerpt; presumably
// it returns s unchanged - confirm.
640 function NoConvert(const s: string): string;
// Selects a pair of LConvEncoding converters for the given code pages and
// returns them through the out parameters:
//   DbcConvert   receives a <codepage>ToUTF8 function,
//   PlainConvert receives the matching UTF8To<codepage> function.
// NOTE(review): the case header and its labels are not part of this excerpt,
// so it is not visible which of CTRL_CP / DB_CP selects the branch.
645 procedure SetConvertFunctions(const CTRL_CP, DB_CP: Word;
646 out PlainConvert, DbcConvert: TConvertEncodingFunction);
// Identical code pages: nothing to transcode in either direction.
648 if CTRL_CP = DB_CP then
650 PlainConvert := @NoConvert;
651 DbcConvert := @NoConvert;
658 DbcConvert := @ISO_8859_1ToUTF8;
659 PlainConvert := @UTF8ToISO_8859_1;
663 DbcConvert := @ISO_8859_2ToUTF8;
664 PlainConvert := @UTF8ToISO_8859_2;
668 DbcConvert := @CP1250ToUTF8;
669 PlainConvert := @UTF8ToCP1250;
673 DbcConvert := @CP1251ToUTF8;
674 PlainConvert := @UTF8ToCP1251;
678 DbcConvert := @CP1252ToUTF8;
679 PlainConvert := @UTF8ToCP1252;
683 DbcConvert := @CP1253ToUTF8;
684 PlainConvert := @UTF8ToCP1253;
688 DbcConvert := @CP1254ToUTF8;
689 PlainConvert := @UTF8ToCP1254;
693 DbcConvert := @CP1255ToUTF8;
694 PlainConvert := @UTF8ToCP1255;
698 DbcConvert := @CP1256ToUTF8;
699 PlainConvert := @UTF8ToCP1256;
703 DbcConvert := @CP1257ToUTF8;
704 PlainConvert := @UTF8ToCP1257;
708 DbcConvert := @CP1258ToUTF8;
709 PlainConvert := @UTF8ToCP1258;
713 DbcConvert := @CP437ToUTF8;
714 PlainConvert := @UTF8ToCP437;
718 DbcConvert := @CP850ToUTF8;
719 PlainConvert := @UTF8ToCP850;
// The CP852 helpers only exist in some LConvEncoding versions.
721 {$IFDEF LCONVENCODING_HAS_CP852_FUNCTIONS}
724 DbcConvert := @CP852ToUTF8;
725 PlainConvert := @UTF8ToCP852;
730 DbcConvert := @CP866ToUTF8;
731 PlainConvert := @UTF8ToCP866;
735 DbcConvert := @CP874ToUTF8;
736 PlainConvert := @UTF8ToCP874;
738 20866: //KOI8 (Russian)
740 DbcConvert := @KOI8ToUTF8;
741 PlainConvert := @UTF8ToKOI8;
// Last branch (presumably the else case for unsupported pages): no conversion.
745 DbcConvert := @NoConvert;
746 PlainConvert := @NoConvert;
// Returns the code page used for AnsiString on the current system.
// Exactly one of the three assignments is compiled, depending on the defines.
753 function ZDefaultSystemCodePage: Word;
755 {$IFDEF WITH_DEFAULTSYSTEMCODEPAGE}
// Compiler/RTL exposes the value directly.
756 Result := Word(DefaultSystemCodePage);
// Windows API query.
759 Result := GetACP; //available for Windows and WinCE
// No query available: UTF-8 is assumed.
761 Result := zCP_UTF8; //how to determine the current OS CP?
767 Is the codepage equal or compatible?
768 @param CP1 word the first codepage to compare
769 @param CP2 word the second codepage to compare
770 @returns Boolean True if codepage is equal or compatible
// True when both code pages are equal or when either one is zCP_us_ascii
// (7-bit ASCII is treated as compatible with every other page).
772 function ZCompatibleCodePages(const CP1, CP2: Word): Boolean;
774 Result := (CP1 = CP2) or (CP1 = zCP_us_ascii) or (CP2 = zCP_us_ascii);
// Guesses the character encoding of the first Size bytes of Bytes and
// returns it as a TZCharEncoding value.
// Several result assignments are in lines not part of this excerpt.
780 function TestEncoding(const Bytes: TByteDynArray; const Size: Cardinal;
781 const ConSettings: PZConSettings): TZCharEncoding;
785 Step one: Findout, wat's comming in! To avoid User-Bugs as good as possible
786 it is possible that a PAnsiChar OR a PWideChar was written into
787 the Stream!!! And these chars could be trunced with changing the
789 I know this can lead to pain with two byte ansi chars, but what else can i do?
790 step two: detect the encoding }
// A #0 byte before position Size makes StrLen stop early; that is taken as
// evidence for wide-character data.
// NOTE(review): StrLen reads until it meets a #0, so the buffer presumably has
// to be zero-terminated at or after Size - confirm that callers guarantee this.
792 if ( {$IFDEF WITH_STRLEN_DEPRECATED}AnsiStrings.{$ENDIF}StrLen(PAnsiChar(Bytes)) < Size ) then //Sure PWideChar written!! A #0 was in the byte-sequence!
// UTF-8 probing only happens when the connection enables AutoEncode.
795 if ConSettings.AutoEncode then
796 case DetectUTF8Encoding(PAnsichar(Bytes)) of
797 etUSASCII: Result := ceDefault; //Exact!
799 { Sure this isn't right in all cases!
800 Two/four byte WideChars causing the same result!
801 Leads to pain! Is there a way to get a better test?
802 I've to start from the premise the function which calls this func
803 should decide wether ansi or unicode}
805 etUTF8: Result := ceUTF8; //Exact!
// Converts an AnsiString to raw bytes in code page RawCP by going through a
// wide string: compiler cast Ansi -> wide, then ZUnicodeToRaw.
814 function ZConvertAnsiToRaw(const Src: AnsiString; const RawCP: Word): RawByteString;
815 var US: ZWideString; //COM based. So localize the String to avoid Buffer overrun
821 US := ZWideString(Src);
822 Result := ZUnicodeToRaw(US, RawCP);
// Converts raw bytes in code page RawCP to an AnsiString by going through a
// wide string: ZRawToUnicode, then a compiler cast wide -> Ansi.
826 function ZConvertRawToAnsi(const Src: RawByteString; const RawCP: Word): AnsiString;
827 var US: ZWideString; //COM based. So localize the String to avoid Buffer overrun
833 US := ZRawToUnicode(Src, RawCP);
834 Result := AnsiString(US); //use compiler convertation
// Converts an AnsiString to UTF-8 by going through a wide string.
838 function ZConvertAnsiToUTF8(const Src: AnsiString): UTF8String;
839 var US: ZWideString; //COM based. So localize the String to avoid Buffer overrun
845 US := ZWideString(Src);
// With WITH_RAWBYTESTRING the UTF8String cast is used; otherwise UTF8Encode
// is called explicitly.
846 Result := {$IFDEF WITH_RAWBYTESTRING}UTF8String{$ELSE}UTF8Encode{$ENDIF}(US);
// Converts a UTF-8 string to an AnsiString by going through a wide string.
850 function ZConvertUTF8ToAnsi(const Src: UTF8String): AnsiString;
851 var US: ZWideString; //COM based. So localize the String to avoid Buffer overrun
// With WITH_RAWBYTESTRING the ZWideString cast is used; otherwise UTF8Decode
// is called explicitly.
857 US := {$IFDEF WITH_RAWBYTESTRING}ZWideString{$ELSE}UTF8Decode{$ENDIF}(Src);
858 Result := AnsiString(US);
// Converts raw bytes in code page CP to UTF-8.
// With WITH_LCONVENCODING the LConvEncoding helpers are used directly;
// otherwise the text goes through a wide string.
862 function ZConvertRawToUTF8(const Src: RawByteString; const CP: Word): UTF8String;
864 {$IFDEF WITH_LCONVENCODING}
867 US: ZWideString; //COM based. So localize the String to avoid Buffer overrun
874 {$IFDEF WITH_LCONVENCODING}
// One branch per supported code page; the case labels are not part of this excerpt.
878 sUTF8 := ISO_8859_1ToUTF8(PAnsiChar(Src));
880 sUTF8 := ISO_8859_2ToUTF8(PAnsiChar(Src));
882 sUTF8 := CP1250ToUTF8(PAnsiChar(Src));
884 sUTF8 := CP1251ToUTF8(PAnsiChar(Src));
886 sUTF8 := CP1252ToUTF8(PAnsiChar(Src));
888 sUTF8 := CP1253ToUTF8(PAnsiChar(Src));
890 sUTF8 := CP1254ToUTF8(PAnsiChar(Src));
892 sUTF8 := CP1255ToUTF8(PAnsiChar(Src));
894 sUTF8 := CP1256ToUTF8(PAnsiChar(Src));
896 sUTF8 := CP1257ToUTF8(PAnsiChar(Src));
898 sUTF8 := CP1258ToUTF8(PAnsiChar(Src));
900 sUTF8 := CP437ToUTF8(PAnsiChar(Src));
902 sUTF8 := CP850ToUTF8(PAnsiChar(Src));
// NOTE(review): unlike the other LConvEncoding routines in this unit, the
// CP852 call is not wrapped in LCONVENCODING_HAS_CP852_FUNCTIONS here, so it
// would presumably not compile against LConvEncoding versions without it.
904 sUTF8 := CP852ToUTF8(PAnsiChar(Src));
906 sUTF8 := CP866ToUTF8(PAnsiChar(Src));
908 sUTF8 := CP874ToUTF8(PAnsiChar(Src));
909 20866: //KOI8 (Russian)
910 sUTF8 := KOI8ToUTF8(PAnsiChar(Src));
// Two branches that take the bytes over unchanged (presumably the UTF-8 label
// and the else case).
912 sUTF8 := PAnsiChar(Src);
914 sUTF8 := PAnsiChar(Src);
// Copy the intermediate String into the UTF8String result.
916 ZSetString(PAnsichar(sUTF8), Result);
// Without LConvEncoding: raw -> wide -> UTF-8.
918 US := ZRawToUnicode(Src, CP);
919 Result := {$IFDEF WITH_RAWBYTESTRING}UTF8String{$ELSE}UTF8Encode{$ENDIF}(US);
// Converts a UTF-8 string to raw bytes in code page CP.
// With WITH_LCONVENCODING the LConvEncoding helpers are used directly;
// otherwise the text goes through a wide string.
924 function ZConvertUTF8ToRaw(Const Src: UTF8String; const CP: Word): RawByteString;
926 {$IFDEF WITH_LCONVENCODING}
929 US: ZWideString; //COM based. So localize the String to avoid Buffer overrun
936 {$IFDEF WITH_LCONVENCODING}
// Despite its name, sUTF8 holds the text already converted to the target page.
// The case labels selecting the branches are not part of this excerpt.
939 sUTF8 := UTF8ToISO_8859_1(PAnsiChar(Src));
941 sUTF8 := UTF8ToISO_8859_2(PAnsiChar(Src));
943 sUTF8 := UTF8ToCP1250(PAnsiChar(Src));
945 sUTF8 := UTF8ToCP1251(PAnsiChar(Src));
947 sUTF8 := UTF8ToCP1252(PAnsiChar(Src));
949 sUTF8 := UTF8ToCP1253(PAnsiChar(Src));
951 sUTF8 := UTF8ToCP1254(PAnsiChar(Src));
953 sUTF8 := UTF8ToCP1255(PAnsiChar(Src));
955 sUTF8 := UTF8ToCP1256(PAnsiChar(Src));
957 sUTF8 := UTF8ToCP1257(PAnsiChar(Src));
959 sUTF8 := UTF8ToCP1258(PAnsiChar(Src));
961 sUTF8 := UTF8ToCP437(PAnsiChar(Src));
963 sUTF8 := UTF8ToCP850(PAnsiChar(Src));
// NOTE(review): unlike the other LConvEncoding routines in this unit, the
// CP852 call is not wrapped in LCONVENCODING_HAS_CP852_FUNCTIONS here, so it
// would presumably not compile against LConvEncoding versions without it.
965 sUTF8 := UTF8ToCP852(PAnsiChar(Src));
967 sUTF8 := UTF8ToCP866(PAnsiChar(Src));
969 sUTF8 := UTF8ToCP874(PAnsiChar(Src));
970 20866: //KOI8 (Russian)
971 sUTF8 := UTF8ToKOI8(PAnsiChar(Src));
// Two branches that take the bytes over unchanged (presumably the UTF-8 label
// and the else case).
973 sUTF8 := PAnsiChar(Src);
975 sUTF8 := PAnsiChar(Src);
977 Result := ''; //Makes compiler happy
978 ZSetString(PAnsiChar(sUTF8), Result);
// Without LConvEncoding: UTF-8 -> wide -> raw in CP.
980 US := UTF8ToString(PAnsiChar(Src));
981 Result := ZUnicodeToRaw(US, CP);
// Converts raw bytes in code page RawCP to the compiler's String type, whose
// code page on Ansi compilers is given by StringCP.
989 function ZConvertRawToString(const Src: RawByteString;
990 const RawCP, StringCP: Word): String;
993 {$IFDEF WITH_LCONVENCODING}
996 US: ZWideString; //COM based. So localize the String to avoid Buffer overrun
1004 {$IFDEF WITH_LCONVENCODING}
// Copy Src into a plain String first; the LConvEncoding helpers take it from there.
1005 sUTF8 := ''; //Makes Compiler happy
1006 ZSetString(PAnsichar(Src), sUTF8);
// One branch per supported code page; the case labels are not part of this excerpt.
1009 Result := ISO_8859_1ToUTF8(PAnsiChar(sUTF8));
1011 Result := ISO_8859_2ToUTF8(PAnsiChar(sUTF8));
1013 Result := CP1250ToUTF8(PAnsiChar(sUTF8));
1015 Result := CP1251ToUTF8(PAnsiChar(sUTF8));
1017 Result := CP1252ToUTF8(PAnsiChar(sUTF8));
1019 Result := CP1253ToUTF8(PAnsiChar(sUTF8));
1021 Result := CP1254ToUTF8(PAnsiChar(sUTF8));
1023 Result := CP1255ToUTF8(PAnsiChar(sUTF8));
1025 Result := CP1256ToUTF8(PAnsiChar(sUTF8));
1027 Result := CP1257ToUTF8(PAnsiChar(sUTF8));
1029 Result := CP1258ToUTF8(PAnsiChar(sUTF8));
1031 Result := CP437ToUTF8(PAnsiChar(sUTF8));
1033 Result := CP850ToUTF8(PAnsiChar(sUTF8));
// The CP852 helper only exists in some LConvEncoding versions.
1034 {$IFDEF LCONVENCODING_HAS_CP852_FUNCTIONS}
1036 Result := CP852ToUTF8(PAnsiChar(sUTF8));
1039 Result := CP866ToUTF8(PAnsiChar(sUTF8));
1041 Result := CP874ToUTF8(PAnsiChar(sUTF8));
1042 20866: //KOI8 (Russian)
1043 Result := KOI8ToUTF8(PAnsiChar(sUTF8));
// Two branches that take the bytes over unchanged (presumably the UTF-8 label
// and the else case).
1045 Result := PAnsiChar(sUTF8);
1047 Result := PAnsiChar(sUTF8);
// Without LConvEncoding, wide String (presumably the UNICODE branch): decode only.
1051 Result := ZRawToUnicode(Src, RawCP);
// Without LConvEncoding, Ansi String: raw -> wide -> raw in StringCP.
1053 US := ZRawToUnicode(Src, RawCP);
1054 ZSetString(PAnsiChar(ZUnicodeToRaw(US, StringCP)), Result);
// Converts a String (code page StringCP) into raw bytes encoded in RawCP.
// NOTE(review): short lines (begin/end/else, {$ELSE}/{$ENDIF}, most case labels)
// are missing from this listing; the branch layout is only partly visible.
1060 function ZConvertStringToRaw(const Src: String; const StringCP, RawCP: Word): RawByteString;
1063 {$IFDEF WITH_LCONVENCODING}
1066 US: ZWideString; //COM based, so let's localize the value to avoid Buffer overrun
// WITH_LCONVENCODING path: Src is handed to a UTF8To<codepage> routine, i.e. it
// is treated as UTF-8; StringCP is not referenced by the visible statements.
// Despite its name, sUTF8 holds the TARGET-encoded text afterwards.
1073 {$IFDEF WITH_LCONVENCODING}
1077 sUTF8 := UTF8ToISO_8859_1(Src);
1079 sUTF8 := UTF8ToISO_8859_2(Src);
1081 sUTF8 := UTF8ToCP1250(Src);
1083 sUTF8 := UTF8ToCP1251(Src);
1085 sUTF8 := UTF8ToCP1252(Src);
1087 sUTF8 := UTF8ToCP1253(Src);
1089 sUTF8 := UTF8ToCP1254(Src);
1091 sUTF8 := UTF8ToCP1255(Src);
1093 sUTF8 := UTF8ToCP1256(Src);
1095 sUTF8 := UTF8ToCP1257(Src);
1097 sUTF8 := UTF8ToCP1258(Src);
1099 sUTF8 := UTF8ToCP437(Src);
1101 sUTF8 := UTF8ToCP850(Src);
// The CP852 converter is only compiled in when this symbol is defined.
1102 {$IFDEF LCONVENCODING_HAS_CP852_FUNCTIONS}
1104 sUTF8 := UTF8ToCP852(Src);
1107 sUTF8 := UTF8ToCP866(Src);
1109 sUTF8 := UTF8ToCP874(Src);
1110 20866: //KOI8 (Russian)
1111 sUTF8 := UTF8ToKOI8(Src);
1117 Result := ''; //Makes compiler happy
// Copy the converted bytes into the result.
1118 ZSetString(PAnsiChar(sUTF8), Result);
// Src passed straight to ZUnicodeToRaw: presumably the branch where String is
// already a Unicode string -- TODO confirm the guarding conditional.
1122 Result := ZUnicodeToRaw(Src, RawCP);
// Otherwise: decode with StringCP to wide, then encode to RawCP.
1125 US := ZRawToUnicode(Src, StringCP);
1126 Result := ZUnicodeToRaw(US, RawCP);
// Converts a String to raw bytes in RawCP, inspecting the actual bytes of Src
// (DetectUTF8Encoding) instead of trusting StringCP alone.
// NOTE(review): the case labels after etUSASCII and the else lines are missing
// from this listing; the grouping described below is inferred -- TODO confirm.
1136 function ZConvertStringToRawWithAutoEncode(const Src: String;
1137 const StringCP, RawCP: Word): RawByteString;
// Src passed straight to ZUnicodeToRaw: presumably the Unicode-String branch.
1140 Result := ZUnicodeToRaw(Src, RawCP);
1142 Result := ''; //init for FPC
1143 case DetectUTF8Encoding(Src) of
// Pure 7-bit text: the bytes are copied as they are.
1144 etUSASCII: ZSetString(PAnsiChar(Src), Result);
// Next group (presumably the ansi-detected label): a conversion is only made
// when the target is UTF-8; the other visible outcome is a plain byte copy.
1146 if (RawCP = zCP_UTF8) then
1147 if ZCompatibleCodePages(StringCP, zCP_UTF8 ) then
1148 Result := ZUnicodeToRaw(ZWideString(Src), RawCP) //Random success unknown String CP
1150 Result := ZConvertStringToRaw(Src, StringCP, RawCP)
1152 ZSetString(PAnsiChar(Src), Result);
// Last group (presumably the UTF-8-detected label): copy when the target is
// UTF-8 as well, else convert from UTF-8 to RawCP.
1154 if (RawCP = zCP_UTF8) then
1155 ZSetString(PAnsiChar(Src), Result)
1157 Result := ZConvertStringToRaw(Src, zCP_UTF8, RawCP);
// Converts a UTF-8 string into a String of code page StringCP.
// NOTE(review): the conditional-compilation and begin/end lines are missing
// from this listing; the two paths below are presumably alternatives.
1163 function ZConvertUTF8ToString(const Src: UTF8String;
1164 const StringCP: Word): String;
1167 US: ZWideString; //COM based. Localize the Value to avoid buffer overrun
// Path 1: the decoded wide string is the result itself.
1175 Result := UTF8ToString(PAnsiChar(Src));
// Path 2: decode to wide, encode to StringCP, then copy the bytes into Result.
1178 Result := ''; //Makes Compiler happy
1179 US := UTF8ToString(PAnsiChar(Src));
1180 S := ZUnicodeToRaw(US, StringCP);
1181 ZSetString(PAnsiChar(S), Result);
// Converts a String of code page StringCP into UTF-8.
// NOTE(review): the conditional-compilation and begin/end lines are missing
// from this listing; the two paths below are presumably alternatives.
1186 function ZConvertStringToUTF8(const Src: String;
1187 const StringCP: Word): UTF8String;
1190 US: ZWideString; //COM based. Localize the Value to avoid buffer overrun
// Path 1: plain typecast of Src to UTF8String.
1197 Result := UTF8String(Src);
// Path 2: decode Src with StringCP into a wide string, then produce UTF-8 by
// a cast where WITH_RAWBYTESTRING is defined, else (presumably) by UTF8Encode.
1200 US := ZRawToUnicode(Src, StringCP);
1201 {$IFDEF WITH_RAWBYTESTRING}
1202 Result := UTF8String(US);
1204 Result := UTF8Encode(US);
// Converts a String into UTF-8, looking at the bytes of Src first so text that
// already is ASCII/UTF-8 is not encoded a second time.
// NOTE(review): else/begin/end and conditional-compilation lines are missing
// from this listing; the grouping below is inferred -- TODO confirm.
1210 function ZConvertStringToUTF8WithAutoEncode(const Src: String;
1211 const StringCP: Word): UTF8String;
1213 var Tmp: ZWideString; //COM based. Localize the Value to avoid buffer overrun
// Plain typecast path (presumably the Unicode-String build).
1217 Result := UTF8String(Src);
// Already ASCII or UTF-8: copy the bytes unchanged.
1220 If DetectUTF8Encoding(PAnsiChar(Src)) in [etUSASCII, etUTF8] then
1221 ZSetString(PAnsiChar(Src), Result)
// Otherwise go through a wide string. When StringCP itself claims UTF-8 the
// bytes cannot be decoded with it, so a compiler cast is used instead.
1224 if ZCompatibleCodePages(StringCP, zCP_UTF8) then
1225 Tmp := ZWideString(Src)
1227 Tmp := ZRawToUnicode(PAnsiChar(Src), StringCP);
1228 {$IFDEF WITH_RAWBYTESTRING}
1229 Result := UTF8String(Tmp);
1231 Result := UTF8Encode(Tmp);
// Converts a String of code page StringCP into an AnsiString.
// NOTE(review): the conditional-compilation and begin/end lines are missing
// from this listing; the two paths below are presumably alternatives.
1237 function ZConvertStringToAnsi(const Src: String;
1238 const StringCP: Word): AnsiString;
1240 var Tmp: ZWideString; //COM based. Localize the Value to avoid buffer overrun
// Path 1: plain typecast of Src.
1244 Result := AnsiString(Src);
// Path 2: decode with StringCP into a wide string, then cast that to AnsiString.
1246 Tmp := ZRawToUnicode(PAnsiChar(Src), StringCP);
1247 Result := AnsiString(Tmp);
// Converts a String into an AnsiString, looking at the bytes of Src first.
// StringCP is not referenced by any visible statement.
// NOTE(review): else/begin/end and conditional-compilation lines are missing
// from this listing; the grouping below is inferred -- TODO confirm.
1254 function ZConvertStringToAnsiWithAutoEncode(const Src: String;
1255 const StringCP: Word): AnsiString;
1257 var Tmp: ZWideString; //COM based. Localize the Value to avoid buffer overrun
// Plain typecast path (presumably the Unicode-String build).
1261 Result := AnsiString(Src);
// Detected as ASCII or ansi already: copy the bytes unchanged.
1264 If DetectUTF8Encoding(PAnsiChar(Src)) in [etUSASCII, etAnsi] then
1265 ZSetString(PAnsiChar(Src), Result)
// Otherwise the bytes are taken as UTF-8: decode to wide, then cast to ansi.
1268 Tmp := UTF8ToString(PAnsiChar(Src));
1269 Result := AnsiString(Tmp);
// Converts an AnsiString into a String of code page StringCP.
// NOTE(review): the conditional-compilation and begin/end lines are missing
// from this listing; the two paths below are presumably alternatives.
1277 function ZConvertAnsiToString(const Src: AnsiString;
1278 const StringCP: Word): String;
1280 var Tmp: ZWideString; //COM based. Localize the Value to avoid buffer overrun
// Path 1: plain typecast of Src.
1284 Result := String(Src);
// Path 2: decode with the system default code page, re-encode to StringCP and
// copy the resulting bytes into Result.
1287 Tmp := ZRawToUnicode(PAnsiChar(Src), ZDefaultSystemCodePage);
1288 ZSetString(PAnsiChar(ZUnicodeToRaw(Tmp, StringCP)), Result);
// Converts a wide string into a String of code page StringCP.
// NOTE(review): lines between the declaration and the statements are missing
// from this listing (a further branch may exist there -- TODO confirm).
1292 function ZConvertUnicodeToString(const Src: ZWideString;
1293 const StringCP: Word): String;
1295 var Tmp: RawByteString;
// Encode to StringCP, then copy the bytes into Result.
1301 Tmp := ZUnicodeToRaw(Src, StringCP);
1302 ZSetString(PAnsiChar(Tmp), Result);
// Variant of ZConvertUnicodeToString for a UTF-8 String code page: the visible
// statements always encode with UTF8Encode and never read StringCP.
// NOTE(review): lines between the declaration and the statements are missing
// from this listing (a further branch may exist there -- TODO confirm).
1309 function ZConvertUnicodeToString_CPUTF8(const Src: ZWideString;
1310 const StringCP: Word): String;
1312 var Tmp: RawByteString;
// Encode to UTF-8, then copy the bytes into Result.
1318 Tmp := UTF8Encode(Src);
1319 ZSetString(PAnsiChar(Tmp), Result);
// Converts a String of code page StringCP into a wide string.
// NOTE(review): lines between the declaration and the statements are missing
// from this listing (a further branch may exist there -- TODO confirm).
1326 function ZConvertStringToUnicode(const Src: String;
1327 const StringCP: Word): ZWideString;
1329 var Tmp: RawByteString;
1335 Tmp := ''; //Makes compiler Happy
// Copy the bytes of Src into a raw string, then decode them with StringCP.
1336 ZSetString(PAnsiChar(Src), Tmp);
1337 Result := ZRawToUnicode(Tmp, StringCP);
// Variant of ZConvertStringToUnicode for a UTF-8 String code page: the visible
// statements always decode with UTF8ToString and never read StringCP.
// NOTE(review): lines between the declaration and the statements are missing
// from this listing (a further branch may exist there -- TODO confirm).
1344 function ZConvertString_CPUTF8ToUnicode(const Src: String;
1345 const StringCP: Word): ZWideString;
1347 var Tmp: RawByteString;
1353 Tmp := ''; //Makes Compiler happy
// Copy the bytes of Src into a raw string, then decode them as UTF-8.
1354 ZSetString(PAnsiChar(Src), Tmp);
1355 Result := UTF8ToString(Tmp);
// Converts a String into a wide string, looking at the bytes of Src first so
// UTF-8 text is decoded as UTF-8 whatever StringCP says.
// NOTE(review): else/begin/end and conditional-compilation lines are missing
// from this listing; the grouping below is inferred -- TODO confirm.
1363 function ZConvertStringToUnicodeWithAutoEncode(const Src: String;
1364 const StringCP: Word): ZWideString;
// Detected as ASCII or UTF-8: decode as UTF-8.
1369 If DetectUTF8Encoding(PAnsiChar(Src)) in [etUSASCII, etUTF8] then
1370 Result := UTF8ToString(PAnsiChar(Src))
// Otherwise: when StringCP itself claims UTF-8 the bytes cannot be decoded
// with it, so a compiler cast is used; else decode with StringCP.
1372 if ZCompatibleCodePages(StringCP, zCP_UTF8) then
1373 Result := ZWideString(Src)
1375 Result := ZRawToUnicode(PAnsiChar(Src), StringCP);
{**
  Hands an AnsiString over as a raw string without any transcoding.
  The bytes are copied by ZSetString; only a PAnsiChar is passed on, so the
  copy presumably ends at the first #0 - confirm against ZSetString.
  @param Src the ansi encoded source
  @param RawCP the raw code page; not used by this routine
  @return the unchanged bytes of Src as RawByteString
}
function ZMoveAnsiToRaw(const Src: AnsiString; const RawCP: Word): RawByteString;
begin
  ZSetString(PAnsiChar(Src), Result);
end;
{**
  Hands a raw string over as an AnsiString without any transcoding.
  The bytes are copied by ZSetString; only a PAnsiChar is passed on, so the
  copy presumably ends at the first #0 - confirm against ZSetString.
  @param Src the raw source
  @param RawCP the raw code page; not used by this routine
  @return the unchanged bytes of Src as AnsiString
}
function ZMoveRawToAnsi(const Src: RawByteString; const RawCP: Word): AnsiString;
begin
  ZSetString(PAnsiChar(Src), Result);
end;
{**
  Hands an AnsiString over as a UTF8String without any transcoding.
  Going through ZSetString instead of a plain assignment copies the bytes as
  they are; an assignment could trigger an implicit code page conversion.
  @param Src the ansi typed source
  @return the unchanged bytes of Src as UTF8String
}
function ZMoveAnsiToUTF8(const Src: AnsiString): UTF8String;
begin
  ZSetString(PAnsiChar(Src), Result);
end;
{**
  Hands a UTF8String over as an AnsiString without any transcoding.
  Going through ZSetString instead of a plain assignment copies the bytes as
  they are; an assignment could trigger an implicit code page conversion.
  @param Src the UTF8 typed source
  @return the unchanged bytes of Src as AnsiString
}
function ZMoveUTF8ToAnsi(const Src: UTF8String): AnsiString;
begin
  ZSetString(PAnsiChar(Src), Result);
end;
{**
  Hands a raw string over as a UTF8String without any transcoding.
  @param Src the raw source
  @param CP the code page of Src; not used by this routine
  @return the unchanged bytes of Src as UTF8String
}
function ZMoveRawToUTF8(const Src: RawByteString; const CP: Word): UTF8String;
begin
  ZSetString(PAnsiChar(Src), Result);
end;
{**
  Hands a UTF8String over as a raw string without any transcoding.
  @param Src the UTF8 typed source
  @param CP the target code page; not used by this routine
  @return the unchanged bytes of Src as RawByteString
}
function ZMoveUTF8ToRaw(Const Src: UTF8String; const CP: Word): RawByteString;
begin
  ZSetString(PAnsiChar(Src), Result);
end;
{**
  Hands a String over as an AnsiString.
  Where String is a Unicode string a typecast does the work; otherwise the
  bytes are copied unchanged by ZSetString.
  @param Src the source string
  @param StringCP the code page of Src; not used by this routine
  @return Src as AnsiString
}
function ZMoveStringToAnsi(Const Src: String; const StringCP: Word): AnsiString;
begin
  {$IFDEF UNICODE}
  Result := AnsiString(Src);
  {$ELSE}
  ZSetString(PAnsiChar(Src), Result);
  {$ENDIF}
end;
{**
  Hands an AnsiString over as a String.
  Where String is a Unicode string a typecast does the work; otherwise the
  bytes are copied unchanged by ZSetString.
  @param Src the ansi source
  @param StringCP the code page of the result; not used by this routine
  @return Src as String
}
function ZMoveAnsiToString(const Src: AnsiString; const StringCP: Word): String;
begin
  {$IFDEF UNICODE}
  Result := String(Src);
  {$ELSE}
  ZSetString(PAnsiChar(Src), Result);
  {$ENDIF}
end;
{**
  Hands a raw string over as a String.
  Where String is a Unicode string the bytes still have to be decoded, so
  ZRawToUnicode is called with RawCP; otherwise the bytes are copied
  unchanged by ZSetString.
  @param Src the raw source
  @param RawCP the code page of Src (only read in the Unicode build)
  @param StringCP the code page of the result; not used by this routine
  @return Src as String
}
function ZMoveRawToString(const Src: RawByteString;
  const RawCP, StringCP: Word): String;
begin
  {$IFDEF UNICODE}
  Result := ZRawToUnicode(Src, RawCP);
  {$ELSE}
  ZSetString(PAnsiChar(Src), Result);
  {$ENDIF}
end;
{**
  Hands a String over as a raw string.
  Where String is a Unicode string the text still has to be encoded, so
  ZUnicodeToRaw is called with RawCP; otherwise the bytes are copied
  unchanged by ZSetString.
  @param Src the source string
  @param StringCP the code page of Src; not used by this routine
  @param RawCP the code page of the result (only read in the Unicode build)
  @return Src as RawByteString
}
function ZMoveStringToRaw(const Src: String;
  const StringCP, RawCP: Word): RawByteString;
begin
  {$IFDEF UNICODE}
  Result := ZUnicodeToRaw(Src, RawCP);
  {$ELSE}
  ZSetString(PAnsiChar(Src), Result);
  {$ENDIF}
end;
{**
  Hands a UTF8String over as a String.
  Where String is a Unicode string a typecast does the work; otherwise the
  bytes are copied unchanged by ZSetString.
  @param Src the UTF8 typed source
  @param StringCP the code page of the result; not used by this routine
  @return Src as String
}
function ZMoveUTF8ToString(const Src: UTF8String; StringCP: Word): String;
begin
  {$IFDEF UNICODE}
  Result := String(Src);
  {$ELSE}
  ZSetString(PAnsiChar(Src), Result);
  {$ENDIF}
end;
{**
  Hands a String over as a UTF8String.
  Where String is a Unicode string a typecast does the work; otherwise the
  bytes are copied unchanged by ZSetString.
  @param Src the source string
  @param StringCP the code page of Src; not used by this routine
  @return Src as UTF8String
}
function ZMoveStringToUTF8(const Src: String; const StringCP: Word): UTF8String;
begin
  {$IFDEF UNICODE}
  Result := UTF8String(Src);
  {$ELSE}
  ZSetString(PAnsiChar(Src), Result);
  {$ENDIF}
end;
1475 GetValidatedAnsiStringFromBuffer tests the encoding of the given memory
1476 buffer and returns a valid UTF8/Ansi encoded string
1477 @param Buffer the buffer holding data of unknown encoding
1478 @return a valid UTF8 or Ansi encoded string
// Takes Size bytes of unknown encoding from Buffer, classifies them with
// TestEncoding and returns them as raw bytes, UTF-8 or ansi depending on
// ConSettings.ClientCodePage.Encoding.
// NOTE(review): short lines (begin/end/else, {$ELSE}/{$ENDIF}, the case labels
// after ceDefault and whatever precedes SetLength) are missing from this
// listing; the grouping described below is inferred -- TODO confirm.
1481 function GetValidatedAnsiStringFromBuffer(const Buffer: Pointer; Size: Cardinal;
1482 ConSettings: PZConSettings): RawByteString;
1485 Bytes: TByteDynArray;
// Work on a private copy that is two bytes longer than the data, so the
// PAnsiChar/PWideChar reads below find a terminator (assumes SetLength
// zero-fills the new array elements).
1491 SetLength(Bytes, Size +2);
1492 System.move(Buffer^, Pointer(Bytes)^, Size);
1493 case TestEncoding(Bytes, Size, ConSettings) of
// Nothing to do: hand the bytes back unchanged.
1494 ceDefault: Result := PAnsiChar(Bytes);
// Next group (presumably the ansi-detected label).
1496 if ConSettings.ClientCodePage.Encoding = ceAnsi then
1497 if ( ConSettings.CTRL_CP = zCP_UTF8) or (ConSettings.CTRL_CP = ConSettings.ClientCodePage.CP) then //second test avoids encode the string twice
1498 Result := PAnsiChar(Bytes) //should be exact
1500 {$IFDEF WITH_LCONVENCODING}
1501 Result := Consettings.PlainConvertFunc(AnsiToUTF8(PAnsiChar(Bytes))) //no other possibility
// Recode from the control code page to the client code page via wide string.
1503 Result := ZUnicodeToRaw(ZRawToUnicode(PAnsiChar(Bytes), ConSettings.CTRL_CP), ConSettings.ClientCodePage.CP)
1505 else //Database expects UTF8
1506 if ( ConSettings.CTRL_CP = zCP_UTF8) then
1507 Result := AnsiToUTF8(String(PAnsiChar(Bytes))) //Can't localize the ansi CP
1509 {$IFDEF WITH_LCONVENCODING}
1510 Result := AnsiToUTF8(PAnsiChar(Bytes));
1512 Result := UTF8Encode(ZRawToUnicode(PAnsiChar(Bytes), ConSettings.CTRL_CP));
// Next group (presumably the UTF-8-detected label).
1515 if ConSettings.ClientCodePage.Encoding = ceAnsi then //ansi expected
1516 {$IFDEF WITH_LCONVENCODING}
1517 Result := Consettings.PlainConvertFunc(String(PAnsiChar(Bytes)))
1519 Result := ZUnicodeToRaw(UTF8ToString(PAnsiChar(Bytes)), ConSettings.ClientCodePage.CP)
1521 else //UTF8 Expected
1522 Result := PAnsiChar(Bytes);
// Last group (presumably the UTF-16-detected label): reinterpret the bytes
// as WideChars.
// NOTE(review): Move copies Size bytes but US only holds Size div 2 chars;
// for an odd Size one byte lands behind the character data (for Size = 1 in
// an empty string). Copying Length(US)*2 bytes would avoid that.
1525 SetLength(US, Size div 2);
1526 System.Move(PWideChar(Bytes)^, PWideChar(US)^, Size);
1527 if ConSettings.ClientCodePage.Encoding = ceAnsi then
1528 {$IFDEF WITH_LCONVENCODING}
1529 Result := Consettings.PlainConvertFunc(UTF8Encode(US))
1531 Result := ZUnicodeToRaw(US, ConSettings.ClientCodePage.CP)
1534 Result := UTF8Encode(US);
{**
  Validates a buffer like the three-argument overload, but lets the result be
  produced for the code page ToCP instead of the connection's client code page.
  The client code page in ConSettings is replaced for the duration of the call
  only; try..finally guarantees it is put back even when the validation raises,
  so a failed conversion can no longer leave the connection settings pointing
  at the wrong code page.
  @param Buffer the buffer holding data of unknown encoding
  @param Size the number of bytes in Buffer
  @param ConSettings the connection settings (modified temporarily)
  @param ToCP the code page to use in place of the client code page
  @return the validated raw string
}
function GetValidatedAnsiStringFromBuffer(const Buffer: Pointer; Size: Cardinal;
  ConSettings: PZConSettings; ToCP: Word): RawByteString;
var
  DB_CP: Word;
begin
  DB_CP := ConSettings.ClientCodePage.CP;
  ConSettings.ClientCodePage.CP := ToCP;
  try
    Result := GetValidatedAnsiStringFromBuffer(Buffer, Size, ConSettings);
  finally
    // always restore the code page of the database connection
    ConSettings.ClientCodePage.CP := DB_CP;
  end;
end;
// Returns Ansi in the encoding the caller side expects.
// NOTE(review): short lines (begin/end/else, {$ELSE}/{$ENDIF} and the lines
// between the visible ones) are missing from this listing; where FromDB is
// tested cannot be seen here -- TODO confirm.
1553 function GetValidatedAnsiString(const Ansi: RawByteString;
1554 ConSettings: PZConSettings; const FromDB: Boolean): RawByteString;
// No conversion is wanted when both code pages are equal or AutoEncode is off
// (the statement executed for this case is not visible here).
1557 if ( ConSettings.CTRL_CP = ConSettings.ClientCodePage.CP ) or not ConSettings.AutoEncode then
1560 {$IFDEF WITH_LCONVENCODING}
1561 Result := Consettings.DbcConvertFunc(Ansi)
// Recode from the client code page to the control code page via wide string.
1563 Result := ZUnicodeToRaw(ZRawToUnicode(Ansi, ConSettings^.ClientCodePage^.CP), ConSettings^.CTRL_CP)
1566 Result := ''; // not done yet and not needed. Makes the compiler happy
// Wide-string overload: encodes Uni for the caller side.
// NOTE(review): short lines (begin/end/else, {$ELSE}/{$ENDIF}) are missing from
// this listing; where FromDB is tested cannot be seen here -- TODO confirm.
1569 function GetValidatedAnsiString(const Uni: ZWideString;
1570 ConSettings: PZConSettings; const FromDB: Boolean): RawByteString;
// With WITH_LCONVENCODING the result is always UTF-8 ...
1573 {$IFDEF WITH_LCONVENCODING}
1574 Result := UTF8Encode(Uni)
// ... otherwise (presumably) it is encoded to the control code page.
1576 Result := ZUnicodeToRaw(Uni, ConSettings^.CTRL_CP)
1579 Result := ''; // not done yet and not needed. Makes the compiler happy
{**
  Validates a buffer whose content may already have been decoded to WideChars.
  If WasDecoded is set, the buffer is taken as a sequence of WideChars and
  encoded to the client code page; otherwise the work is delegated to the
  overload that tests the encoding of the bytes.
  Only whole WideChars are copied: the former Move of Size bytes into a string
  of Size div 2 characters wrote one byte behind the character data for an odd
  Size (and into an empty string for Size = 1).
  @param Buffer the buffer holding the data
  @param Size the number of bytes in Buffer
  @param WasDecoded true if Buffer holds WideChars
  @param ConSettings the connection settings
  @return the validated raw string
}
function GetValidatedAnsiStringFromBuffer(const Buffer: Pointer; Size: Cardinal;
  WasDecoded: Boolean; ConSettings: PZConSettings): RawByteString;
var
  US: ZWideString;
begin
  if WasDecoded then
  begin
    SetLength(US, Size div 2);
    // copy exactly what US can hold; a trailing odd byte is ignored
    if Length(US) > 0 then
      System.Move(Buffer^, PWideChar(US)^, Length(US) * SizeOf(WideChar));
    Result := ZUnicodeToRaw(US, ConSettings.ClientCodePage.CP);
  end
  else
    Result := GetValidatedAnsiStringFromBuffer(Buffer, Size, ConSettings);
end;
1597 GetValidatedUnicodeStream tests the encoding of the given memory buffer and
1598 returns a valid Unicode/WideString stream
1599 @param Buffer the buffer holding data of unknown encoding
1600 @return a valid Unicode encoded string stream
// Builds a stream holding the WideChar representation of the Size bytes in
// Buffer. Data coming from the database is passed to the RawByteString
// overload; anything else is classified with TestEncoding first.
// NOTE(review): short lines (begin/end/else, {$ELSE}/{$ENDIF}, several case
// labels, the initialisation of Result and the sizing of the stream) are
// missing from this listing; the grouping below is inferred -- TODO confirm.
1602 function GetValidatedUnicodeStream(const Buffer: Pointer; Size: Cardinal;
1603 ConSettings: PZConSettings; FromDB: Boolean): TStream;
1607 Bytes: TByteDynArray;
// Local helper: reinterpret Bytes as WideChars. No call to it is visible in
// this listing; the same two statements are repeated inline further down.
1609 procedure SetFromWide;
1611 SetLength(US, Size div 2);
1612 System.Move(PWideChar(Bytes)^, PWideChar(US)^, Size);
1617 if Assigned(Buffer) and ( Size > 0 ) then
// Private copy, two bytes longer than the data, so the PAnsiChar/PWideChar
// reads below find a terminator (assumes SetLength zero-fills new elements).
1619 SetLength(Bytes, Size +2);
1620 System.move(Buffer^, Pointer(Bytes)^, Size);
1621 if FromDB then //do not check encoding twice
1622 Result := GetValidatedUnicodeStream(PAnsiChar(Bytes), ConSettings, FromDB)
1624 case TestEncoding(Bytes, Size, ConSettings) of
// Inner dispatch on what the client code page says about the data.
1626 case Consettings.ClientCodePage.Encoding of
1627 ceUTF8: US := UTF8ToString(PAnsiChar(Bytes));
1629 {$IFDEF WITH_LCONVENCODING}
1630 US := ZWideString(PAnsiChar(Bytes)); //cast means random success
1632 if ( ConSettings.CTRL_CP = zCP_UTF8) then
1633 US := ZWideString(PAnsiChar(Bytes)) //random success
1635 US := ZRawToUnicode(PAnsiChar(Bytes), ConSettings.CTRL_CP);
1638 ceAnsi: //We have to start from the premise that we got a Unicode string in there
// NOTE(review): Move copies Size bytes but US only holds Size div 2 chars;
// for an odd Size one byte lands behind the character data. The same applies
// to the two other SetLength/Move pairs in this function.
1640 SetLength(US, Size div 2);
1641 System.Move(PWideChar(Bytes)^, PWideChar(US)^, Size);
1643 ceUTF8: US := UTF8ToString(PAnsiChar(Bytes));
1646 SetLength(US, Size div 2);
1647 System.Move(PWideChar(Bytes)^, PWideChar(US)^, Size);
// Byte length of the decoded text (two bytes per WideChar).
1651 Len := Length(US)*2;
// Only create a stream here if the FromDB path above has not produced one.
1652 if not Assigned(Result) and (Len > 0) then
1654 Result := TMemoryStream.Create;
1656 System.Move(PWideChar(US)^, TMemoryStream(Result).Memory^, Len);
1657 Result.Position := 0;
1659 SetLength(Bytes, 0);
// Builds a memory stream holding the WideChar representation of Ansi.
// NOTE(review): short lines (begin/end/else, {$ELSE}/{$ENDIF}, the test of
// FromDB, one case label and the sizing of the stream) are missing from this
// listing; the grouping below is inferred -- TODO confirm.
1663 function GetValidatedUnicodeStream(const Ansi: RawByteString;
1664 ConSettings: PZConSettings; FromDB: Boolean): TStream;
// First group (presumably the FromDB path): decode with the client code page.
1673 {$IFDEF WITH_LCONVENCODING}
1674 US := UTF8ToString(Consettings.DbcConvertFunc(Ansi))
1676 US := ZRawToUnicode(Ansi, ConSettings.ClientCodePage.CP)
// Second group: the origin is unknown, so the bytes are inspected.
1679 case DetectUTF8Encoding(Ansi) of
1680 etUSASCII, etUTF8: US := UTF8ToString(Ansi);
1682 {$IFDEF WITH_LCONVENCODING}
1683 US := ZWideString(Ansi); //random success
1685 if ( ConSettings.CTRL_CP = zCP_UTF8) then
1686 US := ZWideString(Ansi) //random success
1688 US := ZRawToUnicode(Ansi, ConSettings.CTRL_CP);
// Byte length of the decoded text (two bytes per WideChar).
1692 Len := Length(US)*2;
1695 Result := TMemoryStream.Create;
// Copy the WideChars into the stream memory and rewind it for the caller.
1697 System.Move(PWideChar(US)^, TMemoryStream(Result).Memory^, Len);
1698 Result.Position := 0;
// Fills ConSettings^.ConvFuncs with the converter routines matching the
// system code page, the control code page (CTRL_CP), the client code page and
// the AutoEncode switch. Where two code pages are compatible
// (ZCompatibleCodePages) a byte-copying ZMove* routine is installed, otherwise
// a transcoding ZConvert* routine.
// NOTE(review): short lines (begin/end/else, {$IFDEF}/{$ELSE}/{$ENDIF}) are
// missing from this listing and the end of the procedure is not visible; the
// grouping described in the comments below is inferred -- TODO confirm.
1703 procedure SetConvertFunctions(ConSettings: PZConSettings);
// Start from a clean table: every slot is reset before being assigned.
1705 ConSettings^.ConvFuncs.ZAnsiToUTF8 := nil;
1706 ConSettings^.ConvFuncs.ZUTF8ToAnsi:= nil;
1707 ConSettings^.ConvFuncs.ZUTF8ToString:= nil;
1708 ConSettings^.ConvFuncs.ZStringToUTF8:= nil;
1709 ConSettings^.ConvFuncs.ZAnsiToRaw:= nil;
1710 ConSettings^.ConvFuncs.ZRawToAnsi:= nil;
1711 ConSettings^.ConvFuncs.ZRawToUTF8:= nil;
1712 ConSettings^.ConvFuncs.ZUTF8ToRaw:= nil;
1713 ConSettings^.ConvFuncs.ZStringToRaw:= nil;
1714 ConSettings^.ConvFuncs.ZAnsiToString := nil;
1715 ConSettings^.ConvFuncs.ZStringToAnsi := nil;
1716 ConSettings^.ConvFuncs.ZRawToString:= nil;
1717 ConSettings^.ConvFuncs.ZUnicodeToRaw:= nil;
1718 ConSettings^.ConvFuncs.ZRawToUnicode:= nil;
1719 ConSettings^.ConvFuncs.ZUnicodeToString:= nil;
1720 ConSettings^.ConvFuncs.ZStringToUnicode:= nil;
1722 //Let's start with the AnsiTo/From types..
1723 // Ansi to/from UTF8String
1724 if ZCompatibleCodePages(ZDefaultSystemCodePage, zCP_UTF8) then
1726 ConSettings^.ConvFuncs.ZAnsiToUTF8 := @ZMoveAnsiToUTF8;
1727 ConSettings^.ConvFuncs.ZUTF8ToAnsi := @ZMoveUTF8ToAnsi;
1731 ConSettings^.ConvFuncs.ZAnsiToUTF8 := @ZConvertAnsiToUTF8;
1732 ConSettings^.ConvFuncs.ZUTF8ToAnsi := @ZConvertUTF8ToAnsi;
1735 // Ansi to/from String
1736 if ZCompatibleCodePages(ZDefaultSystemCodePage, ConSettings^.CTRL_CP) then
1738 ConSettings^.ConvFuncs.ZAnsiToString := @ZMoveAnsiToString;
// With AutoEncode the String -> Ansi direction always inspects the bytes,
// whether or not the code pages are compatible.
1739 if ConSettings^.AutoEncode then
1740 ConSettings^.ConvFuncs.ZStringToAnsi := @ZConvertStringToAnsiWithAutoEncode
1742 ConSettings^.ConvFuncs.ZStringToAnsi := @ZMoveStringToAnsi;
1746 ConSettings^.ConvFuncs.ZAnsiToString := @ZConvertAnsiToString;
1747 if ConSettings^.AutoEncode then
1748 ConSettings^.ConvFuncs.ZStringToAnsi := @ZConvertStringToAnsiWithAutoEncode
1750 ConSettings^.ConvFuncs.ZStringToAnsi := @ZConvertStringToAnsi;
// Everything up to the "autoencode strings is always true" else further down
// applies to client code pages that keep string fields in one code page.
1753 if ConSettings^.ClientCodePage^.IsStringFieldCPConsistent then
// raw to/from UTF8
1756 if ZCompatibleCodePages(ConSettings^.ClientCodePage^.CP, zCP_UTF8) then
1758 ConSettings^.ConvFuncs.ZRawToUTF8 := @ZMoveRawToUTF8;
1759 ConSettings^.ConvFuncs.ZUTF8ToRaw := @ZMoveUTF8ToRaw;
1763 ConSettings^.ConvFuncs.ZRawToUTF8 := @ZConvertRawToUTF8;
1764 ConSettings^.ConvFuncs.ZUTF8ToRaw := @ZConvertUTF8ToRaw;
// raw to/from ansi
1768 if ZCompatibleCodePages(ConSettings^.ClientCodePage^.CP, ZDefaultSystemCodePage) then
1770 ConSettings^.ConvFuncs.ZAnsiToRaw := @ZMoveAnsiToRaw;
1771 ConSettings^.ConvFuncs.ZRawToAnsi := @ZMoveRawToAnsi;
1775 ConSettings^.ConvFuncs.ZAnsiToRaw := @ZConvertAnsiToRaw;
1776 ConSettings^.ConvFuncs.ZRawToAnsi := @ZConvertRawToAnsi;
1779 // raw to/from unicode
// A client code page of zCP_NONE gets the ZUnknown* converters.
1780 if ConSettings^.ClientCodePage^.CP = zCP_NONE then
1782 if ConSettings^.AutoEncode then
1783 ConSettings^.ConvFuncs.ZRawToUnicode := @ZUnknownRawToUnicodeWithAutoEncode
1785 ConSettings^.ConvFuncs.ZRawToUnicode := @ZUnknownRawToUnicode;
1786 ConSettings^.ConvFuncs.ZUnicodeToRaw := @ZUnicodeToUnknownRaw;
1790 ConSettings^.ConvFuncs.ZRawToUnicode := @ZRawToUnicode;
1791 ConSettings^.ConvFuncs.ZUnicodeToRaw := @ZUnicodeToRaw;
1794 //last but not least the String to/from converters
1795 //string represents the DataSet/IZResultSet Strings
1796 if ZCompatibleCodePages(ConSettings^.CTRL_CP, zCP_UTF8) then
1798 ConSettings^.ConvFuncs.ZUTF8ToString := @ZMoveUTF8ToString;
1799 if ConSettings^.AutoEncode then
1800 ConSettings^.ConvFuncs.ZStringToUTF8 := @ZConvertStringToUTF8WithAutoEncode
1802 ConSettings^.ConvFuncs.ZStringToUTF8 := @ZMoveStringToUTF8;
1806 ConSettings^.ConvFuncs.ZUTF8ToString := @ZConvertUTF8ToString;
1807 if ConSettings^.AutoEncode then
1808 ConSettings^.ConvFuncs.ZStringToUTF8 := @ZConvertStringToUTF8WithAutoEncode
1810 ConSettings^.ConvFuncs.ZStringToUTF8 := @ZConvertStringToUTF8
// The four unconditional assignments below are given again by the
// "String To/From Raw" and "String To/From Unicode" sections that follow, so
// the two parts are presumably alternatives of a conditional-compilation
// block whose directives are not visible here -- TODO confirm.
1814 Consettings^.ConvFuncs.ZStringToRaw := @ZConvertStringToRaw;
1815 Consettings^.ConvFuncs.ZRawToString := @ZConvertRawToString;
1817 ConSettings^.ConvFuncs.ZUnicodeToString := @ZConvertUnicodeToString;
1818 Consettings^.ConvFuncs.ZStringToUnicode := @ZConvertStringToUnicode;
1820 {String To/From Raw}
1821 if ZCompatibleCodePages(ConSettings^.ClientCodePage^.CP, ConSettings^.CTRL_CP) then
1823 Consettings^.ConvFuncs.ZRawToString := @ZMoveRawToString;
1824 if ConSettings^.AutoEncode then
1825 Consettings^.ConvFuncs.ZStringToRaw := @ZConvertStringToRawWithAutoEncode
1827 Consettings^.ConvFuncs.ZStringToRaw := @ZMoveStringToRaw;
// Code pages differ: conversion is only installed with AutoEncode; without
// it the bytes are moved unchanged in both directions.
1830 if ConSettings^.AutoEncode then
1832 Consettings^.ConvFuncs.ZRawToString := @ZConvertRawToString;
1833 Consettings^.ConvFuncs.ZStringToRaw := @ZConvertStringToRawWithAutoEncode;
1837 Consettings^.ConvFuncs.ZStringToRaw := @ZMoveStringToRaw;
1838 Consettings^.ConvFuncs.ZRawToString := @ZMoveRawToString;
1841 {String To/From Unicode}
1842 if ConSettings^.CTRL_CP = zCP_UTF8 then
1843 Consettings^.ConvFuncs.ZUnicodeToString := @ZConvertUnicodeToString_CPUTF8
1845 Consettings^.ConvFuncs.ZUnicodeToString := @ZConvertUnicodeToString;
1847 if ConSettings^.AutoEncode then
1848 Consettings^.ConvFuncs.ZStringToUnicode := @ZConvertStringToUnicodeWithAutoEncode
1850 if ConSettings^.CTRL_CP = zCP_UTF8 then
1851 Consettings^.ConvFuncs.ZStringToUnicode := @ZConvertString_CPUTF8ToUnicode
1853 Consettings^.ConvFuncs.ZStringToUnicode := @ZConvertStringToUnicode;
// Inconsistent string-field code page: only converting routines are
// installed, and the String -> X directions use the auto-encode variants.
1856 else //autoencode strings is always true
1858 ConSettings^.ConvFuncs.ZUTF8ToString := @ZConvertUTF8ToString;
1859 ConSettings^.ConvFuncs.ZStringToUTF8 := @ZConvertStringToUTF8WithAutoEncode;
1860 ConSettings^.ConvFuncs.ZAnsiToRaw := @ZConvertAnsiToRaw;
1861 ConSettings^.ConvFuncs.ZRawToAnsi := @ZConvertRawToAnsi;
1862 ConSettings^.ConvFuncs.ZRawToUTF8 := @ZConvertRawToUTF8;
1863 ConSettings^.ConvFuncs.ZUTF8ToRaw := @ZConvertUTF8ToRaw;
1864 Consettings^.ConvFuncs.ZStringToRaw := @ZConvertStringToRawWithAutoEncode;
1865 Consettings^.ConvFuncs.ZRawToString := @ZConvertRawToString;
1866 Consettings^.ConvFuncs.ZUnicodeToRaw := @ZUnicodeToRaw;
1867 Consettings^.ConvFuncs.ZRawToUnicode := @ZRawToUnicode;
1868 ConSettings^.ConvFuncs.ZUnicodeToString := @ZConvertUnicodeToString;
1869 Consettings^.ConvFuncs.ZStringToUnicode := @ZConvertStringToUnicodeWithAutoEncode;