zeoslib  UNKNOWN
 All Files
ZEncoding.pas
Go to the documentation of this file.
1 {*********************************************************}
2 { }
3 { Zeos Database Objects }
4 { PostgreSQL Database Connectivity Classes }
5 { }
6 { Originally written by Sergey Seroukhov }
7 { }
8 {*********************************************************}
9 
10 {@********************************************************}
11 { Copyright (c) 1999-2012 Zeos Development Group }
12 { }
13 { License Agreement: }
14 { }
15 { This library is distributed in the hope that it will be }
16 { useful, but WITHOUT ANY WARRANTY; without even the }
17 { implied warranty of MERCHANTABILITY or FITNESS FOR }
18 { A PARTICULAR PURPOSE. See the GNU Lesser General }
19 { Public License for more details. }
20 { }
21 { The source code of the ZEOS Libraries and packages are }
22 { distributed under the Library GNU General Public }
23 { License (see the file COPYING / COPYING.ZEOS) }
24 { with the following modification: }
25 { As a special exception, the copyright holders of this }
26 { library give you permission to link this library with }
27 { independent modules to produce an executable, }
28 { regardless of the license terms of these independent }
29 { modules, and to copy and distribute the resulting }
30 { executable under terms of your choice, provided that }
31 { you also meet, for each linked independent module, }
32 { the terms and conditions of the license of that module. }
33 { An independent module is a module which is not derived }
34 { from or based on this library. If you modify this }
35 { library, you may extend this exception to your version }
36 { of the library, but you are not obligated to do so. }
37 { If you do not wish to do so, delete this exception }
38 { statement from your version. }
39 { }
40 { }
41 { The project web site is located on: }
42 { http://zeos.firmos.at (FORUM) }
43 { http://sourceforge.net/p/zeoslib/tickets/ (BUGTRACKER)}
44 { svn://svn.code.sf.net/p/zeoslib/code-0/trunk (SVN) }
45 { }
46 { http://www.sourceforge.net/projects/zeoslib. }
47 { }
48 { }
49 { Zeos Development Group. }
50 {********************************************************@}
51 
52 unit ZEncoding;
53 
54 interface
55 
56 {$I ZCore.inc}
57 
58 uses
59  Classes, {$IFDEF MSEgui}mclasses,{$ENDIF} Math,
60  {$IFDEF WITH_LCONVENCODING}
61  {$MACRO ON}
62  LCLVersion, LConvEncoding,
63  {$ENDIF}
64  {$IF defined(MSWINDOWS) and not defined(WITH_UNICODEFROMLOCALECHARS)}
65  Windows,
66  {$IFEND}
67  ZCompatibility;
68 
69 const
70  //zCP_ACP = 0; {ASCII US}
71  zCP_EBC037 = 37; {IBM EBCDIC US-Canada}
72  zCP_EBC273 = 273; {EBCDIC Code Page 273/1 8-bit Austrian German}
73  zCP_EBC277 = 277; {EBCDIC Code Page 277/1 8-bit Danish}
74  zCP_EBC278 = 278; {EBCDIC Code Page 278/1 8-bit Swedish}
75  zCP_EBC280 = 280; {EBCDIC Code Page 280/1 8-bit Italian}
76  zCP_EBC284 = 284; {EBCDIC Code Page 284 8-bit Latin American/Spanish}
77 
78  zCP_DOS437 = 437; {IBM437/MS-DOS odepage 437 (US)}
79  zCP_DOS500 = 500; {IBM EBCDIC International}
80  zCP_DOS708 = 708; {Arabic (ASMO 708)}
81  zCP_DOS709 = 709; {Arabic (ASMO-449+, BCON V4)}
82  zCP_DOS710 = 710; {Arabic - Transparent Arabic}
83  zCP_DOS720 = 720; {Arabic (Transparent ASMO); Arabic (DOS)}
84  zCP_DOS737 = 737; {OEM Greek (formerly 437G); Greek (DOS)}
85  zCP_DOS775 = 775; {MS-DOS Codepage 775 (BaltRim)}
86  zCP_DOS850 = 850; {MS-DOS Codepage 850 (Multilingual Latin 1)}
87  zCP_DOS851 = 851; {MS-DOS Codepage 851 (Greece) - obsolete}
88  zCP_DOS852 = 852; {ibm852 852 east european(DOS)}
89  zCP_DOS853 = 853; {MS-DOS Codepage 853 (Multilingual Latin 3)}
90  zCP_DOS855 = 855; {MS-DOS Codepage 855 (Russia) - obsolete}
91  zCP_DOS856 = 856;
92  zCP_DOS857 = 857; {MS-DOS Codepage 857 (Multilingual Latin 5)}
93  zCP_DOS858 = 858; {MS-DOS Codepage 858 Latin I + Euro symbol}
94  zCP_DOS895 = 895; {MS-DOS Codepage 895 (Kamenicky CS)}
95  zCP_DOS860 = 860; {MS-DOS Codepage 860 (Portugal)}
96  zCP_DOS861 = 861; {MS-DOS Codepage 861 (Iceland)}
97  zCP_DOS862 = 862; {MS-DOS Codepage 862 (Israel)}
98  zCP_DOS863 = 863; {MS-DOS Codepage 863 (Canada (French))}
99  zCP_DOS864 = 864; {MS-DOS Codepage 864 (Arabic) without BOX DRAWINGS below 20}
100  zCP_DOS865 = 865; {MS-DOS Codepage 865 (Norway)}
101  zCP_DOS866 = 866; {ibm866 866 Cyrl (DOS)}
102  zCP_DOS869 = 869; {MS-DOS Codepage 869 (Greece)}
103  zCP_DOS870 = 870; {IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2}
104  zCP_DOS874 = 874; {ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows)}
105  zCP_EBC875 = 875; {EBCDIC Codepage 875 (Greek)}
106  zCP_MSWIN921 = 921;
107  zCP_MSWIN923 = 923;
108  zCP_EBC924 = 924; {Latin 9 EBCDIC 924}
109  zCP_SHIFTJS = 932; {ANSI/OEM Japanese; Japanese (Shift-JIS)}
110  zCP_GB2312 = 936; {ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)}
111  zCP_EUCKR = 949; {ANSI/OEM Korean (Unified Hangul Code)}
112  zCP_Big5 = 950; {ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)}
113  zCP_IBM1026 = 1026; {EBCDIC Code Page 1026 8-bit Turkish}
114  zCP_IBM01047 = 1047; {IBM EBCDIC Latin 1/Open System}
115  zCP_IBM01140 = 1140; {IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)}
116  zCP_IBM01141 = 1141; {IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)}
117  zCP_IBM01142 = 1142; {IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro)}
118  zCP_IBM01143 = 1143; {IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro)}
119  zCP_IBM01144 = 1144; {IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro)}
120  zCP_IBM01145 = 1145; {IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro)}
121  zCP_IBM01146 = 1146; {IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro)}
122  zCP_IBM01147 = 1147; {IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)}
123  zCP_IBM01148 = 1148; {IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)}
124  zCP_IBM01149 = 1149; {IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)}
125 
{ UTF-16 and Windows ANSI code page identifiers, followed by the Korean ids }
  zCP_UTF16 = 1200; {utf-16; Indicates the Unicode character set, Windows code page 1200}
  zCP_UTF16BE = 1201; {Unicode UTF-16, big endian byte order; available only to managed applications}
  zCP_WIN1250 = 1250; {Microsoft Windows Codepage 1250 (East European)}
  zCP_WIN1251 = 1251; {Microsoft Windows Codepage 1251 (Cyrl)}
  zCP_WIN1252 = 1252; {Microsoft Windows Codepage 1252 (ANSI), USASCII}
  zCP_WIN1253 = 1253; {Microsoft Windows Codepage 1253 (Greek)}
  zCP_WIN1254 = 1254; {Microsoft Windows Codepage 1254 (Turk)}
  zCP_WIN1255 = 1255; {Microsoft Windows Codepage 1255 (Hebrew)}
  zCP_WIN1256 = 1256; {Microsoft Windows Codepage 1256 (Arab)}
  cCP_WIN1256 = zCP_WIN1256; {misspelled legacy name (c instead of z); kept so existing references still compile}
  zCP_WIN1257 = 1257; {Microsoft Windows Codepage 1257 (BaltRim)}
  zCP_WIN1258 = 1258; {Microsoft Windows Codepage 1258 (Viet), TCVN-5712}
  ZCP_JOHAB = 1361; {Korean (Johab)}
  zCP_KOREAN = 2022; {iso-2022-kr Korean (ISO); NOTE(review): the original comment names 50225, which is the value of zCP_iso_2022_kr - confirm that 2022 is intended here}
139 
140  zCP_macintosh = 10000; {MAC Roman; Western European (Mac)}
141  zCP_x_mac_japanese = 10001; {Japanese (Mac)}
142  zCP_x_mac_chinesetrad = 10002; {MAC Traditional Chinese (Big5); Chinese Traditional (Mac)}
143  zCP_x_mac_korean = 10003; {Korean (Mac)}
144  zCP_x_mac_arabic = 10004; {Arabic (Mac)}
145  zCP_x_mac_hebrew = 10005; {Hebrew (Mac)}
146  zCP_x_mac_greek = 10006; {Greek (Mac)}
147  zCP_x_mac_cyrillic = 10007; {Cyrillic (Mac)}
148  zCP_x_mac_chinesesimp = 10008; {MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac)}
149  zCP_x_mac_romanian = 10010; {Romanian (Mac)}
150  zCP_x_mac_ukrainian = 10017; {Ukrainian (Mac)}
151  zCP_x_mac_thai = 10021; {Thai (Mac)}
152  zCP_x_mac_ce = 10029; {MAC Latin 2; Central European (Mac)}
153  zCP_x_mac_icelandic = 10079; {Icelandic (Mac)}
154  zCP_x_mac_turkish = 10081; {Turkish (Mac)}
155  zCP_x_mac_croatian = 10082; {Croatian (Mac)}
156  zCP_utf32 = 12000; {Unicode UTF-32, little endian byte order; available only to managed applications}
157  zCP_utf32BE = 12001; {Unicode UTF-32, big endian byte order; available only to managed applications}
158 
159  zCP_x_Chinese_CNS = 20000; {CNS Taiwan; Chinese Traditional (CNS)}
160  zCP_x_cp20001 = 20001; {TCA Taiwan}
161  zCP_x_Chinese_Eten = 20002; {Eten Taiwan; Chinese Traditional (Eten)}
162  zCP_x_cp20003 = 20003; {IBM5550 Taiwan}
163  zCP_x_cp20004 = 20004; {TeleText Taiwan}
164  zCP_x_cp20005 = 20005; {Wang Taiwan}
165  zCP_x_IA5 = 20105; {IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5)}
166  zCP_x_IA5_German = 20106; {IA5 German (7-bit)}
167  zCP_x_IA5_Swedish = 20107; {IA5 Swedish (7-bit)}
168  zCP_x_IA5_Norwegian = 20108; {IA5 Norwegian (7-bit)}
169  zCP_us_ascii = 20127; {US-ASCII (7-bit)}
170  zCP_x_cp20261 = 20261; {T.61}
171  zCP_x_cp20269 = 20269; {ISO 6937 Non-Spacing Accent}
172  zCP_IBM273 = 20273; {IBM EBCDIC Germany}
173  zCP_IBM277 = 20277; {IBM EBCDIC Denmark-Norway}
174  zCP_IBM278 = 20278; {IBM EBCDIC Finland-Sweden}
175  zCP_IBM280 = 20280; {IBM EBCDIC Italy}
176  zCP_IBM284 = 20284; {IBM EBCDIC Latin America-Spain}
177  zCP_IBM285 = 20285; {IBM EBCDIC United Kingdom}
178  zCP_IBM290 = 20290; {IBM EBCDIC Japanese Katakana Extended}
179  zCP_IBM297 = 20297; {IBM EBCDIC France}
180  zCP_IBM420 = 20420; {IBM EBCDIC Arabic}
181  zCP_IBM423 = 20423; {IBM EBCDIC Greek}
182  zCP_IBM424 = 20424; {IBM EBCDIC Hebrew}
183  zCP_x_EBCDIC_KoreanExtended = 20833; {IBM EBCDIC Korean Extended}
184  zCP_IBM_Thai = 20838; {IBM EBCDIC Thai / TIS-620}
185  zCP_KOI8R = 20866; {cskoi8r 20866 Cyrillic (KOI8-R)}
186  zCP_IBM871 = 20871; {IBM EBCDIC Icelandic}
187  zCP_IBM880 = 20880; {IBM EBCDIC Cyrillic Russian}
188  zCP_IBM905 = 20905; {IBM EBCDIC Turkish}
189  zCP_IBM00924 = 20924; {IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)}
190  zCP_EUC_JP = 20932; {Japanese (JIS 0208-1990 and 0121-1990)}
191  zCP_x_cp20936 = 20936; {Simplified Chinese (GB2312); Chinese Simplified (GB2312-80)}
192  zCP_x_cp20949 = 20949; {Korean Wansung}
193  zCP_cp1025 = 21025; {IBM EBCDIC Cyrillic Serbian-Bulgarian}
194  //21027 (deprecated)}}
195  zCP_KOI8U = 21866; {KOI8-U is an 8-bit character encoding, designed to cover Ukrainian, which uses the Cyrillic alphabet.}
196  zCP_L1_ISO_8859_1 = 28591; {8-bit single-byte coded graphic character sets Part 1: Latin alphabet No. 1, is part of the ISO/IEC 8859 series of ASCII-based standard character encodings}
197  zCP_L2_ISO_8859_2 = 28592; {latin2 east european (ISO), 8-bit single-byte coded graphic character sets - Part 2: Latin alphabet No. 2, is part of the ISO/IEC 8859 series of ASCII-based standard character encodings}
198  zCP_L3_ISO_8859_3 = 28593; {ISO 8859-3 Latin 3}
199  zCP_L4_ISO_8859_4 = 28594; {ISO 8859-4 Baltic}
200  zCP_L5_ISO_8859_5 = 28595; {8bit single-byte coded graphic character sets - Part 5: Latin/Cyrillic alphabet, is part of the ISO/IEC 8859 series of ASCII-based standard character encodings}
201  zCP_L6_ISO_8859_6 = 28596; {ISO 8859-6 Arabic}
202  zCP_L7_ISO_8859_7 = 28597; {ISO 8859-7 Greek}
203  zCP_L8_ISO_8859_8 = 28598; {ISO 8859-8 Hebrew; Hebrew (ISO-Visual)}
204  zCP_L5_ISO_8859_9 = 28599; {ISO 8859-9 Turkish}
205  zCP_L6_ISO_8859_10 = 28600; { ISO 8859-10, ECMA 144 Nordic }
206  zCP_L7_ISO_8859_13 = 28603; {ISO 8859-13 Estonian}
207  zCP_L8_ISO_8859_14 = 28604; { ISO 8859-14 Celtic }
208  zCP_L9_ISO_8859_15 = 28605; {ISO 8859-15 Latin 9}
209  zCP_L10_ISO_8859_16 = 28606; { ISO 8859-16, ASRO SR 14111 Romanian }
210  zCP_x_Europa = 29001; {Europa 3}
211  zCP_iso_8859_8_i = 38598; {ISO 8859-8 Hebrew; Hebrew (ISO-Logical)}
212 
213  zCP_iso_2022_jp = 50220; {ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)}
214  zCP_csISO2022JP = 50221; {ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)}
215  zCP_x_iso_2022_jp = 50222; {ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)}
216  zCP_iso_2022_kr = 50225; {ISO 2022 Korean}
217  zCP_x_cp50227 = 50227; {ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)}
218  zCP_EUC_TC_ISO220 = 50229; {ISO 2022 Traditional Chinese}
219  zCP_EBCDIC_euc_jpe = 50930; {EBCDIC Japanese (Katakana) Extended}
220  zCP_EBCDIC_euc_jp = 50931; {EBCDIC US-Canada and Japanese}
221  zCP_euc_jp_auto = 50932; {EUC Japanese, Indicates Japanese auto-detect (50932). }
222  zCP_EBCDIC_euc_kr = 50933; {EBCDIC Korean Extended and Korean}
223  zCP_EBCDIC_euc_cn = 50935; {EBCDIC Simplified Chinese Extended and Simplified Chinese}
224  zCP_EBCDIC_euc_sc = 50936; {EBCDIC Simplified Chinese}
225  zCP_EBCDIC_USC_TC = 50937; {EBCDIC US-Canada and Traditional Chinese}
226  zCP_euc_cn_auto = 50939; {EBCDIC Japanese (Latin) Extended and Japanese}
227  zCP_euc_kr_auto = 50949; {EUC Korean, Indicates Korean auto-detect (50949).}
228  zCP_euc_JP_win = 51932; {EUC Japanese}
229  zCP_EUC_CN = 51936; {EUC Simplified Chinese; Chinese Simplified (EUC)}
230  zCP_euc_kr = 51949; {EUC Korean}
231  zCP_euc_tc = 51950; {EUC Traditional Chinese}
232  zCP_hz_gb_2312 = 52936; {HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ)}
233  zCP_GB18030 = 54936; {Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)}
234  zCP_x_iscii_de = 57002; {ISCII Devanagari}
235  zCP_x_iscii_be = 57003; {ISCII Bengali}
236  zCP_x_iscii_ta = 57004; {ISCII Tamil}
237  zCP_x_iscii_te = 57005; {ISCII Telugu}
238  zCP_x_iscii_as = 57006; {ISCII Assamese}
239  zCP_x_iscii_or = 57007; {ISCII Oriya}
240  zCP_x_iscii_ka = 57008; {ISCII Kannada}
241  zCP_x_iscii_ma = 57009; {ISCII Malayalam}
242  zCP_x_iscii_gu = 57010; {ISCII Gujarati}
243  zCP_x_iscii_pa = 57011; {ISCII Punjabi}
244  zCP_UTF8 = 65001;
245  zCP_UTF7 = 65000;
246  zCP_NONE = $ffff;
247 
248 {$IFDEF WITH_LCONVENCODING}
249 const
250  ZLConvCodepages: array[0..16] of Word = (
251  28591, //ISO_8859_1
252  28592, //ISO_8859_2
253  1250, //WIN1250
254  1251, //WIN1251
255  1252, //WIN1252
256  1253, //WIN1253
257  1254, //WIN1254
258  1255, //WIN1255
259  1256, //WIN1256
260  1257, //WIN1257
261  1258, //WIN1258
262  437, //CP437
263  850, //CP850
264  852, //CP852
265  866, //CP866
266  874, //CP874
267  20866 //KOI8 (Russian)
268  );
269 
270 function IsLConvEncodingCodePage(const CP: Word): Boolean;
271 procedure SetConvertFunctions(const CTRL_CP, DB_CP: Word;
272  out PlainConvert, DbcConvert: TConvertEncodingFunction); overload;
273 {$ELSE}
274 
275 function StringToAnsiEx(const s: String; const {$IFNDEF UNICODE}FromCP,{$ENDIF} ToCP: Word): RawByteString; {$IFDEF WITH_INLINE}inline;{$ENDIF}
276 function AnsiToStringEx(const s: RawByteString; const FromCP{$IFNDEF UNICODE}, ToCP{$ENDIF}: Word): String; {$IFDEF WITH_INLINE}inline;{$ENDIF}
277 {$ENDIF}
278 
279 function ZRawToUnicode(const S: RawByteString; const CP: Word): ZWideString;
280 function ZUnicodeToRaw(const US: ZWideString; CP: Word): RawByteString;
281 
282 {converter functions for the String-types}
283 function ZConvertAnsiToRaw(const Src: AnsiString; const RawCP: Word): RawByteString;
284 function ZConvertRawToAnsi(const Src: RawByteString; const RawCP: Word): AnsiString;
285 function ZConvertAnsiToUTF8(const Src: AnsiString): UTF8String;
286 function ZConvertUTF8ToAnsi(const Src: UTF8String): AnsiString;
287 function ZConvertRawToUTF8(const Src: RawByteString; const CP: Word): UTF8String;
288 function ZConvertUTF8ToRaw(Const Src: UTF8String; const CP: Word): RawByteString;
289 function ZConvertRawToString(const Src: RawByteString; const RawCP, StringCP: Word): String;
290 function ZConvertStringToRaw(const Src: String; const StringCP, RawCP: Word): RawByteString;
291 function ZConvertStringToRawWithAutoEncode(const Src: String; const StringCP, RawCP: Word): RawByteString;
292 function ZConvertUTF8ToString(const Src: UTF8String; const StringCP: Word): String;
293 function ZConvertStringToUTF8(const Src: String; const StringCP: Word): UTF8String;
294 function ZConvertStringToUTF8WithAutoEncode(const Src: String; const StringCP: Word): UTF8String;
295 function ZConvertStringToAnsi(const Src: String; const StringCP: Word): AnsiString;
296 function ZConvertStringToAnsiWithAutoEncode(const Src: String; const StringCP: Word): AnsiString;
297 function ZConvertAnsiToString(const Src: AnsiString; const StringCP: Word): String;
298 function ZConvertUnicodeToString(const Src: ZWideString; const StringCP: Word): String;
299 function ZConvertUnicodeToString_CPUTF8(const Src: ZWideString; const StringCP: Word): String;
300 function ZConvertStringToUnicode(const Src: String; const StringCP: Word): ZWideString;
301 function ZConvertString_CPUTF8ToUnicode(const Src: String; const StringCP: Word): ZWideString;
302 function ZConvertStringToUnicodeWithAutoEncode(const Src: String; const StringCP: Word): ZWideString;
303 {move functions for the String types}
304 function ZMoveAnsiToRaw(const Src: AnsiString; const RawCP: Word): RawByteString;
305 function ZMoveRawToAnsi(const Src: RawByteString; const RawCP: Word): AnsiString;
306 function ZMoveAnsiToUTF8(const Src: AnsiString): UTF8String;
307 function ZMoveUTF8ToAnsi(const Src: UTF8String): AnsiString;
308 function ZMoveRawToUTF8(const Src: RawByteString; const CP: Word): UTF8String;
309 function ZMoveUTF8ToRaw(Const Src: UTF8String; const CP: Word): RawByteString;
310 function ZMoveStringToAnsi(Const Src: String; const StringCP: Word): AnsiString;
311 function ZMoveAnsiToString(const Src: AnsiString; const StringCP: Word): String;
312 function ZMoveRawToString(const Src: RawByteString; const RawCP, StringCP: Word): String;
313 function ZMoveStringToRaw(const Src: String; const StringCP, RawCP: Word): RawByteString;
314 function ZMoveUTF8ToString(const Src: UTF8String; StringCP: Word): String;
315 function ZMoveStringToUTF8(const Src: String; const StringCP: Word): UTF8String;
316 
317 function ZUnknownRawToUnicode(const S: RawByteString; const CP: Word): ZWideString;
318 function ZUnknownRawToUnicodeWithAutoEncode(const S: RawByteString;
319  const CP: Word): ZWideString;
320 function ZUnicodeToUnknownRaw(const US: ZWideString; CP: Word): RawByteString;
321 
322 {**
323  Get the current system codepage of AnsiString
324  @return current system codepage of AnsiString
325 }
326 function ZDefaultSystemCodePage: Word;
327 
328 {**
329  Is the codepage equal or compatible?
330  @param CP1 word the first codepage to compare
331  @param CP2 word the second codepage to compare
332  @returns Boolean True if codepage is equal or compatible
333 }
334 function ZCompatibleCodePages(const CP1, CP2: Word): Boolean; {$IFDEF WITH_INLINE}inline;{$ENDIF}
335 
336 {**
337  Set the string-types conversion functions in relation to the Connection-Settings.
338  The Results should be as optimal as possible to speed up the behavior
339  @param ConSettings a Pointer to the ConnectionSetting
340 }
341 procedure SetConvertFunctions(ConSettings: PZConSettings); {$IFDEF WITH_LCONVENCODING}overload;{$ENDIF}
342 
343 {**
344  GetValidatedAnsiStringFromBuffer validates the incoming buffer and
345  returns a valid UTF8/Ansi encoded string
346  @param Buffer the pointer to the data with the unknown format
347  @return a valid UTF8/Ansi encoded string
348 }
349 function GetValidatedAnsiStringFromBuffer(const Buffer: Pointer; Size: Cardinal;
350  ConSettings: PZConSettings): RawByteString; overload;
351 
352 function GetValidatedAnsiStringFromBuffer(const Buffer: Pointer; Size: Cardinal;
353  ConSettings: PZConSettings; ToCP: Word): RawByteString; overload;
354 
355 function GetValidatedAnsiStringFromBuffer(const Buffer: Pointer; Size: Cardinal;
356  WasDecoded: Boolean; ConSettings: PZConSettings): RawByteString; overload;
357 
358 function GetValidatedAnsiString(const Ansi: RawByteString;
359  ConSettings: PZConSettings; const FromDB: Boolean): RawByteString; overload;
360 
361 function GetValidatedAnsiString(const Uni: ZWideString;
362  ConSettings: PZConSettings; const FromDB: Boolean): RawByteString; overload;
363 
364 {**
365  GetValidatedUnicodeStream validates the incoming data and
366  returns a valid Unicode/WideString stream
367  @param Buffer the pointer to the data
368  @return a valid Unicode encoded string stream
369 }
370 function GetValidatedUnicodeStream(const Buffer: Pointer; Size: Cardinal;
371  ConSettings: PZConSettings; FromDB: Boolean): TStream; overload;
372 
373 function GetValidatedUnicodeStream(const Ansi: RawByteString;
374  ConSettings: PZConSettings; FromDB: Boolean): TStream; overload;
375 
376 implementation
377 
378 uses SysUtils, Types {$IFDEF WITH_WIDESTRUTILS},WideStrUtils{$ENDIF},
379  ZSysUtils{$IFDEF WITH_STRLEN_DEPRECATED}, AnsiStrings{$ENDIF};
380 
381 {$IFDEF FPC}
382  {$HINTS OFF}
383 {$ENDIF}
{**
  Turns a raw string into a wide string by means of the compiler's own
  string cast; no explicit codepage handling happens here.
  @param S the raw string to convert
  @param CP not used by this implementation
  @return S cast to ZWideString
}
function ZUnknownRawToUnicode(const S: RawByteString;
  const CP: Word): ZWideString;
begin
  { the cast leaves the actual conversion to the compiler/RTL }
  Result := ZWideString(S);
end;
389 
{**
  Converts a raw string to a wide string after probing its content with
  DetectUTF8Encoding.
  @param S the raw string to convert
  @param CP not used by this implementation
  @return UTF8ToString(S) when S was detected as US-ASCII or UTF8,
    otherwise S cast to ZWideString
}
function ZUnknownRawToUnicodeWithAutoEncode(const S: RawByteString;
  const CP: Word): ZWideString;
begin
  if DetectUTF8Encoding(S) in [etUSASCII, etUTF8] then
    Result := UTF8ToString(S)
  else
    { anything else is handed to the compiler cast }
    Result := ZWideString(S);
end;
399 
{**
  Turns a wide string into a raw string by means of the compiler's own
  string cast; no explicit codepage handling happens here.
  @param US the wide string to convert
  @param CP not used by this implementation
  @return US cast to RawByteString
}
function ZUnicodeToUnknownRaw(const US: ZWideString; CP: Word):
  RawByteString;
begin
  { the cast leaves the actual conversion to the compiler/RTL }
  Result := RawByteString(US);
end;
405 {$IFDEF FPC}
406  {$HINTS ON}
407 {$ENDIF}
408 
{**
  Converts a raw (codepage encoded) string to a wide string.

  Which implementation is compiled depends on the defines:
  - WITH_LCONVENCODING: the LConvEncoding XXXToUTF8 routines followed by
    UTF8Decode; codepages without a case label fall back to a compiler cast.
  - WITH_UNICODEFROMLOCALECHARS: UnicodeFromLocaleChars.
  - MSWINDOWS: MultiByteToWideChar.
  - FPC_HAS_BUILTIN_WIDESTR_MANAGER: WidestringManager.Ansi2WideMoveProc.
  - otherwise: UTF8Decode for UTF8 compatible codepages, else a compiler cast.

  @param S the raw string to convert
  @param CP the codepage S is encoded in; zCP_NONE delegates to
    ZUnknownRawToUnicode (non-LConvEncoding builds only)
  @return the converted wide string; empty if S is empty or the OS
    conversion reported a length of 0
}
function ZRawToUnicode(const S: RawByteString; const CP: Word): ZWideString;
{$IFDEF WITH_LCONVENCODING}
begin
  case CP of
    28591: //ISO_8859_1
      Result := UTF8Decode(ISO_8859_1ToUTF8(PAnsiChar(S)));
    28592: //ISO_8859_2
      Result := UTF8Decode(ISO_8859_2ToUTF8(PAnsiChar(S)));
    1250: //WIN1250
      Result := UTF8Decode(CP1250ToUTF8(PAnsiChar(S)));
    1251: //WIN1251
      Result := UTF8Decode(CP1251ToUTF8(PAnsiChar(S)));
    1252: //WIN1252
      Result := UTF8Decode(CP1252ToUTF8(PAnsiChar(S)));
    1253: //WIN1253
      Result := UTF8Decode(CP1253ToUTF8(PAnsiChar(S)));
    1254: //WIN1254
      Result := UTF8Decode(CP1254ToUTF8(PAnsiChar(S)));
    1255: //WIN1255
      Result := UTF8Decode(CP1255ToUTF8(PAnsiChar(S)));
    1256: //WIN1256
      Result := UTF8Decode(CP1256ToUTF8(PAnsiChar(S)));
    1257: //WIN1257
      Result := UTF8Decode(CP1257ToUTF8(PAnsiChar(S)));
    1258: //WIN1258
      Result := UTF8Decode(CP1258ToUTF8(PAnsiChar(S)));
    437: //CP437
      Result := UTF8Decode(CP437ToUTF8(PAnsiChar(S)));
    850: //CP850
      Result := UTF8Decode(CP850ToUTF8(PAnsiChar(S)));
    {$IFDEF LCONVENCODING_HAS_CP852_FUNCTIONS}
    852: //CP852
      Result := UTF8Decode(CP852ToUTF8(PAnsiChar(S)));
    {$ENDIF}
    866: //CP866
      Result := UTF8Decode(CP866ToUTF8(PAnsiChar(S)));
    874: //CP874
      Result := UTF8Decode(CP874ToUTF8(PAnsiChar(S)));
    20866: //KOI8 (Russian)
      Result := UTF8Decode(KOI8ToUTF8(PAnsiChar(S)));
    65001: //UTF8
      Result := UTF8Decode(PAnsiChar(s));
    else
      Result := ZWideString(S); //random success!
  end;
end;
{$ELSE}
{$IF defined(MSWINDOWS) or defined(WITH_UNICODEFROMLOCALECHARS)}
var
  {$IFDEF WITH_UNICODEFROMLOCALECHARS}
  wlen, ulen: Integer;
  {$ELSE}
  l: Integer;
  US: WideString;
  {$ENDIF}
{$IFEND}
begin
  Result := '';
  { nothing to convert; also keeps @s[1] below from touching an empty string }
  if S = '' then
    Exit;
  if CP = zCP_NONE then
    Result := ZUnknownRawToUnicode(s, CP)
  else
  {$IF defined(MSWINDOWS) or defined(WITH_UNICODEFROMLOCALECHARS)}
  begin
    {$IFDEF WITH_UNICODEFROMLOCALECHARS}
    ulen := Length(s);
    wlen := UnicodeFromLocaleChars(cp, 0, PAnsiChar(S), ulen, NIL, 0); // wlen is the number of UCS2 chars without NULL terminator
    if wlen = 0 then exit;
    SetLength(result, wlen);
    UnicodeFromLocaleChars(cp, 0, PAnsiChar(S), ulen, PWideChar(Result), wlen);
    {$ELSE}
    { the explicit byte count (instead of -1) converts the whole string even
      if it contains #0, like the UnicodeFromLocaleChars branch does; the
      returned length then does not include a terminator }
    l := MultiByteToWideChar(CP, 0, PAnsiChar(@s[1]), Length(s), nil, 0); //query the result length
    if l = 0 then Exit;
    SetLength(US, l); //set result length
    MultiByteToWideChar(CP, 0, PAnsiChar(@s[1]),
      Length(s), PWideChar(@US[1]), l); //convert Ansi to Wide with supported chars
    Result := US;
    {$ENDIF}
  end;
  {$ELSE}
    {$IFDEF FPC_HAS_BUILTIN_WIDESTR_MANAGER} //FPC2.7+
    WidestringManager.Ansi2WideMoveProc(PAnsiChar(s), CP, Result, Length(s));
    {$ELSE}
    if ZCompatibleCodePages(CP, zCP_UTF8) then
      Result := UTF8Decode(s) //raw UTF8 -> wide needs a decode, not an encode
    else
      Result := ZWideString(s);
    {$ENDIF}
  {$IFEND}
end;
{$ENDIF}
499 
{**
  Converts a wide string to a raw string encoded in the given codepage.

  Which implementation is compiled depends on the defines:
  - WITH_LCONVENCODING: UTF8Encode followed by the LConvEncoding UTF8ToXXX
    routines; codepages without a case label fall back to a compiler cast.
  - WITH_UNICODEFROMLOCALECHARS: LocaleCharsFromUnicode.
  - MSWINDOWS: WideCharToMultiByte.
  - FPC_HAS_BUILTIN_WIDESTR_MANAGER: WidestringManager.Unicode2AnsiMoveProc.
  - otherwise: UTF8Encode for UTF8 compatible codepages, else a compiler cast.

  @param US the wide string to convert
  @param CP the target codepage; zCP_NONE uses a plain compiler cast
    (non-LConvEncoding builds only)
  @return the converted raw string; empty if US is empty or the OS
    conversion reported a length of 0
}
function ZUnicodeToRaw(const US: ZWideString; CP: Word): RawByteString;
{$IFDEF WITH_LCONVENCODING}
begin
  case CP of
    28591: //ISO_8859_1
      Result := UTF8ToISO_8859_1(UTF8Encode(US));
    28592: //ISO_8859_2
      Result := UTF8ToISO_8859_2(UTF8Encode(US));
    1250: //WIN1250
      Result := UTF8ToCP1250(UTF8Encode(US));
    1251: //WIN1251
      Result := UTF8ToCP1251(UTF8Encode(US));
    1252: //WIN1252
      Result := UTF8ToCP1252(UTF8Encode(US));
    1253: //WIN1253
      Result := UTF8ToCP1253(UTF8Encode(US));
    1254: //WIN1254
      Result := UTF8ToCP1254(UTF8Encode(US));
    1255: //WIN1255
      Result := UTF8ToCP1255(UTF8Encode(US));
    1256: //WIN1256
      Result := UTF8ToCP1256(UTF8Encode(US));
    1257: //WIN1257
      Result := UTF8ToCP1257(UTF8Encode(US));
    1258: //WIN1258
      Result := UTF8ToCP1258(UTF8Encode(US));
    437: //CP437
      Result := UTF8ToCP437(UTF8Encode(US));
    850: //CP850
      Result := UTF8ToCP850(UTF8Encode(US));
    {$IFDEF LCONVENCODING_HAS_CP852_FUNCTIONS}
    852: //CP852
      Result := UTF8ToCP852(UTF8Encode(US));
    {$ENDIF}
    866: //CP866
      Result := UTF8ToCP866(UTF8Encode(US));
    874: //CP874
      Result := UTF8ToCP874(UTF8Encode(US));
    20866: //KOI8 (Russian)
      Result := UTF8ToKOI8(UTF8Encode(US));
    65001: //UTF8
      Result := UTF8Encode(US);
    else
      Result := RawByteString(US); //random success!
  end;
end;
{$ELSE}
{$IF defined(MSWINDOWS) or defined(WITH_UNICODEFROMLOCALECHARS)}
var
  {$IFDEF WITH_UNICODEFROMLOCALECHARS}
  wlen, ulen: Integer;
  {$ELSE}
  l: Integer;
  WS: WideString;
  {$ENDIF}
{$IFEND}
begin
  Result := '';
  { nothing to convert; also keeps @WS[1] below from touching an empty string }
  if US = '' then
    Exit;
  if CP = zCP_NONE then
    Result := RawByteString(US) //random success
  else
  {$IF defined(MSWINDOWS) or defined(WITH_UNICODEFROMLOCALECHARS)}
  begin
    {$IFDEF WITH_UNICODEFROMLOCALECHARS}
    wlen := Length(US);
    ulen := LocaleCharsFromUnicode(CP, 0, PWideChar(US), wlen, NIL, 0, NIL, NIL);
    if ulen = 0 then Exit; //conversion reported nothing to write
    setlength(Result, ulen);
    LocaleCharsFromUnicode(CP, 0, PWideChar(US), wlen, PAnsiChar(Result), ulen, NIL, NIL);
    {$ELSE}
    WS := US;
    { the explicit char count (instead of -1) converts the whole string even
      if it contains #0, like the LocaleCharsFromUnicode branch does; the
      returned length then does not include a terminator }
    l := WideCharToMultiByte(CP,0, @WS[1], Length(WS), nil, 0, nil, nil); //query the result length
    if l = 0 then Exit;
    SetLength(Result, l); //set result length
    WideCharToMultiByte(CP,0, @WS[1], Length(WS), @Result[1], l, nil, nil); //convert Wide down to Ansi
    {$ENDIF}
  end;
  {$ELSE}
    {$IFDEF FPC_HAS_BUILTIN_WIDESTR_MANAGER} //FPC2.7+
    WidestringManager.Unicode2AnsiMoveProc(PWideChar(US), Result, CP, Length(US));
    {$ELSE}
    if ZCompatibleCodePages(CP, zCP_UTF8) then
      Result := UTF8Encode(US)
    else
      Result := RawByteString(US); //random success
    {$ENDIF}
  {$IFEND}
end;
{$ENDIF}
588 
589 {$IFNDEF WITH_LCONVENCODING}
{**
  Converts a raw string into the compiler's String type.
  @param s the raw string to convert
  @param FromCP the codepage s is encoded in; zCP_NONE selects a plain cast
  @param ToCP (non-UNICODE compilers only) the codepage of the result;
    if it equals FromCP a plain cast is used
  @return the converted string, '' for an empty input
}
function AnsiToStringEx(const s: RawByteString;
  const FromCP{$IFNDEF UNICODE}, ToCP{$ENDIF}: Word): String;
begin
  Result := '';
  if s = '' then
    Exit;

  { no conversion wanted/needed: hand the bytes to the compiler cast }
  if ( FromCP = zCP_NONE ) {$IFNDEF UNICODE} or ( FromCP = ToCP ){$ENDIF} then
  begin
    Result := String(s);
    Exit;
  end;

  {$IFDEF UNICODE}
  if FromCP = zCP_UTF8 then
    Result := UTF8ToString(s)
  else
    Result := ZRawToUnicode(s, FromCP);
  {$ELSE} //Ansi-Compiler: go through unicode to reach the target codepage
  Result := ZUnicodeToRaw(ZRawToUnicode(s, FromCP), ToCP);
  {$ENDIF}
end;
608 
{**
  Converts the compiler's String type into a raw string.
  @param s the string to convert
  @param FromCP (non-UNICODE compilers only) the codepage s is encoded in;
    if it equals ToCP a plain cast is used
  @param ToCP the codepage of the result; zCP_NONE selects a plain cast
  @return the converted raw string, '' for an empty input
}
function StringToAnsiEx(const s: String; const {$IFNDEF UNICODE}FromCP, {$ENDIF} ToCP: Word): RawByteString;
begin
  Result := '';
  if s = '' then
    Exit;

  { no conversion wanted/needed: hand the string to the compiler cast }
  if ( ToCP = zCP_NONE ) {$IFNDEF UNICODE} or ( FromCP = ToCP ){$ENDIF} then
  begin
    Result := RawByteString(s);
    Exit;
  end;

  {$IFDEF UNICODE}
  if ToCP = zCP_UTF8 then
    Result := UTF8Encode(s)
  else
    Result := ZUnicodeToRaw(s, ToCP);
  {$ELSE} //Ansi-Compiler: go through unicode to reach the target codepage
  Result := ZUnicodeToRaw(ZRawToUnicode(s, FromCP), ToCP);
  {$ENDIF}
end;
626 
627 {$ELSE}
628 
{**
  Tells whether a codepage is listed in ZLConvCodepages.
  @param CP the codepage to look up
  @return True if CP is one of the entries of ZLConvCodepages
}
function IsLConvEncodingCodePage(const CP: Word): Boolean;
var
  Idx: Integer;
begin
  Result := False;
  Idx := 0;
  { stop at the first hit or once the table is exhausted }
  while (Idx <= High(ZLConvCodepages)) and not Result do
  begin
    Result := ZLConvCodepages[Idx] = CP;
    Inc(Idx);
  end;
end;
639 
{**
  Identity "conversion": hands the input back unchanged. SetConvertFunctions
  assigns it wherever no real conversion routine is selected.
  @param s the string to pass through
  @return s
}
function NoConvert(const s: string): string;
begin
  Result := s;
end;
644 
{**
  Picks the pair of LConvEncoding routines used to convert between the
  database codepage and UTF8.
  @param CTRL_CP the codepage on the control side; only compared with DB_CP
  @param DB_CP the database codepage selecting the routines
  @param PlainConvert receives the UTF8ToXXX routine for DB_CP
  @param DbcConvert receives the XXXToUTF8 routine for DB_CP
  Both outputs are NoConvert if the codepages are equal or DB_CP has no
  case label below.
}
procedure SetConvertFunctions(const CTRL_CP, DB_CP: Word;
  out PlainConvert, DbcConvert: TConvertEncodingFunction);
begin
  { start from the pass-through pair; a matching case label overrides it }
  PlainConvert := @NoConvert;
  DbcConvert := @NoConvert;
  if CTRL_CP = DB_CP then
    Exit;

  case DB_CP of
    28591: begin DbcConvert := @ISO_8859_1ToUTF8; PlainConvert := @UTF8ToISO_8859_1; end; //ISO_8859_1
    28592: begin DbcConvert := @ISO_8859_2ToUTF8; PlainConvert := @UTF8ToISO_8859_2; end; //ISO_8859_2
    1250:  begin DbcConvert := @CP1250ToUTF8; PlainConvert := @UTF8ToCP1250; end; //WIN1250
    1251:  begin DbcConvert := @CP1251ToUTF8; PlainConvert := @UTF8ToCP1251; end; //WIN1251
    1252:  begin DbcConvert := @CP1252ToUTF8; PlainConvert := @UTF8ToCP1252; end; //WIN1252
    1253:  begin DbcConvert := @CP1253ToUTF8; PlainConvert := @UTF8ToCP1253; end; //WIN1253
    1254:  begin DbcConvert := @CP1254ToUTF8; PlainConvert := @UTF8ToCP1254; end; //WIN1254
    1255:  begin DbcConvert := @CP1255ToUTF8; PlainConvert := @UTF8ToCP1255; end; //WIN1255
    1256:  begin DbcConvert := @CP1256ToUTF8; PlainConvert := @UTF8ToCP1256; end; //WIN1256
    1257:  begin DbcConvert := @CP1257ToUTF8; PlainConvert := @UTF8ToCP1257; end; //WIN1257
    1258:  begin DbcConvert := @CP1258ToUTF8; PlainConvert := @UTF8ToCP1258; end; //WIN1258
    437:   begin DbcConvert := @CP437ToUTF8; PlainConvert := @UTF8ToCP437; end; //CP437
    850:   begin DbcConvert := @CP850ToUTF8; PlainConvert := @UTF8ToCP850; end; //CP850
    {$IFDEF LCONVENCODING_HAS_CP852_FUNCTIONS}
    852:   begin DbcConvert := @CP852ToUTF8; PlainConvert := @UTF8ToCP852; end; //CP852
    {$ENDIF}
    866:   begin DbcConvert := @CP866ToUTF8; PlainConvert := @UTF8ToCP866; end; //CP866
    874:   begin DbcConvert := @CP874ToUTF8; PlainConvert := @UTF8ToCP874; end; //CP874
    20866: begin DbcConvert := @KOI8ToUTF8; PlainConvert := @UTF8ToKOI8; end; //KOI8 (Russian)
  end;
end;
751 {$ENDIF}
752 
{**
  Returns the codepage used as the system default.
  @return DefaultSystemCodePage if WITH_DEFAULTSYSTEMCODEPAGE is defined,
    otherwise GetACP on MSWINDOWS, otherwise zCP_UTF8
}
function ZDefaultSystemCodePage: Word;
begin
  {$IF defined(WITH_DEFAULTSYSTEMCODEPAGE)}
  Result := Word(DefaultSystemCodePage);
  {$ELSEIF defined(MSWINDOWS)}
  Result := GetACP; //available for Windows and WinCE
  {$ELSE}
  Result := zCP_UTF8; //how to determine the current OS CP?
  {$IFEND}
end;
765 
{**
  Checks whether two codepages can be used interchangeably.
  @param CP1 word the first codepage to compare
  @param CP2 word the second codepage to compare
  @returns Boolean True if both codepages are identical or at least one of
    them is zCP_us_ascii
}
function ZCompatibleCodePages(const CP1, CP2: Word): Boolean;
begin
  if CP1 = CP2 then
    Result := True
  else
    Result := (zCP_us_ascii = CP1) or (zCP_us_ascii = CP2);
end;
776 
777 {$IFDEF FPC}
778  {$HINTS OFF}
779 {$ENDIF}
{**
  Guesses the character encoding of a byte buffer.

  Step one: a #0 byte inside the inspected range is taken as proof that wide
  chars were written into the buffer, so ceUTF16 is returned.
  Step two: if ConSettings.AutoEncode is set, DetectUTF8Encoding decides
  between ceDefault (US-ASCII), ceAnsi and ceUTF8.

  Only the first Size bytes are inspected, and never more than the array
  holds: a dynamic byte array is not null-terminated, so nothing may scan it
  for a terminator.

  Known limitation (from the original author): an Ansi result is not reliable
  in all cases; the caller has to decide whether ansi or unicode is meant.

  @param Bytes the buffer to inspect
  @param Size the number of bytes of Bytes to inspect
  @param ConSettings the connection settings; only AutoEncode is read
  @return the detected encoding, ceDefault if nothing was detected
}
function TestEncoding(const Bytes: TByteDynArray; const Size: Cardinal;
  const ConSettings: PZConSettings): TZCharEncoding;
var
  Len, I: Integer;
  Raw: RawByteString;
begin
  Result := ceDefault;

  { clamp the inspected range to what the array really owns }
  Len := Length(Bytes);
  if Size < Cardinal(Len) then
    Len := Integer(Size);

  { a #0 in the byte sequence: surely a PWideChar was written }
  for I := 0 to Len - 1 do
    if Bytes[I] = 0 then
    begin
      Result := ceUTF16;
      Exit;
    end;

  if ConSettings.AutoEncode then
  begin
    { copy exactly Len bytes so the detection works on a terminated string }
    SetString(Raw, PAnsiChar(Bytes), Len);
    case DetectUTF8Encoding(Raw) of
      etUSASCII: Result := ceDefault; //Exact!
      etAnsi: Result := ceAnsi; //not exact, see the limitation noted above
      etUTF8: Result := ceUTF8; //Exact!
    end;
  end;
end;
810 {$IFDEF FPC}
811  {$HINTS ON}
812 {$ENDIF}
813 
{**
  Converts an AnsiString into a raw string of code page RawCP.
  The text is widened with the compiler's AnsiString to ZWideString cast and
  then encoded by ZUnicodeToRaw.
  @param Src the ansi encoded text
  @param RawCP the target code page
  @return the converted text, empty for empty input
}
function ZConvertAnsiToRaw(const Src: AnsiString; const RawCP: Word): RawByteString;
var
  Wide: ZWideString; // named local on purpose (original note: COM based, avoids buffer overrun)
begin
  Result := '';
  if Src <> '' then
  begin
    Wide := ZWideString(Src);
    Result := ZUnicodeToRaw(Wide, RawCP);
  end;
end;
825 
{**
  Converts a raw string of code page RawCP into an AnsiString.
  The text is decoded by ZRawToUnicode and then narrowed with the compiler's
  ZWideString to AnsiString cast.
  @param Src the raw encoded text
  @param RawCP the code page Src is encoded in
  @return the converted text, empty for empty input
}
function ZConvertRawToAnsi(const Src: RawByteString; const RawCP: Word): AnsiString;
var
  Wide: ZWideString; // named local on purpose (original note: COM based, avoids buffer overrun)
begin
  Result := '';
  if Src <> '' then
  begin
    Wide := ZRawToUnicode(Src, RawCP);
    Result := AnsiString(Wide); // compiler conversion
  end;
end;
837 
{**
  Converts an AnsiString into an UTF8String by going through ZWideString.
  With WITH_RAWBYTESTRING the final step is a UTF8String cast, otherwise
  UTF8Encode is called.
  @param Src the ansi encoded text
  @return the UTF8 encoded text, empty for empty input
}
function ZConvertAnsiToUTF8(const Src: AnsiString): UTF8String;
var
  Wide: ZWideString; // named local on purpose (original note: COM based, avoids buffer overrun)
begin
  Result := '';
  if Src <> '' then
  begin
    Wide := ZWideString(Src);
    {$IFDEF WITH_RAWBYTESTRING}
    Result := UTF8String(Wide);
    {$ELSE}
    Result := UTF8Encode(Wide);
    {$ENDIF}
  end;
end;
849 
{**
  Converts an UTF8String into an AnsiString by going through ZWideString.
  With WITH_RAWBYTESTRING the decoding step is a ZWideString cast, otherwise
  UTF8Decode is called.
  @param Src the UTF8 encoded text
  @return the ansi encoded text, empty for empty input
}
function ZConvertUTF8ToAnsi(const Src: UTF8String): AnsiString;
var
  Wide: ZWideString; // named local on purpose (original note: COM based, avoids buffer overrun)
begin
  Result := '';
  if Src <> '' then
  begin
    {$IFDEF WITH_RAWBYTESTRING}
    Wide := ZWideString(Src);
    {$ELSE}
    Wide := UTF8Decode(Src);
    {$ENDIF}
    Result := AnsiString(Wide);
  end;
end;
861 
{**
  Converts a raw string of code page CP into an UTF8String.
  With WITH_LCONVENCODING the matching LConvEncoding routine is picked by
  code page; code pages without a routine (including UTF8 itself) are copied
  unchanged. Otherwise the text is decoded by ZRawToUnicode and re-encoded
  as UTF8.
  The CP852 entry is compiled only when LCONVENCODING_HAS_CP852_FUNCTIONS is
  defined, like in the other conversion tables of this unit; without it
  CP852 data takes the unchanged-copy path.
  @param Src the raw encoded text
  @param CP the code page Src is encoded in
  @return the UTF8 encoded text, empty for empty input
}
function ZConvertRawToUTF8(const Src: RawByteString; const CP: Word): UTF8String;
var
  {$IFDEF WITH_LCONVENCODING}
  sUTF8: String;
  {$ELSE}
  US: ZWideString; //COM based. So localize the String to avoid Buffer overrun
  {$ENDIF}
begin
  if Src = '' then
    Result := ''
  else
  begin
    {$IFDEF WITH_LCONVENCODING}
    Result := '';
    case CP of
      28591: //ISO_8859_1
        sUTF8 := ISO_8859_1ToUTF8(PAnsiChar(Src));
      28592: //ISO_8859_2
        sUTF8 := ISO_8859_2ToUTF8(PAnsiChar(Src));
      1250: //WIN1250
        sUTF8 := CP1250ToUTF8(PAnsiChar(Src));
      1251: //WIN1251
        sUTF8 := CP1251ToUTF8(PAnsiChar(Src));
      1252: //WIN1252
        sUTF8 := CP1252ToUTF8(PAnsiChar(Src));
      1253: //WIN1253
        sUTF8 := CP1253ToUTF8(PAnsiChar(Src));
      1254: //WIN1254
        sUTF8 := CP1254ToUTF8(PAnsiChar(Src));
      1255: //WIN1255
        sUTF8 := CP1255ToUTF8(PAnsiChar(Src));
      1256: //WIN1256
        sUTF8 := CP1256ToUTF8(PAnsiChar(Src));
      1257: //WIN1257
        sUTF8 := CP1257ToUTF8(PAnsiChar(Src));
      1258: //WIN1258
        sUTF8 := CP1258ToUTF8(PAnsiChar(Src));
      437: //CP437
        sUTF8 := CP437ToUTF8(PAnsiChar(Src));
      850: //CP850
        sUTF8 := CP850ToUTF8(PAnsiChar(Src));
      {$IFDEF LCONVENCODING_HAS_CP852_FUNCTIONS}
      852: //CP852
        sUTF8 := CP852ToUTF8(PAnsiChar(Src));
      {$ENDIF}
      866: //CP866
        sUTF8 := CP866ToUTF8(PAnsiChar(Src));
      874: //CP874
        sUTF8 := CP874ToUTF8(PAnsiChar(Src));
      20866: //KOI8 (Russian)
        sUTF8 := KOI8ToUTF8(PAnsiChar(Src));
      65001: //utf8
        sUTF8 := PAnsiChar(Src);
      else
        sUTF8 := PAnsiChar(Src); //no converter available: copy as is
    end;
    ZSetString(PAnsichar(sUTF8), Result);
    {$ELSE}
    US := ZRawToUnicode(Src, CP);
    Result := {$IFDEF WITH_RAWBYTESTRING}UTF8String{$ELSE}UTF8Encode{$ENDIF}(US);
    {$ENDIF}
  end;
end;
923 
{**
  Converts an UTF8String into a raw string of code page CP.
  With WITH_LCONVENCODING the matching LConvEncoding routine is picked by
  code page; code pages without a routine (including UTF8 itself) are copied
  unchanged. Otherwise the text is decoded by UTF8ToString and encoded by
  ZUnicodeToRaw.
  The CP852 entry is compiled only when LCONVENCODING_HAS_CP852_FUNCTIONS is
  defined, like in the other conversion tables of this unit; without it
  a CP852 target takes the unchanged-copy path.
  @param Src the UTF8 encoded text
  @param CP the target code page
  @return the raw encoded text, empty for empty input
}
function ZConvertUTF8ToRaw(Const Src: UTF8String; const CP: Word): RawByteString;
var
  {$IFDEF WITH_LCONVENCODING}
  sUTF8: String; //despite its name this holds the converted (target CP) text
  {$ELSE}
  US: ZWideString; //COM based. So localize the String to avoid Buffer overrun
  {$ENDIF}
begin
  if Src = '' then
    Result := ''
  else
  begin
    {$IFDEF WITH_LCONVENCODING}
    case CP of
      28591: //ISO_8859_1
        sUTF8 := UTF8ToISO_8859_1(PAnsiChar(Src));
      28592: //ISO_8859_2
        sUTF8 := UTF8ToISO_8859_2(PAnsiChar(Src));
      1250: //WIN1250
        sUTF8 := UTF8ToCP1250(PAnsiChar(Src));
      1251: //WIN1251
        sUTF8 := UTF8ToCP1251(PAnsiChar(Src));
      1252: //WIN1252
        sUTF8 := UTF8ToCP1252(PAnsiChar(Src));
      1253: //WIN1253
        sUTF8 := UTF8ToCP1253(PAnsiChar(Src));
      1254: //WIN1254
        sUTF8 := UTF8ToCP1254(PAnsiChar(Src));
      1255: //WIN1255
        sUTF8 := UTF8ToCP1255(PAnsiChar(Src));
      1256: //WIN1256
        sUTF8 := UTF8ToCP1256(PAnsiChar(Src));
      1257: //WIN1257
        sUTF8 := UTF8ToCP1257(PAnsiChar(Src));
      1258: //WIN1258
        sUTF8 := UTF8ToCP1258(PAnsiChar(Src));
      437: //CP437
        sUTF8 := UTF8ToCP437(PAnsiChar(Src));
      850: //CP850
        sUTF8 := UTF8ToCP850(PAnsiChar(Src));
      {$IFDEF LCONVENCODING_HAS_CP852_FUNCTIONS}
      852: //CP852
        sUTF8 := UTF8ToCP852(PAnsiChar(Src));
      {$ENDIF}
      866: //CP866
        sUTF8 := UTF8ToCP866(PAnsiChar(Src));
      874: //CP874
        sUTF8 := UTF8ToCP874(PAnsiChar(Src));
      20866: //KOI8 (Russian)
        sUTF8 := UTF8ToKOI8(PAnsiChar(Src));
      65001: //UTF8
        sUTF8 := PAnsiChar(Src);
      else
        sUTF8 := PAnsiChar(Src); //no converter available: copy as is
    end;
    Result := ''; //Makes compiler happy
    ZSetString(PAnsiChar(sUTF8), Result);
    {$ELSE}
    US := UTF8ToString(PAnsiChar(Src));
    Result := ZUnicodeToRaw(US, CP);
    {$ENDIF}
  end;
end;
985 
986 {$IFDEF FPC}
987  {$HINTS OFF}
988 {$ENDIF}
{**
  Converts a raw string of code page RawCP into a String.
  - WITH_LCONVENCODING: the bytes are converted to UTF8 by the LConvEncoding
    routine that belongs to RawCP; unknown code pages are copied unchanged.
  - UNICODE compilers: the result is what ZRawToUnicode returns.
  - otherwise: the text is decoded by ZRawToUnicode and encoded to StringCP.
  @param Src the raw encoded text
  @param RawCP the code page Src is encoded in
  @param StringCP the code page of the result (only used by the last variant)
  @return the converted text, empty for empty input
}
function ZConvertRawToString(const Src: RawByteString;
  const RawCP, StringCP: Word): String;
{$IFNDEF UNICODE}
var
  {$IFDEF WITH_LCONVENCODING}
  RawCopy: String; //plain copy of Src used as converter input
  {$ELSE}
  Wide: ZWideString; //named local on purpose (original note: COM based, avoids buffer overrun)
  {$ENDIF}
{$ENDIF}
begin
  if Src = '' then
  begin
    Result := '';
    Exit;
  end;

  {$IFDEF WITH_LCONVENCODING}
  RawCopy := ''; //Makes Compiler happy
  ZSetString(PAnsichar(Src), RawCopy);
  case RawCP of
    28591: Result := ISO_8859_1ToUTF8(PAnsiChar(RawCopy)); //ISO_8859_1
    28592: Result := ISO_8859_2ToUTF8(PAnsiChar(RawCopy)); //ISO_8859_2
    1250:  Result := CP1250ToUTF8(PAnsiChar(RawCopy)); //WIN1250
    1251:  Result := CP1251ToUTF8(PAnsiChar(RawCopy)); //WIN1251
    1252:  Result := CP1252ToUTF8(PAnsiChar(RawCopy)); //WIN1252
    1253:  Result := CP1253ToUTF8(PAnsiChar(RawCopy)); //WIN1253
    1254:  Result := CP1254ToUTF8(PAnsiChar(RawCopy)); //WIN1254
    1255:  Result := CP1255ToUTF8(PAnsiChar(RawCopy)); //WIN1255
    1256:  Result := CP1256ToUTF8(PAnsiChar(RawCopy)); //WIN1256
    1257:  Result := CP1257ToUTF8(PAnsiChar(RawCopy)); //WIN1257
    1258:  Result := CP1258ToUTF8(PAnsiChar(RawCopy)); //WIN1258
    437:   Result := CP437ToUTF8(PAnsiChar(RawCopy)); //CP437
    850:   Result := CP850ToUTF8(PAnsiChar(RawCopy)); //CP850
    {$IFDEF LCONVENCODING_HAS_CP852_FUNCTIONS}
    852:   Result := CP852ToUTF8(PAnsiChar(RawCopy)); //CP852
    {$ENDIF}
    866:   Result := CP866ToUTF8(PAnsiChar(RawCopy)); //CP866
    874:   Result := CP874ToUTF8(PAnsiChar(RawCopy)); //CP874
    20866: Result := KOI8ToUTF8(PAnsiChar(RawCopy)); //KOI8 (Russian)
    65001: Result := PAnsiChar(RawCopy); //utf8
    else
      Result := PAnsiChar(RawCopy);
  end;
  {$ELSE}
    {$IFDEF UNICODE}
  Result := ZRawToUnicode(Src, RawCP);
    {$ELSE}
  Wide := ZRawToUnicode(Src, RawCP);
  ZSetString(PAnsiChar(ZUnicodeToRaw(Wide, StringCP)), Result);
    {$ENDIF}
  {$ENDIF}
end;
1059 
{**
  Converts a String into a raw string of code page RawCP.
  - WITH_LCONVENCODING: Src is handed to the LConvEncoding UTF8To... routine
    that belongs to RawCP; unknown code pages are copied unchanged.
  - UNICODE compilers: the text is encoded by ZUnicodeToRaw.
  - otherwise: Src is decoded from StringCP by ZRawToUnicode and encoded to
    RawCP by ZUnicodeToRaw.
  @param Src the text to convert
  @param StringCP the code page of Src (only used by the last variant)
  @param RawCP the target code page
  @return the raw encoded text, empty for empty input
}
function ZConvertStringToRaw(const Src: String; const StringCP, RawCP: Word): RawByteString;
{$IFNDEF UNICODE}
var
  {$IFDEF WITH_LCONVENCODING}
  Encoded: String; //converter output in the target code page
  {$ELSE}
  Wide: ZWideString; //named local on purpose (original note: COM based, avoids buffer overrun)
  {$ENDIF}
{$ENDIF}
begin
  if Src = '' then
  begin
    Result := '';
    Exit;
  end;

  {$IFDEF WITH_LCONVENCODING}
  case RawCP of
    28591: Encoded := UTF8ToISO_8859_1(Src); //ISO_8859_1
    28592: Encoded := UTF8ToISO_8859_2(Src); //ISO_8859_2
    1250:  Encoded := UTF8ToCP1250(Src); //WIN1250
    1251:  Encoded := UTF8ToCP1251(Src); //WIN1251
    1252:  Encoded := UTF8ToCP1252(Src); //WIN1252
    1253:  Encoded := UTF8ToCP1253(Src); //WIN1253
    1254:  Encoded := UTF8ToCP1254(Src); //WIN1254
    1255:  Encoded := UTF8ToCP1255(Src); //WIN1255
    1256:  Encoded := UTF8ToCP1256(Src); //WIN1256
    1257:  Encoded := UTF8ToCP1257(Src); //WIN1257
    1258:  Encoded := UTF8ToCP1258(Src); //WIN1258
    437:   Encoded := UTF8ToCP437(Src); //CP437
    850:   Encoded := UTF8ToCP850(Src); //CP850
    {$IFDEF LCONVENCODING_HAS_CP852_FUNCTIONS}
    852:   Encoded := UTF8ToCP852(Src); //CP852
    {$ENDIF}
    866:   Encoded := UTF8ToCP866(Src); //CP866
    874:   Encoded := UTF8ToCP874(Src); //CP874
    20866: Encoded := UTF8ToKOI8(Src); //KOI8 (Russian)
    65001: Encoded := Src; //UTF8
    else
      Encoded := Src;
  end;
  Result := ''; //Makes compler happy
  ZSetString(PAnsiChar(Encoded), Result);
  {$ELSE}
    {$IFDEF UNICODE}
  Result := ZUnicodeToRaw(Src, RawCP);
    {$ELSE}
  Wide := ZRawToUnicode(Src, StringCP);
  Result := ZUnicodeToRaw(Wide, RawCP);
    {$ENDIF}
  {$ENDIF}
end;
1131 {$IFDEF FPC}
1132  {$HINTS ON}
1133 {$ENDIF}
1134 
{$WARNINGS OFF}
{**
  Converts a String into a raw string of code page RawCP and, on non-UNICODE
  compilers, inspects the content with DetectUTF8Encoding first:
  - US-ASCII content is copied unchanged;
  - ansi content is converted only if RawCP is UTF8, otherwise copied;
  - UTF8 content is copied if RawCP is UTF8, otherwise converted from UTF8.
  On UNICODE compilers the text is simply encoded by ZUnicodeToRaw.
  @param Src the text to convert
  @param StringCP the code page Src is assumed to have if it is ansi
  @param RawCP the target code page
  @return the raw encoded text
}
function ZConvertStringToRawWithAutoEncode(const Src: String;
  const StringCP, RawCP: Word): RawByteString;
begin
  {$IFDEF UNICODE}
  Result := ZUnicodeToRaw(Src, RawCP);
  {$ELSE}
  Result := ''; //init for FPC
  case DetectUTF8Encoding(Src) of
    etUSASCII:
      ZSetString(PAnsiChar(Src), Result);
    etAnsi:
      if RawCP <> zCP_UTF8 then
        ZSetString(PAnsiChar(Src), Result)
      else if ZCompatibleCodePages(StringCP, zCP_UTF8) then
        //String CP claims UTF8 but the data is ansi: random success, unknown String CP
        Result := ZUnicodeToRaw(ZWideString(Src), RawCP)
      else
        Result := ZConvertStringToRaw(Src, StringCP, RawCP);
    etUTF8:
      if RawCP <> zCP_UTF8 then
        Result := ZConvertStringToRaw(Src, zCP_UTF8, RawCP)
      else
        ZSetString(PAnsiChar(Src), Result);
  end;
  {$ENDIF}
end;
{$WARNINGS ON}
1162 
{**
  Converts an UTF8String into a String.
  On UNICODE compilers the text is decoded by UTF8ToString; otherwise it is
  decoded by UTF8ToString and encoded to StringCP by ZUnicodeToRaw.
  @param Src the UTF8 encoded text
  @param StringCP the code page of the result (non-UNICODE compilers only)
  @return the converted text, empty for empty input
}
function ZConvertUTF8ToString(const Src: UTF8String;
  const StringCP: Word): String;
{$IFNDEF UNICODE}
var
  Wide: ZWideString; //named local on purpose (original note: COM based, avoids buffer overrun)
  Raw: RawByteString;
{$ENDIF}
begin
  Result := '';
  if Src = '' then
    Exit;
  {$IFDEF UNICODE}
  Result := UTF8ToString(PAnsiChar(Src));
  {$ELSE}
  Wide := UTF8ToString(PAnsiChar(Src));
  Raw := ZUnicodeToRaw(Wide, StringCP);
  ZSetString(PAnsiChar(Raw), Result);
  {$ENDIF}
end;
1185 
{**
  Converts a String into an UTF8String.
  On UNICODE compilers this is a UTF8String cast; otherwise Src is decoded
  from StringCP by ZRawToUnicode and then encoded as UTF8.
  @param Src the text to convert
  @param StringCP the code page of Src (non-UNICODE compilers only)
  @return the UTF8 encoded text, empty for empty input
}
function ZConvertStringToUTF8(const Src: String;
  const StringCP: Word): UTF8String;
{$IFNDEF UNICODE}
var
  Wide: ZWideString; //named local on purpose (original note: COM based, avoids buffer overrun)
{$ENDIF}
begin
  if Src = '' then
  begin
    Result := '';
    Exit;
  end;
  {$IFDEF UNICODE}
  Result := UTF8String(Src);
  {$ELSE}
  Wide := ZRawToUnicode(Src, StringCP);
    {$IFDEF WITH_RAWBYTESTRING}
  Result := UTF8String(Wide);
    {$ELSE}
  Result := UTF8Encode(Wide);
    {$ENDIF}
  {$ENDIF}
end;
1209 
{**
  Converts a String into an UTF8String and, on non-UNICODE compilers,
  inspects the content with DetectUTF8Encoding first: US-ASCII and UTF8
  content is copied unchanged, anything else is decoded (by a ZWideString
  cast if StringCP is compatible with UTF8, else by ZRawToUnicode) and
  encoded as UTF8.
  @param Src the text to convert
  @param StringCP the code page Src is assumed to have if it is ansi
  @return the UTF8 encoded text
}
function ZConvertStringToUTF8WithAutoEncode(const Src: String;
  const StringCP: Word): UTF8String;
{$IFNDEF UNICODE}
var
  Wide: ZWideString; //named local on purpose (original note: COM based, avoids buffer overrun)
{$ENDIF}
begin
  {$IFDEF UNICODE}
  Result := UTF8String(Src);
  {$ELSE}
  Result := '';
  case DetectUTF8Encoding(PAnsiChar(Src)) of
    etUSASCII, etUTF8:
      ZSetString(PAnsiChar(Src), Result);
    else
      begin //Ansi
        if ZCompatibleCodePages(StringCP, zCP_UTF8) then
          Wide := ZWideString(Src)
        else
          Wide := ZRawToUnicode(PAnsiChar(Src), StringCP);
        {$IFDEF WITH_RAWBYTESTRING}
        Result := UTF8String(Wide);
        {$ELSE}
        Result := UTF8Encode(Wide);
        {$ENDIF}
      end;
  end;
  {$ENDIF}
end;
1236 
{**
  Converts a String into an AnsiString.
  On UNICODE compilers this is an AnsiString cast; otherwise Src is decoded
  from StringCP by ZRawToUnicode and narrowed with an AnsiString cast.
  @param Src the text to convert
  @param StringCP the code page of Src (non-UNICODE compilers only)
  @return the ansi encoded text
}
function ZConvertStringToAnsi(const Src: String;
  const StringCP: Word): AnsiString;
{$IFNDEF UNICODE}
var
  Wide: ZWideString; //named local on purpose (original note: COM based, avoids buffer overrun)
begin
  Wide := ZRawToUnicode(PAnsiChar(Src), StringCP);
  Result := AnsiString(Wide);
end;
{$ELSE}
begin
  Result := AnsiString(Src);
end;
{$ENDIF}
1250 
1251 {$IFDEF FPC}
1252  {$HINTS OFF}
1253 {$ENDIF}
{**
  Converts a String into an AnsiString and, on non-UNICODE compilers,
  inspects the content with DetectUTF8Encoding first: US-ASCII and ansi
  content is copied unchanged, anything else is decoded by UTF8ToString and
  narrowed with an AnsiString cast. StringCP is not read.
  @param Src the text to convert
  @param StringCP unused
  @return the ansi encoded text
}
function ZConvertStringToAnsiWithAutoEncode(const Src: String;
  const StringCP: Word): AnsiString;
{$IFNDEF UNICODE}
var
  Wide: ZWideString; //named local on purpose (original note: COM based, avoids buffer overrun)
{$ENDIF}
begin
  {$IFDEF UNICODE}
  Result := AnsiString(Src);
  {$ELSE}
  Result := '';
  case DetectUTF8Encoding(PAnsiChar(Src)) of
    etUSASCII, etAnsi:
      ZSetString(PAnsiChar(Src), Result);
    else
      begin
        Wide := UTF8ToString(PAnsiChar(Src));
        Result := AnsiString(Wide);
      end;
  end;
  {$ENDIF}
end;
1273 {$IFDEF FPC}
1274  {$HINTS ON}
1275 {$ENDIF}
1276 
{**
  Converts an AnsiString into a String.
  On UNICODE compilers this is a String cast; otherwise Src is decoded from
  the code page returned by ZDefaultSystemCodePage and encoded to StringCP.
  @param Src the ansi encoded text
  @param StringCP the code page of the result (non-UNICODE compilers only)
  @return the converted text
}
function ZConvertAnsiToString(const Src: AnsiString;
  const StringCP: Word): String;
{$IFNDEF UNICODE}
var
  Wide: ZWideString; //named local on purpose (original note: COM based, avoids buffer overrun)
begin
  Result := '';
  Wide := ZRawToUnicode(PAnsiChar(Src), ZDefaultSystemCodePage);
  ZSetString(PAnsiChar(ZUnicodeToRaw(Wide, StringCP)), Result);
end;
{$ELSE}
begin
  Result := String(Src);
end;
{$ENDIF}
1291 
{**
  Converts a ZWideString into a String.
  On UNICODE compilers this is a plain assignment; otherwise the text is
  encoded to StringCP by ZUnicodeToRaw and copied into the result.
  @param Src the UTF-16 text
  @param StringCP the code page of the result (non-UNICODE compilers only)
  @return the converted text
}
function ZConvertUnicodeToString(const Src: ZWideString;
  const StringCP: Word): String;
{$IFNDEF UNICODE}
var
  Encoded: RawByteString;
begin
  Encoded := ZUnicodeToRaw(Src, StringCP);
  ZSetString(PAnsiChar(Encoded), Result);
end;
{$ELSE}
begin
  Result := Src;
end;
{$ENDIF}
1305 
1306 {$IFDEF FPC}
1307  {$HINTS OFF}
1308 {$ENDIF}
{**
  Converts a ZWideString into a String whose code page is UTF8.
  On UNICODE compilers this is a plain assignment; otherwise the text is
  encoded with UTF8Encode and copied into the result. StringCP is not read.
  @param Src the UTF-16 text
  @param StringCP unused
  @return the converted text
}
function ZConvertUnicodeToString_CPUTF8(const Src: ZWideString;
  const StringCP: Word): String;
{$IFNDEF UNICODE}
var
  Encoded: RawByteString;
begin
  Encoded := UTF8Encode(Src);
  ZSetString(PAnsiChar(Encoded), Result);
end;
{$ELSE}
begin
  Result := Src;
end;
{$ENDIF}
1322 {$IFDEF FPC}
1323  {$HINTS ON}
1324 {$ENDIF}
1325 
{**
  Converts a String into a ZWideString.
  On UNICODE compilers this is a plain assignment; otherwise the bytes of
  Src are copied into a RawByteString and decoded from StringCP by
  ZRawToUnicode.
  @param Src the text to convert
  @param StringCP the code page of Src (non-UNICODE compilers only)
  @return the UTF-16 text
}
function ZConvertStringToUnicode(const Src: String;
  const StringCP: Word): ZWideString;
{$IFNDEF UNICODE}
var
  RawCopy: RawByteString;
begin
  RawCopy := ''; //Makes compiler Happy
  ZSetString(PAnsiChar(Src), RawCopy);
  Result := ZRawToUnicode(RawCopy, StringCP);
end;
{$ELSE}
begin
  Result := Src;
end;
{$ENDIF}
1340 
1341 {$IFDEF FPC}
1342  {$HINTS OFF}
1343 {$ENDIF}
{**
  Converts a String whose code page is UTF8 into a ZWideString.
  On UNICODE compilers this is a plain assignment; otherwise the bytes of
  Src are copied into a RawByteString and decoded by UTF8ToString.
  StringCP is not read.
  @param Src the text to convert
  @param StringCP unused
  @return the UTF-16 text
}
function ZConvertString_CPUTF8ToUnicode(const Src: String;
  const StringCP: Word): ZWideString;
{$IFNDEF UNICODE}
var
  RawCopy: RawByteString;
begin
  RawCopy := ''; //Makes Compiler happy
  ZSetString(PAnsiChar(Src), RawCopy);
  Result := UTF8ToString(RawCopy);
end;
{$ELSE}
begin
  Result := Src;
end;
{$ENDIF}
1358 {$IFDEF FPC}
1359  {$HINTS ON}
1360 {$ENDIF}
1361 
1362 
{**
  Converts a String into a ZWideString and, on non-UNICODE compilers,
  inspects the content with DetectUTF8Encoding first: US-ASCII and UTF8
  content is decoded by UTF8ToString; anything else is decoded by a
  ZWideString cast if StringCP is compatible with UTF8, else by
  ZRawToUnicode using StringCP.
  @param Src the text to convert
  @param StringCP the code page Src is assumed to have if it is ansi
  @return the UTF-16 text
}
function ZConvertStringToUnicodeWithAutoEncode(const Src: String;
  const StringCP: Word): ZWideString;
begin
  {$IFDEF UNICODE}
  Result := Src;
  {$ELSE}
  case DetectUTF8Encoding(PAnsiChar(Src)) of
    etUSASCII, etUTF8:
      Result := UTF8ToString(PAnsiChar(Src));
    else
      begin
        if ZCompatibleCodePages(StringCP, zCP_UTF8) then
          Result := ZWideString(Src)
        else
          Result := ZRawToUnicode(PAnsiChar(Src), StringCP);
      end;
  end;
  {$ENDIF}
end;
1378 
1379 
1380 {$IFDEF FPC}
1381  {$HINTS OFF}
1382 {$ENDIF}
{**
  Copies an AnsiString into a RawByteString through ZSetString, without
  any conversion. RawCP is not read.
}
function ZMoveAnsiToRaw(const Src: AnsiString;
  const RawCP: Word): RawByteString;
begin
  ZSetString(PAnsiChar(Src), Result);
end;
1387 
{**
  Copies a RawByteString into an AnsiString through ZSetString, without
  any conversion. RawCP is not read.
}
function ZMoveRawToAnsi(const Src: RawByteString;
  const RawCP: Word): AnsiString;
begin
  ZSetString(PAnsiChar(Src), Result);
end;
1392 
{**
  Copies an AnsiString into an UTF8String through ZSetString, without any
  conversion.
}
function ZMoveAnsiToUTF8(const Src: AnsiString): UTF8String;
begin
  ZSetString(PAnsiChar(Src), Result);
end;
1397 
{**
  Copies an UTF8String into an AnsiString through ZSetString, without any
  conversion.
}
function ZMoveUTF8ToAnsi(const Src: UTF8String): AnsiString;
begin
  ZSetString(PAnsiChar(Src), Result);
end;
1402 
{**
  Copies a RawByteString into an UTF8String through ZSetString, without
  any conversion. CP is not read.
}
function ZMoveRawToUTF8(const Src: RawByteString;
  const CP: Word): UTF8String;
begin
  ZSetString(PAnsiChar(Src), Result);
end;
1407 
{**
  Copies an UTF8String into a RawByteString through ZSetString, without
  any conversion. CP is not read.
}
function ZMoveUTF8ToRaw(Const Src: UTF8String;
  const CP: Word): RawByteString;
begin
  ZSetString(PAnsiChar(Src), Result);
end;
1412 
{**
  Turns a String into an AnsiString: an AnsiString cast on UNICODE
  compilers, an unconverted ZSetString copy otherwise. StringCP is not read.
}
function ZMoveStringToAnsi(Const Src: String; const StringCP: Word): AnsiString;
begin
  {$IFNDEF UNICODE}
  ZSetString(PAnsiChar(Src), Result);
  {$ELSE}
  Result := AnsiString(Src);
  {$ENDIF}
end;
1421 
{**
  Turns an AnsiString into a String: a String cast on UNICODE compilers,
  an unconverted ZSetString copy otherwise. StringCP is not read.
}
function ZMoveAnsiToString(const Src: AnsiString; const StringCP: Word): String;
begin
  {$IFNDEF UNICODE}
  ZSetString(PAnsiChar(Src), Result);
  {$ELSE}
  Result := String(Src);
  {$ENDIF}
end;
1430 
1431 
{**
  Turns a RawByteString into a String: decoded from RawCP by ZRawToUnicode
  on UNICODE compilers, an unconverted ZSetString copy otherwise.
  StringCP is not read.
}
function ZMoveRawToString(const Src: RawByteString;
  const RawCP, StringCP: Word): String;
begin
  {$IFNDEF UNICODE}
  ZSetString(PAnsiChar(Src), Result);
  {$ELSE}
  Result := ZRawToUnicode(Src, RawCP);
  {$ENDIF}
end;
1441 
{**
  Turns a String into a RawByteString: encoded to RawCP by ZUnicodeToRaw on
  UNICODE compilers, an unconverted ZSetString copy otherwise.
  StringCP is not read.
}
function ZMoveStringToRaw(const Src: String;
  const StringCP, RawCP: Word): RawByteString;
begin
  {$IFNDEF UNICODE}
  ZSetString(PAnsiChar(Src), Result);
  {$ELSE}
  Result := ZUnicodeToRaw(Src, RawCP);
  {$ENDIF}
end;
1451 
{**
  Turns an UTF8String into a String: a String cast on UNICODE compilers, an
  unconverted ZSetString copy otherwise. StringCP is not read.
}
function ZMoveUTF8ToString(const Src: UTF8String; StringCP: Word): String;
begin
  {$IFNDEF UNICODE}
  ZSetString(PAnsiChar(Src), Result);
  {$ELSE}
  Result := String(Src);
  {$ENDIF}
end;
1460 
{**
  Turns a String into an UTF8String: a UTF8String cast on UNICODE
  compilers, an unconverted ZSetString copy otherwise. StringCP is not read.
}
function ZMoveStringToUTF8(const Src: String; const StringCP: Word): UTF8String;
begin
  {$IFNDEF UNICODE}
  ZSetString(PAnsiChar(Src), Result);
  {$ELSE}
  Result := UTF8String(Src);
  {$ENDIF}
end;
1469 
1470 {$IFDEF FPC}
1471  {$HINTS ON}
1472 {$ENDIF}
1473 
{**
  Validates the content of a memory buffer of unknown text encoding and
  returns it as a raw string in the encoding the client code page expects.
  The buffer is copied into a zero-terminated byte array, classified by
  TestEncoding and then copied or converted depending on
  ConSettings.ClientCodePage.Encoding and ConSettings.CTRL_CP.
  @param Buffer the memory holding the text of unknown format
  @param Size the number of bytes in Buffer
  @param ConSettings the connection settings describing the code pages
  @return the validated raw string; empty if Size is 0
}
{$WARNINGS OFF}
function GetValidatedAnsiStringFromBuffer(const Buffer: Pointer; Size: Cardinal;
  ConSettings: PZConSettings): RawByteString;
var
  US: ZWideString;
  Bytes: TByteDynArray;
  WideLen: Cardinal;
begin
  if Size = 0 then
    Result := ''
  else
  begin
    SetLength(Bytes, Size +2); //two extra zero bytes terminate ansi and wide data
    System.move(Buffer^, Pointer(Bytes)^, Size);
    case TestEncoding(Bytes, Size, ConSettings) of
      ceDefault: Result := PAnsiChar(Bytes);
      ceAnsi:
        if ConSettings.ClientCodePage.Encoding = ceAnsi then
          if ( ConSettings.CTRL_CP = zCP_UTF8) or (ConSettings.CTRL_CP = ConSettings.ClientCodePage.CP) then //second test avoids encode the string twice
            Result := PAnsiChar(Bytes) //should be exact
          else
            {$IFDEF WITH_LCONVENCODING}
            Result := Consettings.PlainConvertFunc(AnsiToUTF8(PAnsiChar(Bytes))) //no other possibility
            {$ELSE}
            Result := ZUnicodeToRaw(ZRawToUnicode(PAnsiChar(Bytes), ConSettings.CTRL_CP), ConSettings.ClientCodePage.CP)
            {$ENDIF}
        else //Database expects UTF8
          if ( ConSettings.CTRL_CP = zCP_UTF8) then
            Result := AnsiToUTF8(String(PAnsiChar(Bytes))) //Can't localize the ansi CP
          else
            {$IFDEF WITH_LCONVENCODING}
            Result := AnsiToUTF8(PAnsiChar(Bytes));
            {$ELSE}
            Result := UTF8Encode(ZRawToUnicode(PAnsiChar(Bytes), ConSettings.CTRL_CP));
            {$ENDIF}
      ceUTF8:
        if ConSettings.ClientCodePage.Encoding = ceAnsi then //ansi expected
          {$IFDEF WITH_LCONVENCODING}
          Result := Consettings.PlainConvertFunc(String(PAnsiChar(Bytes)))
          {$ELSE}
          Result := ZUnicodeToRaw(UTF8ToString(PAnsiChar(Bytes)), ConSettings.ClientCodePage.CP)
          {$ENDIF}
        else //UTF8 Expected
          Result := PAnsiChar(Bytes);
      ceUTF16:
        begin
          //Copy whole UTF-16 code units only. Moving Size bytes would write
          //past the WideLen characters of US when Size is odd, and through
          //an empty string when Size is 1.
          WideLen := Size div SizeOf(WideChar);
          SetLength(US, WideLen);
          if WideLen > 0 then
            System.Move(Pointer(Bytes)^, PWideChar(US)^, WideLen * SizeOf(WideChar));
          if ConSettings.ClientCodePage.Encoding = ceAnsi then
            {$IFDEF WITH_LCONVENCODING}
            Result := Consettings.PlainConvertFunc(UTF8Encode(US))
            {$ELSE}
            Result := ZUnicodeToRaw(US, ConSettings.ClientCodePage.CP)
            {$ENDIF}
          else
            Result := UTF8Encode(US);
        end;
      else
        Result := '';
    end;
  end;
end;
{$WARNINGS ON}
1542 
{**
  Same as the three-argument GetValidatedAnsiStringFromBuffer, but converts
  towards ToCP instead of the client code page stored in ConSettings.
  ConSettings.ClientCodePage.CP is replaced by ToCP for the duration of the
  call and is restored afterwards, also when the conversion raises.
  @param Buffer the memory holding the text of unknown format
  @param Size the number of bytes in Buffer
  @param ConSettings the connection settings describing the code pages
  @param ToCP the code page to use in place of the client code page
  @return the validated raw string
}
function GetValidatedAnsiStringFromBuffer(const Buffer: Pointer; Size: Cardinal;
  ConSettings: PZConSettings; ToCP: Word): RawByteString;
var
  DB_CP: Word;
begin
  DB_CP := ConSettings.ClientCodePage.CP;
  ConSettings.ClientCodePage.CP := ToCP;
  try
    Result := GetValidatedAnsiStringFromBuffer(Buffer, Size, ConSettings);
  finally
    //the settings record is shared, so never leave the temporary CP behind
    ConSettings.ClientCodePage.CP := DB_CP;
  end;
end;
1552 
{**
  Returns database text as a raw string in the control code page (CTRL_CP).
  Only the FromDB direction is implemented: the text is returned unchanged
  when CTRL_CP equals the client code page or AutoEncode is off, otherwise
  it is converted (DbcConvertFunc with WITH_LCONVENCODING, else
  ZRawToUnicode followed by ZUnicodeToRaw).
  @param Ansi the raw text
  @param ConSettings the connection settings describing the code pages
  @param FromDB True if the text comes from the database
  @return the validated text; empty if FromDB is False
}
function GetValidatedAnsiString(const Ansi: RawByteString;
  ConSettings: PZConSettings; const FromDB: Boolean): RawByteString;
begin
  if not FromDB then
    Result := '' // not done yet and not needed. Makes the compiler happy
  else if ( ConSettings.CTRL_CP = ConSettings.ClientCodePage.CP ) or not ConSettings.AutoEncode then
    Result := Ansi
  else
    {$IFDEF WITH_LCONVENCODING}
    Result := Consettings.DbcConvertFunc(Ansi);
    {$ELSE}
    Result := ZUnicodeToRaw(ZRawToUnicode(Ansi, ConSettings^.ClientCodePage^.CP), ConSettings^.CTRL_CP);
    {$ENDIF}
end;
1568 
{**
  Returns UTF-16 database text as a raw string.
  Only the FromDB direction is implemented: with WITH_LCONVENCODING the text
  is encoded by UTF8Encode, otherwise by ZUnicodeToRaw using CTRL_CP.
  @param Uni the UTF-16 text
  @param ConSettings the connection settings describing the code pages
  @param FromDB True if the text comes from the database
  @return the encoded text; empty if FromDB is False
}
function GetValidatedAnsiString(const Uni: ZWideString;
  ConSettings: PZConSettings; const FromDB: Boolean): RawByteString;
begin
  if not FromDB then
  begin
    Result := ''; // not done yet and not needed. Makes the compiler happy
    Exit;
  end;
  {$IFDEF WITH_LCONVENCODING}
  Result := UTF8Encode(Uni);
  {$ELSE}
  Result := ZUnicodeToRaw(Uni, ConSettings^.CTRL_CP);
  {$ENDIF}
end;
1581 
{**
  Returns the content of a memory buffer as a raw string in the client
  code page.
  If WasDecoded is True the buffer is taken as UTF-16 text and encoded by
  ZUnicodeToRaw; otherwise the call is forwarded to the three-argument
  GetValidatedAnsiStringFromBuffer, which detects the encoding itself.
  @param Buffer the memory holding the text
  @param Size the number of bytes in Buffer
  @param WasDecoded True if Buffer holds UTF-16 code units
  @param ConSettings the connection settings describing the code pages
  @return the raw encoded text
}
function GetValidatedAnsiStringFromBuffer(const Buffer: Pointer; Size: Cardinal;
  WasDecoded: Boolean; ConSettings: PZConSettings): RawByteString;
var
  US: ZWideString;
  WideLen: Cardinal;
begin
  if WasDecoded then
  begin
    //Copy whole UTF-16 code units only. Moving Size bytes would write past
    //the WideLen characters of US when Size is odd, and through an empty
    //string when Size is below 2.
    WideLen := Size div SizeOf(WideChar);
    SetLength(US, WideLen);
    if WideLen > 0 then
      System.Move(Buffer^, PWideChar(US)^, WideLen * SizeOf(WideChar));
    Result := ZUnicodeToRaw(US, ConSettings.ClientCodePage.CP);
  end
  else
    Result := GetValidatedAnsiStringFromBuffer(Buffer, Size, ConSettings);
end;
{**
  Validates the content of a memory buffer of unknown text encoding and
  returns it as a stream of UTF-16 code units.
  For FromDB the buffer is handed, as zero-terminated raw text, to the
  RawByteString overload. Otherwise TestEncoding classifies the data and it
  is decoded accordingly; data reported as ceAnsi or ceUTF16 is taken over
  as UTF-16 without conversion.
  @param Buffer the memory holding the text of unknown format
  @param Size the number of bytes in Buffer
  @param ConSettings the connection settings describing the code pages
  @param FromDB True if the text comes from the database
  @return a new TStream positioned at 0 that the caller has to free, or nil
    if there is no data
}
function GetValidatedUnicodeStream(const Buffer: Pointer; Size: Cardinal;
  ConSettings: PZConSettings; FromDB: Boolean): TStream;
var
  Len: Integer;
  US: ZWideString;
  Bytes: TByteDynArray;

  { Takes the buffered bytes over into US as UTF-16 code units. Only whole
    code units are copied: moving Size bytes would write past the end of US
    when Size is odd, and through an empty string when Size is 1. }
  procedure SetFromWide;
  var
    WideLen: Cardinal;
  begin
    WideLen := Size div SizeOf(WideChar);
    SetLength(US, WideLen);
    if WideLen > 0 then
      System.Move(Pointer(Bytes)^, PWideChar(US)^, WideLen * SizeOf(WideChar));
  end;

begin
  Result := nil;
  US := '';
  if Assigned(Buffer) and ( Size > 0 ) then
  begin
    SetLength(Bytes, Size +2); //two extra zero bytes terminate ansi and wide data
    System.move(Buffer^, Pointer(Bytes)^, Size);
    if FromDB then //do not check encoding twice
      Result := GetValidatedUnicodeStream(PAnsiChar(Bytes), ConSettings, FromDB)
    else
      case TestEncoding(Bytes, Size, ConSettings) of
        ceDefault:
          case Consettings.ClientCodePage.Encoding of
            ceUTF8: US := UTF8ToString(PAnsiChar(Bytes));
            ceAnsi:
              {$IFDEF WITH_LCONVENCODING}
              US := ZWideString(PAnsiChar(Bytes)); //cast means random success
              {$ELSE}
              if ( ConSettings.CTRL_CP = zCP_UTF8) then
                US := ZWideString(PAnsiChar(Bytes)) //random success
              else
                US := ZRawToUnicode(PAnsiChar(Bytes), ConSettings.CTRL_CP);
              {$ENDIF}
          end;
        ceAnsi: //We've to start from the premise we've got a Unicode string in there
          SetFromWide;
        ceUTF8: US := UTF8ToString(PAnsiChar(Bytes));
        ceUTF16:
          SetFromWide;
      end;

    Len := Length(US)*2;
    //Result is already assigned when the FromDB overload produced a stream
    if not Assigned(Result) and (Len > 0) then
    begin
      Result := TMemoryStream.Create;
      Result.Size := Len;
      System.Move(PWideChar(US)^, TMemoryStream(Result).Memory^, Len);
      Result.Position := 0;
    end;
    SetLength(Bytes, 0);
  end;
end;
1662 
{**
  Decodes raw text to UTF-16 and returns it as a stream.
  For FromDB the text is decoded from the client code page (through
  DbcConvertFunc and UTF8ToString with WITH_LCONVENCODING, else through
  ZRawToUnicode). Otherwise DetectUTF8Encoding decides: US-ASCII and UTF8
  content is decoded by UTF8ToString, ansi content by a ZWideString cast or
  by ZRawToUnicode using CTRL_CP.
  @param Ansi the raw text
  @param ConSettings the connection settings describing the code pages
  @param FromDB True if the text comes from the database
  @return a new TStream positioned at 0 that the caller has to free, or nil
    if the decoded text is empty
}
function GetValidatedUnicodeStream(const Ansi: RawByteString;
  ConSettings: PZConSettings; FromDB: Boolean): TStream;
var
  ByteCount: Integer;
  Wide: ZWideString;
begin
  Result := nil;
  if Ansi = '' then
    Exit;

  if FromDB then
    {$IFDEF WITH_LCONVENCODING}
    Wide := UTF8ToString(Consettings.DbcConvertFunc(Ansi))
    {$ELSE}
    Wide := ZRawToUnicode(Ansi, ConSettings.ClientCodePage.CP)
    {$ENDIF}
  else
    case DetectUTF8Encoding(Ansi) of
      etUSASCII, etUTF8:
        Wide := UTF8ToString(Ansi);
      etAnsi:
        {$IFDEF WITH_LCONVENCODING}
        Wide := ZWideString(Ansi); //random success
        {$ELSE}
        if ( ConSettings.CTRL_CP = zCP_UTF8) then
          Wide := ZWideString(Ansi) //random success
        else
          Wide := ZRawToUnicode(Ansi, ConSettings.CTRL_CP);
        {$ENDIF}
    end;

  ByteCount := Length(Wide)*2;
  if ByteCount <= 0 then
    Exit; //nothing decoded: Result stays nil

  Result := TMemoryStream.Create;
  Result.Size := ByteCount;
  System.Move(PWideChar(Wide)^, TMemoryStream(Result).Memory^, ByteCount);
  Result.Position := 0;
end;
1702 
1703 procedure SetConvertFunctions(ConSettings: PZConSettings);
1704 begin
1705  ConSettings^.ConvFuncs.ZAnsiToUTF8 := nil;
1706  ConSettings^.ConvFuncs.ZUTF8ToAnsi:= nil;
1707  ConSettings^.ConvFuncs.ZUTF8ToString:= nil;
1708  ConSettings^.ConvFuncs.ZStringToUTF8:= nil;
1709  ConSettings^.ConvFuncs.ZAnsiToRaw:= nil;
1710  ConSettings^.ConvFuncs.ZRawToAnsi:= nil;
1711  ConSettings^.ConvFuncs.ZRawToUTF8:= nil;
1712  ConSettings^.ConvFuncs.ZUTF8ToRaw:= nil;
1713  ConSettings^.ConvFuncs.ZStringToRaw:= nil;
1714  ConSettings^.ConvFuncs.ZAnsiToString := nil;
1715  ConSettings^.ConvFuncs.ZStringToAnsi := nil;
1716  ConSettings^.ConvFuncs.ZRawToString:= nil;
1717  ConSettings^.ConvFuncs.ZUnicodeToRaw:= nil;
1718  ConSettings^.ConvFuncs.ZRawToUnicode:= nil;
1719  ConSettings^.ConvFuncs.ZUnicodeToString:= nil;
1720  ConSettings^.ConvFuncs.ZStringToUnicode:= nil;
1721 
1722  //Let's start with the AnsiTo/From types..
1723  // Ansi to/from UTF8String
1724  if ZCompatibleCodePages(ZDefaultSystemCodePage, zCP_UTF8) then
1725  begin
1726  ConSettings^.ConvFuncs.ZAnsiToUTF8 := @ZMoveAnsiToUTF8;
1727  ConSettings^.ConvFuncs.ZUTF8ToAnsi := @ZMoveUTF8ToAnsi;
1728  end
1729  else
1730  begin
1731  ConSettings^.ConvFuncs.ZAnsiToUTF8 := @ZConvertAnsiToUTF8;
1732  ConSettings^.ConvFuncs.ZUTF8ToAnsi := @ZConvertUTF8ToAnsi;
1733  end;
1734 
1735  // Ansi to/from String
1736  if ZCompatibleCodePages(ZDefaultSystemCodePage, ConSettings^.CTRL_CP) then
1737  begin
1738  ConSettings^.ConvFuncs.ZAnsiToString := @ZMoveAnsiToString;
1739  if ConSettings^.AutoEncode then
1740  ConSettings^.ConvFuncs.ZStringToAnsi := @ZConvertStringToAnsiWithAutoEncode
1741  else
1742  ConSettings^.ConvFuncs.ZStringToAnsi := @ZMoveStringToAnsi;
1743  end
1744  else
1745  begin
1746  ConSettings^.ConvFuncs.ZAnsiToString := @ZConvertAnsiToString;
1747  if ConSettings^.AutoEncode then
1748  ConSettings^.ConvFuncs.ZStringToAnsi := @ZConvertStringToAnsiWithAutoEncode
1749  else
1750  ConSettings^.ConvFuncs.ZStringToAnsi := @ZConvertStringToAnsi;
1751  end;
1752 
1753  if ConSettings^.ClientCodePage^.IsStringFieldCPConsistent then
1754  begin
1755  // raw to/from UTF8
1756  if ZCompatibleCodePages(ConSettings^.ClientCodePage^.CP, zCP_UTF8) then
1757  begin
1758  ConSettings^.ConvFuncs.ZRawToUTF8 := @ZMoveRawToUTF8;
1759  ConSettings^.ConvFuncs.ZUTF8ToRaw := @ZMoveUTF8ToRaw;
1760  end
1761  else
1762  begin
1763  ConSettings^.ConvFuncs.ZRawToUTF8 := @ZConvertRawToUTF8;
1764  ConSettings^.ConvFuncs.ZUTF8ToRaw := @ZConvertUTF8ToRaw;
1765  end;
1766 
1767  // raw to/from ansi
1768  if ZCompatibleCodePages(ConSettings^.ClientCodePage^.CP, ZDefaultSystemCodePage) then
1769  begin
1770  ConSettings^.ConvFuncs.ZAnsiToRaw := @ZMoveAnsiToRaw;
1771  ConSettings^.ConvFuncs.ZRawToAnsi := @ZMoveRawToAnsi;
1772  end
1773  else
1774  begin
1775  ConSettings^.ConvFuncs.ZAnsiToRaw := @ZConvertAnsiToRaw;
1776  ConSettings^.ConvFuncs.ZRawToAnsi := @ZConvertRawToAnsi;
1777  end;
1778 
1779  // raw to/from unicode
1780  if ConSettings^.ClientCodePage^.CP = zCP_NONE then
1781  begin
1782  if ConSettings^.AutoEncode then
1783  ConSettings^.ConvFuncs.ZRawToUnicode := @ZUnknownRawToUnicodeWithAutoEncode
1784  else
1785  ConSettings^.ConvFuncs.ZRawToUnicode := @ZUnknownRawToUnicode;
1786  ConSettings^.ConvFuncs.ZUnicodeToRaw := @ZUnicodeToUnknownRaw;
1787  end
1788  else
1789  begin
1790  ConSettings^.ConvFuncs.ZRawToUnicode := @ZRawToUnicode;
1791  ConSettings^.ConvFuncs.ZUnicodeToRaw := @ZUnicodeToRaw;
1792  end;
1793 
1794  //last but not least the String to/from converters
1795  //string represents the DataSet/IZResultSet Strings
1796  if ZCompatibleCodePages(ConSettings^.CTRL_CP, zCP_UTF8) then
1797  begin
1798  ConSettings^.ConvFuncs.ZUTF8ToString := @ZMoveUTF8ToString;
1799  if ConSettings^.AutoEncode then
1800  ConSettings^.ConvFuncs.ZStringToUTF8 := @ZConvertStringToUTF8WithAutoEncode
1801  else
1802  ConSettings^.ConvFuncs.ZStringToUTF8 := @ZMoveStringToUTF8;
1803  end
1804  else
1805  begin
1806  ConSettings^.ConvFuncs.ZUTF8ToString := @ZConvertUTF8ToString;
1807  if ConSettings^.AutoEncode then
1808  ConSettings^.ConvFuncs.ZStringToUTF8 := @ZConvertStringToUTF8WithAutoEncode
1809  else
1810  ConSettings^.ConvFuncs.ZStringToUTF8 := @ZConvertStringToUTF8
1811  end;
1812 
1813  {$IFDEF UNICODE}
1814  Consettings^.ConvFuncs.ZStringToRaw := @ZConvertStringToRaw;
1815  Consettings^.ConvFuncs.ZRawToString := @ZConvertRawToString;
1816 
1817  ConSettings^.ConvFuncs.ZUnicodeToString := @ZConvertUnicodeToString;
1818  Consettings^.ConvFuncs.ZStringToUnicode := @ZConvertStringToUnicode;
1819  {$ELSE}
1820  {String To/From Raw}
1821  if ZCompatibleCodePages(ConSettings^.ClientCodePage^.CP, ConSettings^.CTRL_CP) then
1822  begin
1823  Consettings^.ConvFuncs.ZRawToString := @ZMoveRawToString;
1824  if ConSettings^.AutoEncode then
1825  Consettings^.ConvFuncs.ZStringToRaw := @ZConvertStringToRawWithAutoEncode
1826  else
1827  Consettings^.ConvFuncs.ZStringToRaw := @ZMoveStringToRaw;
1828  end
1829  else
1830  if ConSettings^.AutoEncode then
1831  begin
1832  Consettings^.ConvFuncs.ZRawToString := @ZConvertRawToString;
1833  Consettings^.ConvFuncs.ZStringToRaw := @ZConvertStringToRawWithAutoEncode;
1834  end
1835  else
1836  begin
1837  Consettings^.ConvFuncs.ZStringToRaw := @ZMoveStringToRaw;
1838  Consettings^.ConvFuncs.ZRawToString := @ZMoveRawToString;
1839  end;
1840 
1841  {String To/From Unicode}
1842  if ConSettings^.CTRL_CP = zCP_UTF8 then
1843  Consettings^.ConvFuncs.ZUnicodeToString := @ZConvertUnicodeToString_CPUTF8
1844  else
1845  Consettings^.ConvFuncs.ZUnicodeToString := @ZConvertUnicodeToString;
1846 
1847  if ConSettings^.AutoEncode then
1848  Consettings^.ConvFuncs.ZStringToUnicode := @ZConvertStringToUnicodeWithAutoEncode
1849  else
1850  if ConSettings^.CTRL_CP = zCP_UTF8 then
1851  Consettings^.ConvFuncs.ZStringToUnicode := @ZConvertString_CPUTF8ToUnicode
1852  else
1853  Consettings^.ConvFuncs.ZStringToUnicode := @ZConvertStringToUnicode;
1854  {$ENDIF}
1855  end
1856  else //autoencode strings is always true
1857  begin
1858  ConSettings^.ConvFuncs.ZUTF8ToString := @ZConvertUTF8ToString;
1859  ConSettings^.ConvFuncs.ZStringToUTF8 := @ZConvertStringToUTF8WithAutoEncode;
1860  ConSettings^.ConvFuncs.ZAnsiToRaw := @ZConvertAnsiToRaw;
1861  ConSettings^.ConvFuncs.ZRawToAnsi := @ZConvertRawToAnsi;
1862  ConSettings^.ConvFuncs.ZRawToUTF8 := @ZConvertRawToUTF8;
1863  ConSettings^.ConvFuncs.ZUTF8ToRaw := @ZConvertUTF8ToRaw;
1864  Consettings^.ConvFuncs.ZStringToRaw := @ZConvertStringToRawWithAutoEncode;
1865  Consettings^.ConvFuncs.ZRawToString := @ZConvertRawToString;
1866  Consettings^.ConvFuncs.ZUnicodeToRaw := @ZUnicodeToRaw;
1867  Consettings^.ConvFuncs.ZRawToUnicode := @ZRawToUnicode;
1868  ConSettings^.ConvFuncs.ZUnicodeToString := @ZConvertUnicodeToString;
1869  Consettings^.ConvFuncs.ZStringToUnicode := @ZConvertStringToUnicodeWithAutoEncode;
1870  end;
1871 end;
1872 
1873 end.
1874