869 lines
23 KiB
C
869 lines
23 KiB
C
|
|
// CkCharsetW.h: interface for the CkCharsetW class.
|
||
|
|
//
|
||
|
|
//////////////////////////////////////////////////////////////////////
|
||
|
|
|
||
|
|
// This header is generated for Chilkat 11.3.0
|
||
|
|
|
||
|
|
#ifndef _CkCharsetW_H
|
||
|
|
#define _CkCharsetW_H
|
||
|
|
|
||
|
|
#include "chilkatDefs.h"
|
||
|
|
#include "CkString.h"
|
||
|
|
#include "CkWideCharBase.h"
|
||
|
|
|
||
|
|
class CkByteData;
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
#if !defined(__sun__) && !defined(__sun)
|
||
|
|
#pragma pack (push, 8)
|
||
|
|
#endif
|
||
|
|
|
||
|
|
|
||
|
|
// CLASS: CkCharsetW
|
||
|
|
class CK_VISIBLE_PUBLIC CkCharsetW : public CkWideCharBase
|
||
|
|
{
|
||
|
|
|
||
|
|
|
||
|
|
private:
|
||
|
|
|
||
|
|
// Don't allow assignment or copying these objects.
|
||
|
|
CkCharsetW(const CkCharsetW &);
|
||
|
|
CkCharsetW &operator=(const CkCharsetW &);
|
||
|
|
|
||
|
|
public:
|
||
|
|
CkCharsetW(void);
|
||
|
|
virtual ~CkCharsetW(void);
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
static CkCharsetW *createNew(void);
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
void CK_VISIBLE_PRIVATE inject(void *impl);
|
||
|
|
|
||
|
|
// May be called when finished with the object to free/dispose of any
|
||
|
|
// internal resources held by the object.
|
||
|
|
void dispose(void);
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
// BEGIN PUBLIC INTERFACE
|
||
|
|
|
||
|
|
// ----------------------
|
||
|
|
// Properties
|
||
|
|
// ----------------------
|
||
|
|
// If the ErrorAction property is set to 6, then this property controls how errors
|
||
|
|
// are handled. It specifies an alternate To charset. When a character in the input
|
||
|
|
// data cannot be converted to the target charset, an attempt is made to convert it
|
||
|
|
// to the AltToCharset. If that fails, the input character is dropped.
|
||
|
|
void get_AltToCharset(CkString &str);
|
||
|
|
// If the ErrorAction property is set to 6, then this property controls how errors
|
||
|
|
// are handled. It specifies an alternate To charset. When a character in the input
|
||
|
|
// data cannot be converted to the target charset, an attempt is made to convert it
|
||
|
|
// to the AltToCharset. If that fails, the input character is dropped.
|
||
|
|
const wchar_t *altToCharset(void);
|
||
|
|
// If the ErrorAction property is set to 6, then this property controls how errors
|
||
|
|
// are handled. It specifies an alternate To charset. When a character in the input
|
||
|
|
// data cannot be converted to the target charset, an attempt is made to convert it
|
||
|
|
// to the AltToCharset. If that fails, the input character is dropped.
|
||
|
|
void put_AltToCharset(const wchar_t *newVal);
|
||
|
|
|
||
|
|
// Controls how errors are handled. When a character in the input data cannot be
|
||
|
|
// converted to the target charset, the action taken is controlled by this
|
||
|
|
// property. The possible settings are: (0) drop the error characters, (1)
|
||
|
|
// substitute the data set by the SetErrorString method, (2) convert to a
|
||
|
|
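// hex-escaped string, (6) attempt conversion to the AltToCharset charset
// (see the AltToCharset property).
// NOTE(review): this list was truncated when the header was generated; confirm
// the full set of ErrorAction values against the Chilkat reference documentation.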
|
||
|
|
int get_ErrorAction(void);
|
||
|
|
// Controls how errors are handled. When a character in the input data cannot be
|
||
|
|
// converted to the target charset, the action taken is controlled by this
|
||
|
|
// property. The possible settings are: (0) drop the error characters, (1)
|
||
|
|
// substitute the data set by the SetErrorString method, (2) convert to a
|
||
|
|
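// hex-escaped string, (6) attempt conversion to the AltToCharset charset
// (see the AltToCharset property).
// NOTE(review): this list was truncated when the header was generated; confirm
// the full set of ErrorAction values against the Chilkat reference documentation.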
|
||
|
|
void put_ErrorAction(int newVal);
|
||
|
|
|
||
|
|
// Tells the charset converter the charset of the input data for a conversion.
|
||
|
|
// Possible values are:
|
||
|
|
// us-ascii
|
||
|
|
// unicode (also known as UTF16LE or simply UTF16)
|
||
|
|
// unicodefffe (also known as UTF16BE)
|
||
|
|
// ebcdic
|
||
|
|
// iso-8859-1
|
||
|
|
// iso-8859-2
|
||
|
|
// iso-8859-3
|
||
|
|
// iso-8859-4
|
||
|
|
// iso-8859-5
|
||
|
|
// iso-8859-6
|
||
|
|
// iso-8859-7
|
||
|
|
// iso-8859-8
|
||
|
|
// iso-8859-9
|
||
|
|
// iso-8859-13
|
||
|
|
// iso-8859-15
|
||
|
|
// windows-874
|
||
|
|
// windows-1250
|
||
|
|
// windows-1251
|
||
|
|
// windows-1252
|
||
|
|
// windows-1253
|
||
|
|
// windows-1254
|
||
|
|
// windows-1255
|
||
|
|
// windows-1256
|
||
|
|
// windows-1257
|
||
|
|
// windows-1258
|
||
|
|
// utf-7
|
||
|
|
// utf-8
|
||
|
|
// utf-32
|
||
|
|
// utf-32be
|
||
|
|
// shift_jis
|
||
|
|
// gb2312
|
||
|
|
// ks_c_5601-1987
|
||
|
|
// big5
|
||
|
|
// iso-2022-jp
|
||
|
|
// iso-2022-kr
|
||
|
|
// euc-jp
|
||
|
|
// euc-kr
|
||
|
|
// macintosh
|
||
|
|
// x-mac-japanese
|
||
|
|
// x-mac-chinesetrad
|
||
|
|
// x-mac-korean
|
||
|
|
// x-mac-arabic
|
||
|
|
// x-mac-hebrew
|
||
|
|
// x-mac-greek
|
||
|
|
// x-mac-cyrillic
|
||
|
|
// x-mac-chinesesimp
|
||
|
|
// x-mac-romanian
|
||
|
|
// x-mac-ukrainian
|
||
|
|
// x-mac-thai
|
||
|
|
// x-mac-ce
|
||
|
|
// x-mac-icelandic
|
||
|
|
// x-mac-turkish
|
||
|
|
// x-mac-croatian
|
||
|
|
// asmo-708
|
||
|
|
// dos-720
|
||
|
|
// dos-862
|
||
|
|
// ibm01140
|
||
|
|
// ibm01141
|
||
|
|
// ibm01142
|
||
|
|
// ibm01143
|
||
|
|
// ibm01144
|
||
|
|
// ibm01145
|
||
|
|
// ibm01146
|
||
|
|
// ibm01147
|
||
|
|
// ibm01148
|
||
|
|
// ibm01149
|
||
|
|
// ibm037
|
||
|
|
// ibm437
|
||
|
|
// ibm500
|
||
|
|
// ibm737
|
||
|
|
// ibm775
|
||
|
|
// ibm850
|
||
|
|
// ibm852
|
||
|
|
// ibm855
|
||
|
|
// ibm857
|
||
|
|
// ibm00858
|
||
|
|
// ibm860
|
||
|
|
// ibm861
|
||
|
|
// ibm863
|
||
|
|
// ibm864
|
||
|
|
// ibm865
|
||
|
|
// cp866
|
||
|
|
// ibm869
|
||
|
|
// ibm870
|
||
|
|
// cp875
|
||
|
|
// koi8-r
|
||
|
|
// koi8-u
|
||
|
|
void get_FromCharset(CkString &str);
|
||
|
|
// Tells the charset converter the charset of the input data for a conversion.
|
||
|
|
// Possible values are:
|
||
|
|
// us-ascii
|
||
|
|
// unicode (also known as UTF16LE or simply UTF16)
|
||
|
|
// unicodefffe (also known as UTF16BE)
|
||
|
|
// ebcdic
|
||
|
|
// iso-8859-1
|
||
|
|
// iso-8859-2
|
||
|
|
// iso-8859-3
|
||
|
|
// iso-8859-4
|
||
|
|
// iso-8859-5
|
||
|
|
// iso-8859-6
|
||
|
|
// iso-8859-7
|
||
|
|
// iso-8859-8
|
||
|
|
// iso-8859-9
|
||
|
|
// iso-8859-13
|
||
|
|
// iso-8859-15
|
||
|
|
// windows-874
|
||
|
|
// windows-1250
|
||
|
|
// windows-1251
|
||
|
|
// windows-1252
|
||
|
|
// windows-1253
|
||
|
|
// windows-1254
|
||
|
|
// windows-1255
|
||
|
|
// windows-1256
|
||
|
|
// windows-1257
|
||
|
|
// windows-1258
|
||
|
|
// utf-7
|
||
|
|
// utf-8
|
||
|
|
// utf-32
|
||
|
|
// utf-32be
|
||
|
|
// shift_jis
|
||
|
|
// gb2312
|
||
|
|
// ks_c_5601-1987
|
||
|
|
// big5
|
||
|
|
// iso-2022-jp
|
||
|
|
// iso-2022-kr
|
||
|
|
// euc-jp
|
||
|
|
// euc-kr
|
||
|
|
// macintosh
|
||
|
|
// x-mac-japanese
|
||
|
|
// x-mac-chinesetrad
|
||
|
|
// x-mac-korean
|
||
|
|
// x-mac-arabic
|
||
|
|
// x-mac-hebrew
|
||
|
|
// x-mac-greek
|
||
|
|
// x-mac-cyrillic
|
||
|
|
// x-mac-chinesesimp
|
||
|
|
// x-mac-romanian
|
||
|
|
// x-mac-ukrainian
|
||
|
|
// x-mac-thai
|
||
|
|
// x-mac-ce
|
||
|
|
// x-mac-icelandic
|
||
|
|
// x-mac-turkish
|
||
|
|
// x-mac-croatian
|
||
|
|
// asmo-708
|
||
|
|
// dos-720
|
||
|
|
// dos-862
|
||
|
|
// ibm01140
|
||
|
|
// ibm01141
|
||
|
|
// ibm01142
|
||
|
|
// ibm01143
|
||
|
|
// ibm01144
|
||
|
|
// ibm01145
|
||
|
|
// ibm01146
|
||
|
|
// ibm01147
|
||
|
|
// ibm01148
|
||
|
|
// ibm01149
|
||
|
|
// ibm037
|
||
|
|
// ibm437
|
||
|
|
// ibm500
|
||
|
|
// ibm737
|
||
|
|
// ibm775
|
||
|
|
// ibm850
|
||
|
|
// ibm852
|
||
|
|
// ibm855
|
||
|
|
// ibm857
|
||
|
|
// ibm00858
|
||
|
|
// ibm860
|
||
|
|
// ibm861
|
||
|
|
// ibm863
|
||
|
|
// ibm864
|
||
|
|
// ibm865
|
||
|
|
// cp866
|
||
|
|
// ibm869
|
||
|
|
// ibm870
|
||
|
|
// cp875
|
||
|
|
// koi8-r
|
||
|
|
// koi8-u
|
||
|
|
const wchar_t *fromCharset(void);
|
||
|
|
// Tells the charset converter the charset of the input data for a conversion.
|
||
|
|
// Possible values are:
|
||
|
|
// us-ascii
|
||
|
|
// unicode (also known as UTF16LE or simply UTF16)
|
||
|
|
// unicodefffe (also known as UTF16BE)
|
||
|
|
// ebcdic
|
||
|
|
// iso-8859-1
|
||
|
|
// iso-8859-2
|
||
|
|
// iso-8859-3
|
||
|
|
// iso-8859-4
|
||
|
|
// iso-8859-5
|
||
|
|
// iso-8859-6
|
||
|
|
// iso-8859-7
|
||
|
|
// iso-8859-8
|
||
|
|
// iso-8859-9
|
||
|
|
// iso-8859-13
|
||
|
|
// iso-8859-15
|
||
|
|
// windows-874
|
||
|
|
// windows-1250
|
||
|
|
// windows-1251
|
||
|
|
// windows-1252
|
||
|
|
// windows-1253
|
||
|
|
// windows-1254
|
||
|
|
// windows-1255
|
||
|
|
// windows-1256
|
||
|
|
// windows-1257
|
||
|
|
// windows-1258
|
||
|
|
// utf-7
|
||
|
|
// utf-8
|
||
|
|
// utf-32
|
||
|
|
// utf-32be
|
||
|
|
// shift_jis
|
||
|
|
// gb2312
|
||
|
|
// ks_c_5601-1987
|
||
|
|
// big5
|
||
|
|
// iso-2022-jp
|
||
|
|
// iso-2022-kr
|
||
|
|
// euc-jp
|
||
|
|
// euc-kr
|
||
|
|
// macintosh
|
||
|
|
// x-mac-japanese
|
||
|
|
// x-mac-chinesetrad
|
||
|
|
// x-mac-korean
|
||
|
|
// x-mac-arabic
|
||
|
|
// x-mac-hebrew
|
||
|
|
// x-mac-greek
|
||
|
|
// x-mac-cyrillic
|
||
|
|
// x-mac-chinesesimp
|
||
|
|
// x-mac-romanian
|
||
|
|
// x-mac-ukrainian
|
||
|
|
// x-mac-thai
|
||
|
|
// x-mac-ce
|
||
|
|
// x-mac-icelandic
|
||
|
|
// x-mac-turkish
|
||
|
|
// x-mac-croatian
|
||
|
|
// asmo-708
|
||
|
|
// dos-720
|
||
|
|
// dos-862
|
||
|
|
// ibm01140
|
||
|
|
// ibm01141
|
||
|
|
// ibm01142
|
||
|
|
// ibm01143
|
||
|
|
// ibm01144
|
||
|
|
// ibm01145
|
||
|
|
// ibm01146
|
||
|
|
// ibm01147
|
||
|
|
// ibm01148
|
||
|
|
// ibm01149
|
||
|
|
// ibm037
|
||
|
|
// ibm437
|
||
|
|
// ibm500
|
||
|
|
// ibm737
|
||
|
|
// ibm775
|
||
|
|
// ibm850
|
||
|
|
// ibm852
|
||
|
|
// ibm855
|
||
|
|
// ibm857
|
||
|
|
// ibm00858
|
||
|
|
// ibm860
|
||
|
|
// ibm861
|
||
|
|
// ibm863
|
||
|
|
// ibm864
|
||
|
|
// ibm865
|
||
|
|
// cp866
|
||
|
|
// ibm869
|
||
|
|
// ibm870
|
||
|
|
// cp875
|
||
|
|
// koi8-r
|
||
|
|
// koi8-u
|
||
|
|
void put_FromCharset(const wchar_t *newVal);
|
||
|
|
|
||
|
|
// If SaveLast is set to true, then the input and output of a conversion are saved
|
||
|
|
// to allow the exact bytes that are sent to the converter to be seen (for
|
||
|
|
// debugging purposes). This property shows the last input data in a
|
||
|
|
// hexadecimal-encoded string.
|
||
|
|
void get_LastInputAsHex(CkString &str);
|
||
|
|
// If SaveLast is set to true, then the input and output of a conversion are saved
|
||
|
|
// to allow the exact bytes that are sent to the converter to be seen (for
|
||
|
|
// debugging purposes). This property shows the last input data in a
|
||
|
|
// hexadecimal-encoded string.
|
||
|
|
const wchar_t *lastInputAsHex(void);
|
||
|
|
|
||
|
|
// If SaveLast is set to true, then the input and output of a conversion are saved
|
||
|
|
// to allow the exact bytes that are sent to the converter to be seen (for
|
||
|
|
// debugging purposes). This property shows the last input data in a
|
||
|
|
// quoted-printable string.
|
||
|
|
void get_LastInputAsQP(CkString &str);
|
||
|
|
// If SaveLast is set to true, then the input and output of a conversion are saved
|
||
|
|
// to allow the exact bytes that are sent to the converter to be seen (for
|
||
|
|
// debugging purposes). This property shows the last input data in a
|
||
|
|
// quoted-printable string.
|
||
|
|
const wchar_t *lastInputAsQP(void);
|
||
|
|
|
||
|
|
// If SaveLast is set to true, then the input and output of a conversion are saved
|
||
|
|
// to allow the exact bytes that are sent to the converter to be seen (for
|
||
|
|
// debugging purposes). This property shows the last output data in a
|
||
|
|
// hexadecimal-encoded string.
|
||
|
|
void get_LastOutputAsHex(CkString &str);
|
||
|
|
// If SaveLast is set to true, then the input and output of a conversion are saved
|
||
|
|
// to allow the exact bytes that are sent to the converter to be seen (for
|
||
|
|
// debugging purposes). This property shows the last output data in a
|
||
|
|
// hexadecimal-encoded string.
|
||
|
|
const wchar_t *lastOutputAsHex(void);
|
||
|
|
|
||
|
|
// If SaveLast is set to true, then the input and output of a conversion are saved
|
||
|
|
// to allow the exact bytes that are sent to the converter to be seen (for
|
||
|
|
// debugging purposes). This property shows the last output data in a
|
||
|
|
// quoted-printable string.
|
||
|
|
void get_LastOutputAsQP(CkString &str);
|
||
|
|
// If SaveLast is set to true, then the input and output of a conversion are saved
|
||
|
|
// to allow the exact bytes that are sent to the converter to be seen (for
|
||
|
|
// debugging purposes). This property shows the last output data in a
|
||
|
|
// quoted-printable string.
|
||
|
|
const wchar_t *lastOutputAsQP(void);
|
||
|
|
|
||
|
|
// Tells the component to keep the input/output byte data in memory after a
|
||
|
|
// conversion is complete so the data can be examined via the LastInputAsHex/QP and
|
||
|
|
// LastOutputAsHex/QP properties. (for debugging purposes)
|
||
|
|
bool get_SaveLast(void);
|
||
|
|
// Tells the component to keep the input/output byte data in memory after a
|
||
|
|
// conversion is complete so the data can be examined via the LastInputAsHex/QP and
|
||
|
|
// LastOutputAsHex/QP properties. (for debugging purposes)
|
||
|
|
void put_SaveLast(bool newVal);
|
||
|
|
|
||
|
|
// Tells the charset converter the target charset for a conversion. Possible values
|
||
|
|
// are:
|
||
|
|
// us-ascii
|
||
|
|
// unicode (also known as UTF16LE or simply UTF16)
|
||
|
|
// unicodefffe (also known as UTF16BE)
|
||
|
|
// ebcdic
|
||
|
|
// iso-8859-1
|
||
|
|
// iso-8859-2
|
||
|
|
// iso-8859-3
|
||
|
|
// iso-8859-4
|
||
|
|
// iso-8859-5
|
||
|
|
// iso-8859-6
|
||
|
|
// iso-8859-7
|
||
|
|
// iso-8859-8
|
||
|
|
// iso-8859-9
|
||
|
|
// iso-8859-13
|
||
|
|
// iso-8859-15
|
||
|
|
// windows-874
|
||
|
|
// windows-1250
|
||
|
|
// windows-1251
|
||
|
|
// windows-1252
|
||
|
|
// windows-1253
|
||
|
|
// windows-1254
|
||
|
|
// windows-1255
|
||
|
|
// windows-1256
|
||
|
|
// windows-1257
|
||
|
|
// windows-1258
|
||
|
|
// utf-7
|
||
|
|
// utf-8
|
||
|
|
// utf-32
|
||
|
|
// utf-32be
|
||
|
|
// shift_jis
|
||
|
|
// gb2312
|
||
|
|
// ks_c_5601-1987
|
||
|
|
// big5
|
||
|
|
// iso-2022-jp
|
||
|
|
// iso-2022-kr
|
||
|
|
// euc-jp
|
||
|
|
// euc-kr
|
||
|
|
// macintosh
|
||
|
|
// x-mac-japanese
|
||
|
|
// x-mac-chinesetrad
|
||
|
|
// x-mac-korean
|
||
|
|
// x-mac-arabic
|
||
|
|
// x-mac-hebrew
|
||
|
|
// x-mac-greek
|
||
|
|
// x-mac-cyrillic
|
||
|
|
// x-mac-chinesesimp
|
||
|
|
// x-mac-romanian
|
||
|
|
// x-mac-ukrainian
|
||
|
|
// x-mac-thai
|
||
|
|
// x-mac-ce
|
||
|
|
// x-mac-icelandic
|
||
|
|
// x-mac-turkish
|
||
|
|
// x-mac-croatian
|
||
|
|
// asmo-708
|
||
|
|
// dos-720
|
||
|
|
// dos-862
|
||
|
|
// ibm01140
|
||
|
|
// ibm01141
|
||
|
|
// ibm01142
|
||
|
|
// ibm01143
|
||
|
|
// ibm01144
|
||
|
|
// ibm01145
|
||
|
|
// ibm01146
|
||
|
|
// ibm01147
|
||
|
|
// ibm01148
|
||
|
|
// ibm01149
|
||
|
|
// ibm037
|
||
|
|
// ibm437
|
||
|
|
// ibm500
|
||
|
|
// ibm737
|
||
|
|
// ibm775
|
||
|
|
// ibm850
|
||
|
|
// ibm852
|
||
|
|
// ibm855
|
||
|
|
// ibm857
|
||
|
|
// ibm00858
|
||
|
|
// ibm860
|
||
|
|
// ibm861
|
||
|
|
// ibm863
|
||
|
|
// ibm864
|
||
|
|
// ibm865
|
||
|
|
// cp866
|
||
|
|
// ibm869
|
||
|
|
// ibm870
|
||
|
|
// cp875
|
||
|
|
// koi8-r
|
||
|
|
// koi8-u
|
||
|
|
void get_ToCharset(CkString &str);
|
||
|
|
// Tells the charset converter the target charset for a conversion. Possible values
|
||
|
|
// are:
|
||
|
|
// us-ascii
|
||
|
|
// unicode (also known as UTF16LE or simply UTF16)
|
||
|
|
// unicodefffe (also known as UTF16BE)
|
||
|
|
// ebcdic
|
||
|
|
// iso-8859-1
|
||
|
|
// iso-8859-2
|
||
|
|
// iso-8859-3
|
||
|
|
// iso-8859-4
|
||
|
|
// iso-8859-5
|
||
|
|
// iso-8859-6
|
||
|
|
// iso-8859-7
|
||
|
|
// iso-8859-8
|
||
|
|
// iso-8859-9
|
||
|
|
// iso-8859-13
|
||
|
|
// iso-8859-15
|
||
|
|
// windows-874
|
||
|
|
// windows-1250
|
||
|
|
// windows-1251
|
||
|
|
// windows-1252
|
||
|
|
// windows-1253
|
||
|
|
// windows-1254
|
||
|
|
// windows-1255
|
||
|
|
// windows-1256
|
||
|
|
// windows-1257
|
||
|
|
// windows-1258
|
||
|
|
// utf-7
|
||
|
|
// utf-8
|
||
|
|
// utf-32
|
||
|
|
// utf-32be
|
||
|
|
// shift_jis
|
||
|
|
// gb2312
|
||
|
|
// ks_c_5601-1987
|
||
|
|
// big5
|
||
|
|
// iso-2022-jp
|
||
|
|
// iso-2022-kr
|
||
|
|
// euc-jp
|
||
|
|
// euc-kr
|
||
|
|
// macintosh
|
||
|
|
// x-mac-japanese
|
||
|
|
// x-mac-chinesetrad
|
||
|
|
// x-mac-korean
|
||
|
|
// x-mac-arabic
|
||
|
|
// x-mac-hebrew
|
||
|
|
// x-mac-greek
|
||
|
|
// x-mac-cyrillic
|
||
|
|
// x-mac-chinesesimp
|
||
|
|
// x-mac-romanian
|
||
|
|
// x-mac-ukrainian
|
||
|
|
// x-mac-thai
|
||
|
|
// x-mac-ce
|
||
|
|
// x-mac-icelandic
|
||
|
|
// x-mac-turkish
|
||
|
|
// x-mac-croatian
|
||
|
|
// asmo-708
|
||
|
|
// dos-720
|
||
|
|
// dos-862
|
||
|
|
// ibm01140
|
||
|
|
// ibm01141
|
||
|
|
// ibm01142
|
||
|
|
// ibm01143
|
||
|
|
// ibm01144
|
||
|
|
// ibm01145
|
||
|
|
// ibm01146
|
||
|
|
// ibm01147
|
||
|
|
// ibm01148
|
||
|
|
// ibm01149
|
||
|
|
// ibm037
|
||
|
|
// ibm437
|
||
|
|
// ibm500
|
||
|
|
// ibm737
|
||
|
|
// ibm775
|
||
|
|
// ibm850
|
||
|
|
// ibm852
|
||
|
|
// ibm855
|
||
|
|
// ibm857
|
||
|
|
// ibm00858
|
||
|
|
// ibm860
|
||
|
|
// ibm861
|
||
|
|
// ibm863
|
||
|
|
// ibm864
|
||
|
|
// ibm865
|
||
|
|
// cp866
|
||
|
|
// ibm869
|
||
|
|
// ibm870
|
||
|
|
// cp875
|
||
|
|
// koi8-r
|
||
|
|
// koi8-u
|
||
|
|
const wchar_t *toCharset(void);
|
||
|
|
// Tells the charset converter the target charset for a conversion. Possible values
|
||
|
|
// are:
|
||
|
|
// us-ascii
|
||
|
|
// unicode (also known as UTF16LE or simply UTF16)
|
||
|
|
// unicodefffe (also known as UTF16BE)
|
||
|
|
// ebcdic
|
||
|
|
// iso-8859-1
|
||
|
|
// iso-8859-2
|
||
|
|
// iso-8859-3
|
||
|
|
// iso-8859-4
|
||
|
|
// iso-8859-5
|
||
|
|
// iso-8859-6
|
||
|
|
// iso-8859-7
|
||
|
|
// iso-8859-8
|
||
|
|
// iso-8859-9
|
||
|
|
// iso-8859-13
|
||
|
|
// iso-8859-15
|
||
|
|
// windows-874
|
||
|
|
// windows-1250
|
||
|
|
// windows-1251
|
||
|
|
// windows-1252
|
||
|
|
// windows-1253
|
||
|
|
// windows-1254
|
||
|
|
// windows-1255
|
||
|
|
// windows-1256
|
||
|
|
// windows-1257
|
||
|
|
// windows-1258
|
||
|
|
// utf-7
|
||
|
|
// utf-8
|
||
|
|
// utf-32
|
||
|
|
// utf-32be
|
||
|
|
// shift_jis
|
||
|
|
// gb2312
|
||
|
|
// ks_c_5601-1987
|
||
|
|
// big5
|
||
|
|
// iso-2022-jp
|
||
|
|
// iso-2022-kr
|
||
|
|
// euc-jp
|
||
|
|
// euc-kr
|
||
|
|
// macintosh
|
||
|
|
// x-mac-japanese
|
||
|
|
// x-mac-chinesetrad
|
||
|
|
// x-mac-korean
|
||
|
|
// x-mac-arabic
|
||
|
|
// x-mac-hebrew
|
||
|
|
// x-mac-greek
|
||
|
|
// x-mac-cyrillic
|
||
|
|
// x-mac-chinesesimp
|
||
|
|
// x-mac-romanian
|
||
|
|
// x-mac-ukrainian
|
||
|
|
// x-mac-thai
|
||
|
|
// x-mac-ce
|
||
|
|
// x-mac-icelandic
|
||
|
|
// x-mac-turkish
|
||
|
|
// x-mac-croatian
|
||
|
|
// asmo-708
|
||
|
|
// dos-720
|
||
|
|
// dos-862
|
||
|
|
// ibm01140
|
||
|
|
// ibm01141
|
||
|
|
// ibm01142
|
||
|
|
// ibm01143
|
||
|
|
// ibm01144
|
||
|
|
// ibm01145
|
||
|
|
// ibm01146
|
||
|
|
// ibm01147
|
||
|
|
// ibm01148
|
||
|
|
// ibm01149
|
||
|
|
// ibm037
|
||
|
|
// ibm437
|
||
|
|
// ibm500
|
||
|
|
// ibm737
|
||
|
|
// ibm775
|
||
|
|
// ibm850
|
||
|
|
// ibm852
|
||
|
|
// ibm855
|
||
|
|
// ibm857
|
||
|
|
// ibm00858
|
||
|
|
// ibm860
|
||
|
|
// ibm861
|
||
|
|
// ibm863
|
||
|
|
// ibm864
|
||
|
|
// ibm865
|
||
|
|
// cp866
|
||
|
|
// ibm869
|
||
|
|
// ibm870
|
||
|
|
// cp875
|
||
|
|
// koi8-r
|
||
|
|
// koi8-u
|
||
|
|
void put_ToCharset(const wchar_t *newVal);
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
// ----------------------
|
||
|
|
// Methods
|
||
|
|
// ----------------------
|
||
|
|
// Converts a character set name, such as iso-8859-1, to its corresponding code
|
||
|
|
// page number, like 28591.
|
||
|
|
int CharsetToCodePage(const wchar_t *charsetName);
|
||
|
|
|
||
|
|
// Converts a code page number to a charset name. For example, 65001 converts to
|
||
|
|
// utf-8.
|
||
|
|
bool CodePageToCharset(int codePage, CkString &outCharset);
|
||
|
|
// Converts a code page number to a charset name. For example, 65001 converts to
|
||
|
|
// utf-8.
|
||
|
|
const wchar_t *codePageToCharset(int codePage);
|
||
|
|
|
||
|
|
// Converts character data from one charset to another. Before calling ConvertData,
|
||
|
|
// the FromCharset and ToCharset properties must be set to the source and
|
||
|
|
// destination charset names, such as iso-8859-1 or Shift_JIS.
|
||
|
|
bool ConvertData(CkByteData &inData, CkByteData &outData);
|
||
|
|
|
||
|
|
// Converts a file from one character encoding to another. The FromCharset and
|
||
|
|
// ToCharset properties specify the source and destination character encodings. If
|
||
|
|
// the ToCharset is utf-16 or utf-8, then the preamble (also known as BOM) is
|
||
|
|
// included in the output. (Call ConvertFileNoPreamble to suppress the output of
|
||
|
|
// the BOM.)
|
||
|
|
bool ConvertFile(const wchar_t *inPath, const wchar_t *destPath);
|
||
|
|
|
||
|
|
// Converts a file from one character encoding to another. The FromCharset and
|
||
|
|
// ToCharset properties specify the source and destination character encodings. No
|
||
|
|
// preamble (also known as BOM) is included in the output.
|
||
|
|
bool ConvertFileNoPreamble(const wchar_t *inPath, const wchar_t *destPath);
|
||
|
|
|
||
|
|
// Converts Unicode (utf-16) text to the charset specified by the ToCharset
|
||
|
|
// property.
|
||
|
|
bool ConvertFromUnicode(const wchar_t *inData, CkByteData &outBytes);
|
||
|
|
|
||
|
|
// Converts utf-16 text to the charset specified by the ToCharset property.
|
||
|
|
bool ConvertFromUtf16(CkByteData &uniData, CkByteData &outMbData);
|
||
|
|
|
||
|
|
// Converts HTML text from one character encoding to another. The FromCharset and
|
||
|
|
// ToCharset properties must be set prior to calling this method. This method
|
||
|
|
// automatically edits the META tag within the HTML that indicates the charset.
|
||
|
|
bool ConvertHtml(CkByteData &inData, CkByteData &outHtml);
|
||
|
|
|
||
|
|
// Converts an HTML file from one character encoding to another. The ToCharset
|
||
|
|
// property must be set prior to calling this method. If the FromCharset is not
|
||
|
|
// set, it is obtained from the HTML META tag that indicates the charset. This
|
||
|
|
// method automatically edits the META tag within the HTML that indicates the
|
||
|
|
// charset.
|
||
|
|
bool ConvertHtmlFile(const wchar_t *inPath, const wchar_t *destPath);
|
||
|
|
|
||
|
|
// Converts multibyte character data to a Unicode string. The FromCharset property
|
||
|
|
// should be set before calling this method.
|
||
|
|
bool ConvertToUnicode(CkByteData &inData, CkString &outStr);
|
||
|
|
// Converts multibyte character data to a Unicode string. The FromCharset property
|
||
|
|
// should be set before calling this method.
|
||
|
|
const wchar_t *convertToUnicode(CkByteData &inData);
|
||
|
|
|
||
|
|
// Converts the mbData to utf-16 bytes.
|
||
|
|
bool ConvertToUtf16(CkByteData &mbData, CkByteData &outUniData);
|
||
|
|
|
||
|
|
// Converts non-US-ASCII characters to Unicode decimal entities (&#xxxxx;)
|
||
|
|
bool EntityEncodeDec(const wchar_t *str, CkString &outStr);
|
||
|
|
// Converts non-US-ASCII characters to Unicode decimal entities (&#xxxxx;)
|
||
|
|
const wchar_t *entityEncodeDec(const wchar_t *str);
|
||
|
|
|
||
|
|
// Converts non-US-ASCII characters to Unicode hex entities (&#xXXXX;)
|
||
|
|
bool EntityEncodeHex(const wchar_t *str, CkString &outStr);
|
||
|
|
// Converts non-US-ASCII characters to Unicode hex entities (&#xXXXX;)
|
||
|
|
const wchar_t *entityEncodeHex(const wchar_t *str);
|
||
|
|
|
||
|
|
// Examines HTML text and extracts the charset name specified by the META tag, if
|
||
|
|
// present.
|
||
|
|
bool GetHtmlCharset(CkByteData &inData, CkString &outCharset);
|
||
|
|
// Examines HTML text and extracts the charset name specified by the META tag, if
|
||
|
|
// present.
|
||
|
|
const wchar_t *getHtmlCharset(CkByteData &inData);
|
||
|
|
// Examines HTML text and extracts the charset name specified by the META tag, if
|
||
|
|
// present.
|
||
|
|
const wchar_t *htmlCharset(CkByteData &inData);
|
||
|
|
|
||
|
|
// Examines an HTML file and extracts the charset name specified by the META tag,
|
||
|
|
// if present.
|
||
|
|
bool GetHtmlFileCharset(const wchar_t *htmlFilePath, CkString &outCharset);
|
||
|
|
// Examines an HTML file and extracts the charset name specified by the META tag,
|
||
|
|
// if present.
|
||
|
|
const wchar_t *getHtmlFileCharset(const wchar_t *htmlFilePath);
|
||
|
|
// Examines an HTML file and extracts the charset name specified by the META tag,
|
||
|
|
// if present.
|
||
|
|
const wchar_t *htmlFileCharset(const wchar_t *htmlFilePath);
|
||
|
|
|
||
|
|
// Converts HTML entities to Unicode characters.
|
||
|
|
bool HtmlDecodeToStr(const wchar_t *inStr, CkString &outStr);
|
||
|
|
// Converts HTML entities to Unicode characters.
|
||
|
|
const wchar_t *htmlDecodeToStr(const wchar_t *inStr);
|
||
|
|
|
||
|
|
// Decodes HTML entities. See http://www.w3.org/TR/REC-html40/sgml/entities.html
|
||
|
|
// for information on HTML entities. Examples of HTML entities are &lt;,
|
||
|
|
// &#229;, &#xE5;, &#x6C34;, &Iacute;, etc.
|
||
|
|
bool HtmlEntityDecode(CkByteData &inHtml, CkByteData &outData);
|
||
|
|
|
||
|
|
// Decodes HTML entities in a file and creates a new HTML file with the entities
|
||
|
|
// decoded. See http://www.w3.org/TR/REC-html40/sgml/entities.html for information
|
||
|
|
// on HTML entities. Examples of HTML entities are &lt;, &#229;,
|
||
|
|
// &#xE5;, &#x6C34;, &Iacute;, etc.
|
||
|
|
bool HtmlEntityDecodeFile(const wchar_t *inPath, const wchar_t *destPath);
|
||
|
|
|
||
|
|
// Converts a string to lowercase.
|
||
|
|
bool LowerCase(const wchar_t *inStr, CkString &outStr);
|
||
|
|
// Converts a string to lowercase.
|
||
|
|
const wchar_t *lowerCase(const wchar_t *inStr);
|
||
|
|
|
||
|
|
// Convenience method for reading the entire contents of a file into a byte array.
|
||
|
|
bool ReadFile(const wchar_t *path, CkByteData &outData);
|
||
|
|
|
||
|
|
// Reads a text file and returns the text converted to a Unicode string. The
|
||
|
|
// filename is specified by the first method argument, and the charset of the text
|
||
|
|
// data is specified by the 2nd method argument.
|
||
|
|
bool ReadFileToString(const wchar_t *path, const wchar_t *charset, CkString &outStr);
|
||
|
|
// Reads a text file and returns the text converted to a Unicode string. The
|
||
|
|
// filename is specified by the first method argument, and the charset of the text
|
||
|
|
// data is specified by the 2nd method argument.
|
||
|
|
const wchar_t *readFileToString(const wchar_t *path, const wchar_t *charset);
|
||
|
|
|
||
|
|
// If the ErrorAction property is set to 1, the string passed to this method is
|
||
|
|
// used as the result for any characters that cannot be converted during a
|
||
|
|
// conversion.
|
||
|
|
void SetErrorString(const wchar_t *str, const wchar_t *charset);
|
||
|
|
|
||
|
|
// Converts a string to uppercase.
|
||
|
|
bool UpperCase(const wchar_t *inStr, CkString &outStr);
|
||
|
|
// Converts a string to uppercase.
|
||
|
|
const wchar_t *upperCase(const wchar_t *inStr);
|
||
|
|
|
||
|
|
// URL decodes a string.
|
||
|
|
//
|
||
|
|
// This method assumes the ANSI byte representation was used for encoding. For
|
||
|
|
// example, the letter É, where the ANSI charset is Latin-1, would appear as %C9
|
||
|
|
// when URL encoded. (As opposed to the utf-8 encoding where É is represented by 2
|
||
|
|
// bytes 0xC3, 0x89 and would appear as %C3%89 when URL encoded.) This method
|
||
|
|
// assumes ANSI encoding.
|
||
|
|
//
|
||
|
|
// You can use encoding/decoding methods in Chilkat's StringBuilder class to
|
||
|
|
// specify the charset from which to decode or encode.
|
||
|
|
//
|
||
|
|
bool UrlDecodeStr(const wchar_t *inStr, CkString &outStr);
|
||
|
|
// URL decodes a string.
|
||
|
|
//
|
||
|
|
// This method assumes the ANSI byte representation was used for encoding. For
|
||
|
|
// example, the letter É, where the ANSI charset is Latin-1, would appear as %C9
|
||
|
|
// when URL encoded. (As opposed to the utf-8 encoding where É is represented by 2
|
||
|
|
// bytes 0xC3, 0x89 and would appear as %C3%89 when URL encoded.) This method
|
||
|
|
// assumes ANSI encoding.
|
||
|
|
//
|
||
|
|
// You can use encoding/decoding methods in Chilkat's StringBuilder class to
|
||
|
|
// specify the charset from which to decode or encode.
|
||
|
|
//
|
||
|
|
const wchar_t *urlDecodeStr(const wchar_t *inStr);
|
||
|
|
|
||
|
|
// Returns true if the byte data conforms to the charset passed in the first
|
||
|
|
// argument.
|
||
|
|
bool VerifyData(const wchar_t *charset, CkByteData &inData);
|
||
|
|
|
||
|
|
// Returns true if the file contains character data that conforms to the charset
|
||
|
|
// passed in the 1st argument.
|
||
|
|
bool VerifyFile(const wchar_t *charset, const wchar_t *path);
|
||
|
|
|
||
|
|
// Convenience method for saving an entire byte array to a file.
|
||
|
|
bool WriteFile(const wchar_t *path, CkByteData &byteData);
|
||
|
|
|
||
|
|
// Converts a Unicode string to a multibyte charset and writes the multibyte text
|
||
|
|
// data to a file. The destination charset is specified in the 2nd method argument.
|
||
|
|
bool WriteStringToFile(const wchar_t *textData, const wchar_t *path, const wchar_t *charset);
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
// END PUBLIC INTERFACE
|
||
|
|
|
||
|
|
|
||
|
|
};
|
||
|
|
#if !defined(__sun__) && !defined(__sun)
|
||
|
|
#pragma pack (pop)
|
||
|
|
#endif
|
||
|
|
|
||
|
|
#endif
|