#include <vtkStringConverter.h>
Several functions are included here, forming a complete set of conversions between the three formats. Conversion between UTF-7 and UCS-2 is also included here.
Each of these routines takes pointers to input buffers and output buffers. The input buffers are const.
Each routine converts the text between *sourceStart and sourceEnd, putting the result into the buffer between *targetStart and targetEnd. Note: the end pointers are *after* the last item: e.g. *(sourceEnd - 1) is the last item.
The return result indicates whether the conversion was successful, and if not, whether the problem was in the source or target buffers. (Only the first encountered problem is indicated.)
After the conversion, *sourceStart and *targetStart are both updated to point to the end of last text successfully converted in the respective buffers.
Input parameters: sourceStart - pointer to a pointer to the source buffer. The contents of this are modified on return so that it points at the next thing to be converted. targetStart - similarly, pointer to pointer to the target buffer. sourceEnd, targetEnd - respectively pointers to the ends of the two buffers, for overflow checking only.
These conversion functions take a ConversionFlags argument. When this flag is set to strict, both irregular sequences and isolated surrogates will cause an error. When the flag is set to lenient, both irregular sequences and isolated surrogates are converted.
Whether the flag is strict or lenient, all illegal sequences will cause an error return. This includes sequences such as: {F4 90 80 80}, {C0 80}, or {A0} in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code must check for illegal sequences.
When the flag is set to lenient, characters over 0x10FFFF are converted to the replacement character; otherwise (when the flag is set to strict) they constitute an error.
Output parameters: The value "eSourceIllegal" is returned from some routines if the input sequence is malformed. When "eSourceIllegal" is returned, the source value will point to the illegal value that caused the problem. E.g., in UTF-8 when a sequence is malformed, it points to the start of the malformed sequence.
Definition at line 142 of file vtkStringConverter.h.
Public Types | |
enum | ConversionResult { eConversionOK = 0, eSourceExhausted, eTargetExhausted, eSourceIllegal, eSourceCorrupt, eInvalidEncoding } |
enum | ConversionFlags { eStrictConversion = 0, eLenientConversion } |
enum | EncodingEnum { eUnknownEncoding = 0, eIso8859_1 = 1, eIso8859_2 = 2, eIso8859_3 = 3, eIso8859_4 = 4, eIso8859_5 = 5, eIso8859_6 = 6, eIso8859_7 = 7, eIso8859_8 = 8, eIso8859_9 = 9, eIso8859_10 = 10, eIso8859_11 = 11, eIso8859_12 = eUnknownEncoding, eIso8859_13 = 13, eIso8859_14 = 14, eIso8859_15 = 15, eIso8859_16 = 16, eLatin1 = eIso8859_1, eLatin2 = eIso8859_2, eLatin3 = eIso8859_3, eLatin4 = eIso8859_4, eCyrillic = eIso8859_5, eArabic = eIso8859_6, eGreek = eIso8859_7, eHebrew = eIso8859_8, eLatin5 = eIso8859_9, eLatin6 = eIso8859_10, eThai = eIso8859_11, eLatin7 = eIso8859_13, eLatin8 = eIso8859_14, eLatin9 = eIso8859_15, eLatin10 = eIso8859_16, eUCS4, eUCS2, eUTF8, eUTF16, eUTF32 } |
Static Public Member Functions | |
int | SizeofUTF8 (const vtkUTF16ChType *aSrc, const vtkUTF16ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold UTF-8 data converted from UTF-16. | |
int | SizeofUTF8 (const vtkUTF32ChType *aSrc, const vtkUTF32ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold UTF-8 data converted from UTF-32. | |
int | SizeofUTF8 (const char *aSrc, const char *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold UTF-8 data converted from Latin-1. | |
int | SizeofUTF8fromUCS4 (const vtkUCS4ChType *aSrc, const vtkUCS4ChType *aEnd, ConversionFlags aFlags) THROW_SPEC() |
int | SizeofUTF16 (const vtkUTF32ChType *aSrc, const vtkUTF32ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold UTF-16 data converted from UTF-32. | |
int | SizeofUTF16 (const vtkUTF8ChType *aSrc, const vtkUTF8ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold UTF-16 data converted from UTF-8. | |
int | SizeofUTF16 (const char *aSrc, const char *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold UTF-16 data converted from Latin-1. | |
int | SizeofUTF32 (const vtkUTF16ChType *aSrc, const vtkUTF16ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold UTF-32 data converted from UTF-16. | |
int | SizeofUTF32 (const vtkUTF8ChType *aSrc, const vtkUTF8ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold UTF-32 data converted from UTF-8. | |
int | SizeofUCS4 (const vtkUTF8ChType *aSrc, const vtkUTF8ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | SizeofUCS4 (const char *aSrc, const char *aEnd, EncodingEnum=eLatin1) THROW_SPEC() |
int | SizeofLatin1 (const vtkUTF8ChType *aSrc, const vtkUTF8ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold Latin-1 data converted from UTF-8. | |
int | SizeofLatin1 (const vtkUTF16ChType *aSrc, const vtkUTF16ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold Latin-1 data converted from UTF-16. | |
int | ConvertUTF8toUTF16 (const vtkUTF8ChType **srcStart, const vtkUTF8ChType *srcEnd, vtkUTF16ChType **dstStart, vtkUTF16ChType *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUTF16toUTF8 (const vtkUTF16ChType **srcStart, const vtkUTF16ChType *srcEnd, vtkUTF8ChType **dstStart, vtkUTF8ChType *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUTF8toUTF32 (const vtkUTF8ChType **srcStart, const vtkUTF8ChType *srcEnd, vtkUTF32ChType **dstStart, vtkUTF32ChType *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUTF32toUTF8 (const vtkUTF32ChType **srcStart, const vtkUTF32ChType *srcEnd, vtkUTF8ChType **dstStart, vtkUTF8ChType *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUTF16toUTF32 (const vtkUTF16ChType **srcStart, const vtkUTF16ChType *srcEnd, vtkUTF32ChType **dstStart, vtkUTF32ChType *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUTF32toUTF16 (const vtkUTF32ChType **srcStart, const vtkUTF32ChType *srcEnd, vtkUTF16ChType **dstStart, vtkUTF16ChType *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUCS4toUTF8 (const vtkUCS4ChType **aSrcStart, const vtkUCS4ChType *aSrcEnd, vtkUTF8ChType **aDstStart, vtkUTF8ChType *aDstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUTF8toUCS4 (const vtkUTF8ChType **aSrcStart, const vtkUTF8ChType *aSrcEnd, vtkUCS4ChType **aDstStart, vtkUCS4ChType *aDstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertISO8859toUCS4 (const unsigned char **aSrcStart, const unsigned char *aSrcEnd, vtkUCS4ChType **aDstStart, vtkUCS4ChType *aDstEnd, EncodingEnum=eLatin1) THROW_SPEC() |
bool | IsLegalUTF8Sequence (const vtkUTF8ChType *src, const vtkUTF8ChType *srcEnd) THROW_SPEC() |
int | SizeofUTF7 (const vtkUCS2ChType *start, const vtkUCS2ChType *end, int optional, int verbose) THROW_SPEC() |
Determines buffer size needed to hold UTF-7 data converted from UCS-2. | |
int | SizeofUCS2 (const char *start, const char *end) THROW_SPEC() |
Determines buffer size needed to hold UCS-2 data converted from UTF-7. | |
int | ConvertUTF8toLatin1 (const vtkUTF8ChType **srcStart, const vtkUTF8ChType *srcEnd, char **dstStart, char *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUTF16toLatin1 (const vtkUTF16ChType **srcStart, const vtkUTF16ChType *srcEnd, char **dstStart, char *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertLatin1toUTF8 (const char **srcStart, const char *srcEnd, vtkUTF8ChType **dstStart, vtkUTF8ChType *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Read in Latin-1 (ISO-8859-1) characters and convert them to UTF-8. | |
int | ConvertLatin1toUTF16 (const char **srcStart, const char *srcEnd, vtkUTF16ChType **dstStart, vtkUTF16ChType *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUCS2toUTF7 (const vtkUCS2ChType **srcStart, const vtkUCS2ChType *srcEnd, char **dstStart, char *dstEnd, int optional, int verbose) THROW_SPEC() |
Converts the text between *sourceStart and sourceEnd , putting the result into the buffer between *targetStart and targetEnd . | |
int | ConvertUTF7toUCS2 (const char **srcStart, const char *srcEnd, vtkUCS2ChType **dstStart, vtkUCS2ChType *dstEnd) THROW_SPEC() |
Converts the text between *sourceStart and sourceEnd , putting the result into the buffer between *targetStart and targetEnd . | |
Static Public Attributes | |
const vtkUCS4ChType | UNI_REPLACEMENT_CHAR |
Some fundamental constants for unicode. | |
const vtkUCS4ChType | UNI_MAX_BMP |
Some fundamental constants for unicode. | |
const vtkUCS4ChType | UNI_MAX_UTF16 |
Some fundamental constants for unicode. | |
const vtkUCS4ChType | UNI_MAX_UTF32 |
Some fundamental constants for unicode. | |
const vtkUCS2ChType | UNI_SIGNATURE_UCS2 [2] |
ISO/IEC 10646-1 signatures. | |
const vtkUCS4ChType | UNI_SIGNATURE_UCS4 [2] |
ISO/IEC 10646-1 signatures. | |
const vtkUTF8ChType | UNI_SIGNATURE_UTF8 [4] |
ISO/IEC 10646-1 signatures. | |
const vtkUTF16ChType | UNI_SIGNATURE_UTF16 [2] |
ISO/IEC 10646-1 signatures. | |
const vtkUCS4ChType | ISO8859_REPLACEMENT_CHAR |
Some fundamental constants for ISO-8859 encodings. | |
const vtkUCS4ChType | ISO8859_MAX_CHAR |
Some fundamental constants for ISO-8859 encodings. |
|
Definition at line 222 of file vtkStringConverter.h. |
|
Definition at line 212 of file vtkStringConverter.h. |
|
Definition at line 228 of file vtkStringConverter.h. |
|
|
|
|
|
Read in Latin-1 (ISO-8859-1) characters and convert them to UTF-8.
|
|
Converts the text between *sourceStart and sourceEnd, putting the result into the buffer between *targetStart and targetEnd.
After the conversion, *sourceStart and *targetStart are both updated to point to the end of the last text successfully converted in the respective buffers.
In case of an error return, the result value indicates whether the problem was in the source or the target buffer.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Converts the text between *sourceStart and sourceEnd, putting the result into the buffer between *targetStart and targetEnd.
After the conversion, *sourceStart and *targetStart are both updated to point to the end of the last text successfully converted in the respective buffers.
In case of an error return, the result value indicates whether the problem was in the source or the target buffer.
|
|
|
|
|
|
|
|
|
|
|
|
Determines buffer size needed to hold Latin-1 data converted from UTF-16.
|
|
Determines buffer size needed to hold Latin-1 data converted from UTF-8.
|
|
Determines buffer size needed to hold UCS-2 data converted from UTF-7.
|
|
|
|
|
|
Determines buffer size needed to hold UTF-16 data converted from Latin-1.
|
|
Determines buffer size needed to hold UTF-16 data converted from UTF-8.
|
|
Determines buffer size needed to hold UTF-16 data converted from UTF-32.
|
|
Determines buffer size needed to hold UTF-32 data converted from UTF-8.
|
|
Determines buffer size needed to hold UTF-32 data converted from UTF-16.
|
|
Determines buffer size needed to hold UTF-7 data converted from UCS-2.
|
|
Determines buffer size needed to hold UTF-8 data converted from Latin-1.
|
|
Determines buffer size needed to hold UTF-8 data converted from UTF-32.
|
|
Determines buffer size needed to hold UTF-8 data converted from UTF-16.
|
|
|
|
Some fundamental constants for ISO-8859 encodings.
Definition at line 209 of file vtkStringConverter.h. |
|
Some fundamental constants for ISO-8859 encodings.
Definition at line 208 of file vtkStringConverter.h. |
|
Some fundamental constants for unicode.
Definition at line 148 of file vtkStringConverter.h. |
|
Some fundamental constants for unicode.
Definition at line 149 of file vtkStringConverter.h. |
|
Some fundamental constants for unicode.
Definition at line 150 of file vtkStringConverter.h. |
|
Some fundamental constants for unicode.
Definition at line 147 of file vtkStringConverter.h. |
|
ISO/IEC 10646-1 signatures.
This annex describes a convention for the identification of features of the UCS, by the use of "signatures" within data streams of coded characters. The convention makes use of the character ZERO WIDTH NO-BREAK SPACE (U+FEFF), commonly known as the byte order mark.
When this convention is used, a signature at the beginning of a stream of coded characters indicates that the characters following are encoded in the corresponding coded representation form (UCS-2, UCS-4, UTF-8, or UTF-16).
An application receiving data may either use these signatures to identify the coded representation form, or may ignore them and treat FEFF as the ZERO WIDTH NO-BREAK SPACE character.
If an application which uses one of these signatures recognises its coded representation in reverse sequence (e.g. hexadecimal FFFE), it may deduce that the data was serialized with the opposite byte order and should byte-reverse the data before interpreting it.
Definition at line 200 of file vtkStringConverter.h. |
|
ISO/IEC 10646-1 signatures.
This annex describes a convention for the identification of features of the UCS, by the use of "signatures" within data streams of coded characters. The convention makes use of the character ZERO WIDTH NO-BREAK SPACE (U+FEFF), commonly known as the byte order mark.
When this convention is used, a signature at the beginning of a stream of coded characters indicates that the characters following are encoded in the corresponding coded representation form (UCS-2, UCS-4, UTF-8, or UTF-16).
An application receiving data may either use these signatures to identify the coded representation form, or may ignore them and treat FEFF as the ZERO WIDTH NO-BREAK SPACE character.
If an application which uses one of these signatures recognises its coded representation in reverse sequence (e.g. hexadecimal FFFE), it may deduce that the data was serialized with the opposite byte order and should byte-reverse the data before interpreting it.
Definition at line 201 of file vtkStringConverter.h. |
|
ISO/IEC 10646-1 signatures.
This annex describes a convention for the identification of features of the UCS, by the use of "signatures" within data streams of coded characters. The convention makes use of the character ZERO WIDTH NO-BREAK SPACE (U+FEFF), commonly known as the byte order mark.
When this convention is used, a signature at the beginning of a stream of coded characters indicates that the characters following are encoded in the corresponding coded representation form (UCS-2, UCS-4, UTF-8, or UTF-16).
An application receiving data may either use these signatures to identify the coded representation form, or may ignore them and treat FEFF as the ZERO WIDTH NO-BREAK SPACE character.
If an application which uses one of these signatures recognises its coded representation in reverse sequence (e.g. hexadecimal FFFE), it may deduce that the data was serialized with the opposite byte order and should byte-reverse the data before interpreting it.
Definition at line 203 of file vtkStringConverter.h. |
|
ISO/IEC 10646-1 signatures.
This annex describes a convention for the identification of features of the UCS, by the use of "signatures" within data streams of coded characters. The convention makes use of the character ZERO WIDTH NO-BREAK SPACE (U+FEFF), commonly known as the byte order mark.
When this convention is used, a signature at the beginning of a stream of coded characters indicates that the characters following are encoded in the corresponding coded representation form (UCS-2, UCS-4, UTF-8, or UTF-16).
An application receiving data may either use these signatures to identify the coded representation form, or may ignore them and treat FEFF as the ZERO WIDTH NO-BREAK SPACE character.
If an application which uses one of these signatures recognises its coded representation in reverse sequence (e.g. hexadecimal FFFE), it may deduce that the data was serialized with the opposite byte order and should byte-reverse the data before interpreting it.
Definition at line 202 of file vtkStringConverter.h. |