#include <vtkStringConverter.h>
Several functions are included here, forming a complete set of conversions between the three formats. Conversion between UTF-7 and UCS-2 is also included here.
Each of these routines takes pointers to input buffers and output buffers. The input buffers are const.
Each routine converts the text between *sourceStart and sourceEnd, putting the result into the buffer between *targetStart and targetEnd. Note: the end pointers are *after* the last item: e.g. *(sourceEnd - 1) is the last item.
The return result indicates whether the conversion was successful, and if not, whether the problem was in the source or target buffers. (Only the first encountered problem is indicated.)
After the conversion, *sourceStart and *targetStart are both updated to point to the end of last text successfully converted in the respective buffers.
Input parameters: sourceStart - pointer to a pointer to the source buffer. The contents of this are modified on return so that it points at the next thing to be converted. targetStart - similarly, pointer to pointer to the target buffer. sourceEnd, targetEnd - respectively pointers to the ends of the two buffers, for overflow checking only.
These conversion functions take a ConversionFlags argument. When this flag is set to strict, both irregular sequences and isolated surrogates will cause an error. When the flag is set to lenient, both irregular sequences and isolated surrogates are converted.
Whether the flag is strict or lenient, all illegal sequences will cause an error return. This includes sequences such as: {F4 90 80 80}, {C0 80}, or {A0} in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code must check for illegal sequences.
When the flag is set to lenient, characters over 0x10FFFF are converted to the replacement character; otherwise (when the flag is set to strict) they constitute an error.
Output parameters: The value "eSourceIllegal" is returned from some routines if the input sequence is malformed. When "eSourceIllegal" is returned, the source value will point to the illegal value that caused the problem. E.g., in UTF-8 when a sequence is malformed, it points to the start of the malformed sequence.
Definition at line 142 of file vtkStringConverter.h.
Public Types | |
enum | ConversionResult { eConversionOK = 0, eSourceExhausted, eTargetExhausted, eSourceIllegal, eSourceCorrupt, eInvalidEncoding } |
enum | ConversionFlags { eStrictConversion = 0, eLenientConversion } |
enum | EncodingEnum { eUnknownEncoding = 0, eIso8859_1 = 1, eIso8859_2 = 2, eIso8859_3 = 3, eIso8859_4 = 4, eIso8859_5 = 5, eIso8859_6 = 6, eIso8859_7 = 7, eIso8859_8 = 8, eIso8859_9 = 9, eIso8859_10 = 10, eIso8859_11 = 11, eIso8859_12 = eUnknownEncoding, eIso8859_13 = 13, eIso8859_14 = 14, eIso8859_15 = 15, eIso8859_16 = 16, eLatin1 = eIso8859_1, eLatin2 = eIso8859_2, eLatin3 = eIso8859_3, eLatin4 = eIso8859_4, eCyrillic = eIso8859_5, eArabic = eIso8859_6, eGreek = eIso8859_7, eHebrew = eIso8859_8, eLatin5 = eIso8859_9, eLatin6 = eIso8859_10, eThai = eIso8859_11, eLatin7 = eIso8859_13, eLatin8 = eIso8859_14, eLatin9 = eIso8859_15, eLatin10 = eIso8859_16, eUCS4, eUCS2, eUTF8, eUTF16, eUTF32 } |
Static Public Member Functions | |
int | SizeofUTF8 (const vtkUTF16ChType *aSrc, const vtkUTF16ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold UTF-8 data converted from UTF-16. | |
int | SizeofUTF8 (const vtkUTF32ChType *aSrc, const vtkUTF32ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold UTF-8 data converted from UTF-32. | |
int | SizeofUTF8 (const char *aSrc, const char *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold UTF-8 data converted from Latin-1. | |
int | SizeofUTF8fromUCS4 (const vtkUCS4ChType *aSrc, const vtkUCS4ChType *aEnd, ConversionFlags aFlags) THROW_SPEC() |
int | SizeofUTF16 (const vtkUTF32ChType *aSrc, const vtkUTF32ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold UTF-16 data converted from UTF-32. | |
int | SizeofUTF16 (const vtkUTF8ChType *aSrc, const vtkUTF8ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold UTF-16 data converted from UTF-8. | |
int | SizeofUTF16 (const char *aSrc, const char *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold UTF-16 data converted from Latin-1. | |
int | SizeofUTF32 (const vtkUTF16ChType *aSrc, const vtkUTF16ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold UTF-32 data converted from UTF-16. | |
int | SizeofUTF32 (const vtkUTF8ChType *aSrc, const vtkUTF8ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold UTF-32 data converted from UTF-8. | |
int | SizeofUCS4 (const vtkUTF8ChType *aSrc, const vtkUTF8ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | SizeofUCS4 (const char *aSrc, const char *aEnd, EncodingEnum=eLatin1) THROW_SPEC() |
int | SizeofLatin1 (const vtkUTF8ChType *aSrc, const vtkUTF8ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold Latin-1 data converted from UTF-8. | |
int | SizeofLatin1 (const vtkUTF16ChType *aSrc, const vtkUTF16ChType *aEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Determines buffer size needed to hold Latin-1 data converted from UTF-16. | |
int | ConvertUTF8toUTF16 (const vtkUTF8ChType **srcStart, const vtkUTF8ChType *srcEnd, vtkUTF16ChType **dstStart, vtkUTF16ChType *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUTF16toUTF8 (const vtkUTF16ChType **srcStart, const vtkUTF16ChType *srcEnd, vtkUTF8ChType **dstStart, vtkUTF8ChType *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUTF8toUTF32 (const vtkUTF8ChType **srcStart, const vtkUTF8ChType *srcEnd, vtkUTF32ChType **dstStart, vtkUTF32ChType *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUTF32toUTF8 (const vtkUTF32ChType **srcStart, const vtkUTF32ChType *srcEnd, vtkUTF8ChType **dstStart, vtkUTF8ChType *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUTF16toUTF32 (const vtkUTF16ChType **srcStart, const vtkUTF16ChType *srcEnd, vtkUTF32ChType **dstStart, vtkUTF32ChType *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUTF32toUTF16 (const vtkUTF32ChType **srcStart, const vtkUTF32ChType *srcEnd, vtkUTF16ChType **dstStart, vtkUTF16ChType *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUCS4toUTF8 (const vtkUCS4ChType **aSrcStart, const vtkUCS4ChType *aSrcEnd, vtkUTF8ChType **aDstStart, vtkUTF8ChType *aDstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUTF8toUCS4 (const vtkUTF8ChType **aSrcStart, const vtkUTF8ChType *aSrcEnd, vtkUCS4ChType **aDstStart, vtkUCS4ChType *aDstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertISO8859toUCS4 (const unsigned char **aSrcStart, const unsigned char *aSrcEnd, vtkUCS4ChType **aDstStart, vtkUCS4ChType *aDstEnd, EncodingEnum=eLatin1) THROW_SPEC() |
bool | IsLegalUTF8Sequence (const vtkUTF8ChType *src, const vtkUTF8ChType *srcEnd) THROW_SPEC() |
int | SizeofUTF7 (const vtkUCS2ChType *start, const vtkUCS2ChType *end, int optional, int verbose) THROW_SPEC() |
Determines buffer size needed to hold UTF-7 data converted from UCS-2. | |
int | SizeofUCS2 (const char *start, const char *end) THROW_SPEC() |
Determines buffer size needed to hold UCS-2 data converted from UTF-7. | |
int | ConvertUTF8toLatin1 (const vtkUTF8ChType **srcStart, const vtkUTF8ChType *srcEnd, char **dstStart, char *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUTF16toLatin1 (const vtkUTF16ChType **srcStart, const vtkUTF16ChType *srcEnd, char **dstStart, char *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertLatin1toUTF8 (const char **srcStart, const char *srcEnd, vtkUTF8ChType **dstStart, vtkUTF8ChType *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
Read in Latin-1 (ISO-8859-1) characters and convert them to UTF-8. | |
int | ConvertLatin1toUTF16 (const char **srcStart, const char *srcEnd, vtkUTF16ChType **dstStart, vtkUTF16ChType *dstEnd, ConversionFlags=eStrictConversion) THROW_SPEC() |
int | ConvertUCS2toUTF7 (const vtkUCS2ChType **srcStart, const vtkUCS2ChType *srcEnd, char **dstStart, char *dstEnd, int optional, int verbose) THROW_SPEC() |
Converts the text between *sourceStart and sourceEnd , putting the result into the buffer between *targetStart and targetEnd . | |
int | ConvertUTF7toUCS2 (const char **srcStart, const char *srcEnd, vtkUCS2ChType **dstStart, vtkUCS2ChType *dstEnd) THROW_SPEC() |
Converts the text between *sourceStart and sourceEnd , putting the result into the buffer between *targetStart and targetEnd . | |
Static Public Attributes | |
const vtkUCS4ChType | UNI_REPLACEMENT_CHAR |
Some fundamental constants for unicode. | |
const vtkUCS4ChType | UNI_MAX_BMP |
Some fundamental constants for unicode. | |
const vtkUCS4ChType | UNI_MAX_UTF16 |
Some fundamental constants for unicode. | |
const vtkUCS4ChType | UNI_MAX_UTF32 |
Some fundamental constants for unicode. | |
const vtkUCS2ChType | UNI_SIGNATURE_UCS2 [2] |
ISO/IEC 10646-1 signatures. | |
const vtkUCS4ChType | UNI_SIGNATURE_UCS4 [2] |
ISO/IEC 10646-1 signatures. | |
const vtkUTF8ChType | UNI_SIGNATURE_UTF8 [4] |
ISO/IEC 10646-1 signatures. | |
const vtkUTF16ChType | UNI_SIGNATURE_UTF16 [2] |
ISO/IEC 10646-1 signatures. | |
const vtkUCS4ChType | ISO8859_REPLACEMENT_CHAR |
Some fundamental constants for ISO-8859 encodings. | |
const vtkUCS4ChType | ISO8859_MAX_CHAR |
Some fundamental constants for ISO-8859 encodings. |
|
Definition at line 222 of file vtkStringConverter.h. |
|
Definition at line 212 of file vtkStringConverter.h. |
|
Definition at line 228 of file vtkStringConverter.h. |
|
|
|
|
|
Read in Latin-1 (ISO-8859-1) characters and convert them to UTF-8.
|
|
Converts the text between *sourceStart and sourceEnd, putting the result into the buffer between *targetStart and targetEnd.
After the conversion, *sourceStart and *targetStart are both updated to point to the end of the last text successfully converted in the respective buffers.
In case of an error return, the result value indicates whether the problem was in the source or the target buffer.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Converts the text between *sourceStart and sourceEnd, putting the result into the buffer between *targetStart and targetEnd.
After the conversion, *sourceStart and *targetStart are both updated to point to the end of the last text successfully converted in the respective buffers.
In case of an error return, the result value indicates whether the problem was in the source or the target buffer.
|
|
|
|
|
|
|
|
|
|
|
|
Determines buffer size needed to hold Latin-1 data converted from UTF-16.
|
|
Determines buffer size needed to hold Latin-1 data converted from UTF-8.
|
|
Determines buffer size needed to hold UCS-2 data converted from UTF-7.
|
|
|
|
|
|
Determines buffer size needed to hold UTF-16 data converted from Latin-1.
|
|
Determines buffer size needed to hold UTF-16 data converted from UTF-8.
|
|
Determines buffer size needed to hold UTF-16 data converted from UTF-32.
|
|
Determines buffer size needed to hold UTF-32 data converted from UTF-8.
|
|
Determines buffer size needed to hold UTF-32 data converted from UTF-16.
|
|
Determines buffer size needed to hold UTF-7 data converted from UCS-2.
|
|
Determines buffer size needed to hold UTF-8 data converted from Latin-1.
|
|
Determines buffer size needed to hold UTF-8 data converted from UTF-32.
|
|
Determines buffer size needed to hold UTF-8 data converted from UTF-16.
|
|
|
|
Some fundamental constants for ISO-8859 encodings.
Definition at line 209 of file vtkStringConverter.h. |
|
Some fundamental constants for ISO-8859 encodings.
Definition at line 208 of file vtkStringConverter.h. |
|
Some fundamental constants for unicode.
Definition at line 148 of file vtkStringConverter.h. |
|
Some fundamental constants for unicode.
Definition at line 149 of file vtkStringConverter.h. |
|
Some fundamental constants for unicode.
Definition at line 150 of file vtkStringConverter.h. |
|
Some fundamental constants for unicode.
Definition at line 147 of file vtkStringConverter.h. |
|
ISO/IEC 10646-1 signatures.
This annex describes a convention for the identification of features of the UCS, by the use of "signatures" within data streams of coded characters. The convention makes use of the character ZERO WIDTH NO-BREAK SPACE (U+FEFF), commonly known as the byte order mark.
When this convention is used, a signature at the beginning of a stream of coded characters indicates that the characters following are encoded in the corresponding coded representation form (UCS-2, UCS-4, UTF-8, or UTF-16).
An application receiving data may either use these signatures to identify the coded representation form, or may ignore them and treat FEFF as the ZERO WIDTH NO-BREAK SPACE character.
If an application which uses one of these signatures recognises its coded representation in reverse sequence (e.g. hexadecimal FFFE), it may deduce that the data was serialized with the opposite byte order and should byte-reverse the data before interpreting it.
Definition at line 200 of file vtkStringConverter.h. |
|
ISO/IEC 10646-1 signatures.
This annex describes a convention for the identification of features of the UCS, by the use of "signatures" within data streams of coded characters. The convention makes use of the character ZERO WIDTH NO-BREAK SPACE (U+FEFF), commonly known as the byte order mark.
When this convention is used, a signature at the beginning of a stream of coded characters indicates that the characters following are encoded in the corresponding coded representation form (UCS-2, UCS-4, UTF-8, or UTF-16).
An application receiving data may either use these signatures to identify the coded representation form, or may ignore them and treat FEFF as the ZERO WIDTH NO-BREAK SPACE character.
If an application which uses one of these signatures recognises its coded representation in reverse sequence (e.g. hexadecimal FFFE), it may deduce that the data was serialized with the opposite byte order and should byte-reverse the data before interpreting it.
Definition at line 201 of file vtkStringConverter.h. |
|
ISO/IEC 10646-1 signatures.
This annex describes a convention for the identification of features of the UCS, by the use of "signatures" within data streams of coded characters. The convention makes use of the character ZERO WIDTH NO-BREAK SPACE (U+FEFF), commonly known as the byte order mark.
When this convention is used, a signature at the beginning of a stream of coded characters indicates that the characters following are encoded in the corresponding coded representation form (UCS-2, UCS-4, UTF-8, or UTF-16).
An application receiving data may either use these signatures to identify the coded representation form, or may ignore them and treat FEFF as the ZERO WIDTH NO-BREAK SPACE character.
If an application which uses one of these signatures recognises its coded representation in reverse sequence (e.g. hexadecimal FFFE), it may deduce that the data was serialized with the opposite byte order and should byte-reverse the data before interpreting it.
Definition at line 203 of file vtkStringConverter.h. |
|
ISO/IEC 10646-1 signatures.
This annex describes a convention for the identification of features of the UCS, by the use of "signatures" within data streams of coded characters. The convention makes use of the character ZERO WIDTH NO-BREAK SPACE (U+FEFF), commonly known as the byte order mark.
When this convention is used, a signature at the beginning of a stream of coded characters indicates that the characters following are encoded in the corresponding coded representation form (UCS-2, UCS-4, UTF-8, or UTF-16).
An application receiving data may either use these signatures to identify the coded representation form, or may ignore them and treat FEFF as the ZERO WIDTH NO-BREAK SPACE character.
If an application which uses one of these signatures recognises its coded representation in reverse sequence (e.g. hexadecimal FFFE), it may deduce that the data was serialized with the opposite byte order and should byte-reverse the data before interpreting it.
Definition at line 202 of file vtkStringConverter.h. |