87 lines
2.5 KiB
C
87 lines
2.5 KiB
C
/*
 * utf.h
 *
 *  Created on: Dec 4, 2020
 *      Author: FICOM-IT LTD
 */
|
|
|
|
#ifndef SMS_INC_UTF_H_
|
|
#define SMS_INC_UTF_H_
|
|
|
|
|
|
#include <stddef.h>
|
|
#include <stdbool.h>
|
|
#include <sys/types.h>
|
|
|
|
/*
 * Code-unit types for the three Unicode encoding forms used by this header,
 * listed from narrowest to widest.
 * NOTE(review): the names imply 8/16/32-bit units, but the actual widths are
 * whatever the target gives the underlying built-in types - confirm that
 * unsigned short is 16 bits and unsigned int is 32 bits on every platform.
 */
typedef unsigned char  UTF8;   /* one UTF-8 code unit  */
typedef unsigned short UTF16;  /* one UTF-16 code unit */
typedef unsigned int   UTF32;  /* one UTF-32 code unit */
|
|
|
|
/*
 * Unicode code point limits, each typed as UTF32.
 *
 * Every expansion is wrapped in an outer pair of parentheses so the cast
 * cannot be re-associated by the surrounding expression (for example
 * `sizeof UNI_MAX_BMP`, or a postfix operator applied to the macro).
 */
#define UNI_REPLACEMENT_CHAR ((UTF32)0x0000FFFD) /* U+FFFD REPLACEMENT CHARACTER */
#define UNI_MAX_BMP          ((UTF32)0x0000FFFF) /* last code point of the Basic Multilingual Plane */
#define UNI_MAX_UTF16        ((UTF32)0x0010FFFF) /* highest code point representable in UTF-16 */
#define UNI_MAX_UTF32        ((UTF32)0x7FFFFFFF) /* largest 31-bit value */
#define UNI_MAX_LEGAL_UTF32  ((UTF32)0x0010FFFF) /* highest code point defined by Unicode */
|
|
|
|
/*
 * Status code returned by the ConvertUTF* functions declared in this header.
 * The numeric values are spelled out; they match the implicit numbering.
 */
typedef enum {
    conversionOK    = 0, /* the conversion completed successfully */
    sourceExhausted = 1, /* source ended in the middle of a character */
    targetExhausted = 2, /* target buffer has insufficient room for the result */
    sourceIllegal   = 3  /* source sequence is illegal or malformed */
} ConversionResult;
|
|
|
|
/*
 * Mode selector passed as the last argument of the ConvertUTF* functions.
 * NOTE(review): the header does not say how the two modes differ;
 * presumably strict rejects irregular input that lenient tolerates -
 * confirm against the implementation.
 */
typedef enum {
    strictConversion  = 0,
    lenientConversion = 1
} ConversionFlags;
|
|
|
|
/* This is for C++ and does no harm in C */
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
ConversionResult ConvertUTF8toUTF16(
|
|
const UTF8 **sourceStart, const UTF8 *sourceEnd,
|
|
UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags);
|
|
|
|
ConversionResult ConvertUTF16toUTF8(
|
|
const UTF16 **sourceStart, const UTF16 *sourceEnd,
|
|
UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags);
|
|
|
|
ConversionResult ConvertUTF8toUTF32(
|
|
const UTF8 **sourceStart, const UTF8 *sourceEnd,
|
|
UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags);
|
|
|
|
ConversionResult ConvertUTF32toUTF8(
|
|
const UTF32 **sourceStart, const UTF32 *sourceEnd,
|
|
UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags);
|
|
|
|
ConversionResult ConvertUTF16toUTF32(
|
|
const UTF16 **sourceStart, const UTF16 *sourceEnd,
|
|
UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags);
|
|
|
|
ConversionResult ConvertUTF32toUTF16(
|
|
const UTF32 **sourceStart, const UTF32 *sourceEnd,
|
|
UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags);
|
|
|
|
bool isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
|
|
|
|
/*
 * String-level helper routines.
 *
 * These declarations sit inside the extern "C" region (its closing brace
 * follows them) so that a C++ translation unit including this header sees
 * them with C linkage and can link against the C implementation. Plain C
 * consumers are unaffected.
 *
 * NOTE(review): none of the pointer parameters are const-qualified; whether
 * the routines write through their inputs cannot be told from this header.
 */

/*
 * Returns a 32-bit value read from *string.
 * NOTE(review): presumably decodes one UTF-8 code point and advances
 * *string past it - confirm against the implementation.
 */
u_int32_t next_char(unsigned char **string);

/*
 * Converts between encoding forms using caller-supplied buffers.
 * NOTE(review): `size` is presumably the capacity of `target` and `len` an
 * out-parameter for the produced length; the units (bytes vs. code units)
 * and the meaning of the returned pointer are not visible here - confirm.
 */
const unsigned char *utf32toutf8(wchar_t *source, unsigned char *target, size_t size, int *len);

unsigned char *utf16toutf8(unsigned short *source, unsigned char *target, size_t size, int *len);

unsigned short *utf8toutf16(unsigned char *source, unsigned short *target, size_t size, int *len);

/*
 * NOTE(review): presumably the length of `string` in characters rather than
 * bytes - confirm.
 */
int utf8len(unsigned char *string);

/*
 * NOTE(review): the name is spelled "acsii" (sic); it is kept as-is because
 * callers link against this symbol. Presumably returns non-zero when
 * `string` contains only ASCII - confirm.
 */
int is_acsii(unsigned char *string);

/*
 * NOTE(review): presumably the byte size of the first `num` characters of
 * `source` - confirm.
 */
size_t utf8_get_size(unsigned char *source, size_t num);

/* End of the extern "C" region opened earlier in this header. */
#ifdef __cplusplus
}
#endif
|
|
|
|
#endif /* SMS_INC_UTF_H_ */ |