|
|
@ -1377,45 +1377,28 @@ int TextToInteger(const char *text) |
|
|
|
return result; |
|
|
|
} |
|
|
|
|
|
|
|
// Encode an array of codepoints into UTF-8 text (returned memory must be freed!)
//   codepoints: array of codepoints to encode (may be NULL, treated as empty)
//   length:     number of elements in codepoints (values <= 0 produce an empty string)
// Returns a heap-allocated, NUL-terminated string owned by the caller,
// or NULL if the memory allocation fails
// NOTE: Every codepoint is encoded through CodepointToUtf8(); a codepoint that
// function reports as 0 bytes long contributes nothing to the output
char *TextToUtf8(int *codepoints, int length)
{
    // A missing array or a negative count is handled as "nothing to encode"
    if ((codepoints == NULL) || (length < 0)) length = 0;

    // A codepoint needs at most 4 bytes in UTF-8; the extra element leaves room for the
    // string terminator. calloc() zero-fills the buffer and rejects count*size overflow
    char *text = (char *)calloc((size_t)length + 1, 4);
    if (text == NULL) return NULL;

    size_t size = 0;    // Bytes written so far

    for (int i = 0; i < length; i++)
    {
        int bytes = 0;
        const char *utf8 = CodepointToUtf8(codepoints[i], &bytes);

        // Raw byte copy: the encoded sequence is not treated as a C string
        if (bytes > 0)
        {
            memcpy(text + size, utf8, (size_t)bytes);
            size += (size_t)bytes;
        }
    }

    text[size] = '\0';

    // Resize memory to text length + string NULL terminator
    // NOTE: realloc() may move the block, so its result must replace the pointer;
    // if shrinking fails the original (larger) buffer is still valid and is returned
    char *resized = (char *)realloc(text, size + 1);
    if (resized != NULL) text = resized;

    return text;
}
|
|
|
|
|
|
|
|
|
|
|
// Get all codepoints in a string, codepoints count returned by parameters |
|
|
|
int *GetCodepoints(const char *text, int *count) |
|
|
|
{ |
|
|
@ -1570,6 +1553,44 @@ int GetNextCodepoint(const char *text, int *bytesProcessed) |
|
|
|
|
|
|
|
return code; |
|
|
|
} |
|
|
|
|
|
|
|
// Encode codepoint into utf8 text (char array length returned as parameter)
//   codepoint:  value to encode; values outside [0, 0x10ffff] are not encoded
//   byteLength: receives the number of bytes written (0 when nothing was encoded);
//               may be NULL if the caller does not need the length
// Returns a pointer to an internal static buffer holding the encoded bytes followed
// by a NUL terminator
// WARNING: The buffer is overwritten by the next call, copy the bytes if they must be kept
RLAPI const char *CodepointToUtf8(int codepoint, int *byteLength)
{
    static char utf8[6] = { 0 };
    int length = 0;

    // Work on an unsigned copy: negative inputs become values above 0x10ffff and are
    // rejected by the range checks below instead of being emitted as a single raw byte
    unsigned int cp = (unsigned int)codepoint;

    if (cp <= 0x7f)
    {
        // 1 byte: 0xxxxxxx
        utf8[0] = (char)cp;
        length = 1;
    }
    else if (cp <= 0x7ff)
    {
        // 2 bytes: 110xxxxx 10xxxxxx
        utf8[0] = (char)(((cp >> 6) & 0x1f) | 0xc0);
        utf8[1] = (char)((cp & 0x3f) | 0x80);
        length = 2;
    }
    else if (cp <= 0xffff)
    {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        utf8[0] = (char)(((cp >> 12) & 0x0f) | 0xe0);
        utf8[1] = (char)(((cp >> 6) & 0x3f) | 0x80);
        utf8[2] = (char)((cp & 0x3f) | 0x80);
        length = 3;
    }
    else if (cp <= 0x10ffff)
    {
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        utf8[0] = (char)(((cp >> 18) & 0x07) | 0xf0);
        utf8[1] = (char)(((cp >> 12) & 0x3f) | 0x80);
        utf8[2] = (char)(((cp >> 6) & 0x3f) | 0x80);
        utf8[3] = (char)((cp & 0x3f) | 0x80);
        length = 4;
    }

    // Terminate after the encoded bytes so leftovers from a previous, longer
    // encoding are never visible when the buffer is read as a C string
    utf8[length] = '\0';

    if (byteLength != NULL) *byteLength = length;

    return utf8;
}
|
|
|
//---------------------------------------------------------------------------------- |
|
|
|
|
|
|
|
//---------------------------------------------------------------------------------- |
|
|
|