@ -1043,7 +1043,7 @@ void DrawTextEx(Font font, const char *text, Vector2 position, float fontSize, f
{
/ / Get next codepoint from byte string and glyph index in font
int codepointByteCount = 0 ;
int codepoint = GetCodepoint ( & text [ i ] , & codepointByteCount ) ;
int codepoint = GetCodepointNext ( & text [ i ] , & codepointByteCount ) ;
int index = GetGlyphIndex ( font , codepoint ) ;
/ / NOTE : Normally we exit the decoding sequence as soon as a bad byte is found ( and return 0x3f )
@ -1185,7 +1185,7 @@ Vector2 MeasureTextEx(Font font, const char *text, float fontSize, float spacing
byteCounter + + ;
int next = 0 ;
letter = GetCodepoint ( & text [ i ] , & next ) ;
letter = GetCodepointNext ( & text [ i ] , & next ) ;
index = GetGlyphIndex ( font , letter ) ;
/ / NOTE : normally we exit the decoding sequence as soon as a bad byte is found ( and return 0x3f )
@ -1627,7 +1627,7 @@ const char *TextToPascal(const char *text)
/ / Encode text codepoint into UTF - 8 text
/ / REQUIRES : memcpy ( )
/ / WARNING : Allocated memory must be manually freed
char * TextCodepointsTo UTF8( const int * codepoints , int length )
char * Load UTF8( const int * codepoints , int length )
{
// We allocate enough memory to fit all possible codepoints
/ / NOTE : 5 bytes for every codepoint should be enough
@ -1650,43 +1650,10 @@ char *TextCodepointsToUTF8(const int *codepoints, int length)
return text ;
}
/ / Encode codepoint into utf8 text ( char array length returned as parameter )
/ / NOTE : It uses a static array to store UTF - 8 bytes
RLAPI const char * CodepointToUTF8 ( int codepoint , int * byteSize )
/ / Unload UTF - 8 text encoded from codepoints array
void UnloadUTF8 ( char * text )
{
static char utf8 [ 6 ] = { 0 } ;
int size = 0 ; / / Byte size of codepoint
if ( codepoint < = 0x7f )
{
utf8 [ 0 ] = ( char ) codepoint ;
size = 1 ;
}
else if ( codepoint < = 0x7ff )
{
utf8 [ 0 ] = ( char ) ( ( ( codepoint > > 6 ) & 0x1f ) | 0xc0 ) ;
utf8 [ 1 ] = ( char ) ( ( codepoint & 0x3f ) | 0x80 ) ;
size = 2 ;
}
else if ( codepoint < = 0xffff )
{
utf8 [ 0 ] = ( char ) ( ( ( codepoint > > 12 ) & 0x0f ) | 0xe0 ) ;
utf8 [ 1 ] = ( char ) ( ( ( codepoint > > 6 ) & 0x3f ) | 0x80 ) ;
utf8 [ 2 ] = ( char ) ( ( codepoint & 0x3f ) | 0x80 ) ;
size = 3 ;
}
else if ( codepoint < = 0x10ffff )
{
utf8 [ 0 ] = ( char ) ( ( ( codepoint > > 18 ) & 0x07 ) | 0xf0 ) ;
utf8 [ 1 ] = ( char ) ( ( ( codepoint > > 12 ) & 0x3f ) | 0x80 ) ;
utf8 [ 2 ] = ( char ) ( ( ( codepoint > > 6 ) & 0x3f ) | 0x80 ) ;
utf8 [ 3 ] = ( char ) ( ( codepoint & 0x3f ) | 0x80 ) ;
size = 4 ;
}
* byteSize = size ;
return utf8 ;
RL_FREE ( text ) ;
}
/ / Load all codepoints from a UTF - 8 text string , codepoints count returned by parameter
@ -1694,7 +1661,7 @@ int *LoadCodepoints(const char *text, int *count)
{
int textLength = TextLength ( text ) ;
int bytesProcessed = 0 ;
int codepointSize = 0 ;
int codepointCount = 0 ;
/ / Allocate a big enough buffer to store as many codepoints as text bytes
@ -1702,8 +1669,8 @@ int *LoadCodepoints(const char *text, int *count)
for ( int i = 0 ; i < textLength ; codepointCount + + )
{
codepoints [ codepointCount ] = GetCodepoint ( text + i , & bytesProcessed ) ;
i + = bytesProcessed ;
codepoints [ codepointCount ] = GetCodepointNext ( text + i , & codepointSize ) ;
i + = codepointSize ;
}
/ / Re - allocate buffer to the actual number of codepoints loaded
@ -1731,7 +1698,7 @@ int GetCodepointCount(const char *text)
while ( * ptr ! = ' \0 ' )
{
int next = 0 ;
int letter = GetCodepoint ( ptr , & next ) ;
int letter = GetCodepointNext ( ptr , & next ) ;
if ( letter = = 0x3f ) ptr + = 1 ;
else ptr + = next ;
@ -1741,6 +1708,45 @@ int GetCodepointCount(const char *text)
return length ;
}
// Encode codepoint into UTF-8 text (encoded byte length returned through utf8Size)
// NOTE: It uses a static array to store the UTF-8 bytes: the returned pointer is
// overwritten by the next call, copy the bytes if they need to be kept
// NOTE: The returned text is always null-terminated
// WARNING: Values outside the range [0, 0x10ffff] are not encoded,
// utf8Size is set to 0 and an empty string is returned
const char *CodepointToUTF8(int codepoint, int *utf8Size)
{
    static char utf8[6] = { 0 };
    int size = 0;       // Byte size of codepoint

    // Clear bytes left by a previous (possibly longer) encoding,
    // so the result never carries stale data and is always null-terminated
    for (int i = 0; i < (int)sizeof(utf8); i++) utf8[i] = '\0';

    if (codepoint < 0)
    {
        // Negative values are not valid codepoints, nothing is encoded
        size = 0;
    }
    else if (codepoint <= 0x7f)
    {
        // One byte: 0xxxxxxx
        utf8[0] = (char)codepoint;
        size = 1;
    }
    else if (codepoint <= 0x7ff)
    {
        // Two bytes: 110xxxxx 10xxxxxx
        utf8[0] = (char)(((codepoint >> 6) & 0x1f) | 0xc0);
        utf8[1] = (char)((codepoint & 0x3f) | 0x80);
        size = 2;
    }
    else if (codepoint <= 0xffff)
    {
        // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        utf8[0] = (char)(((codepoint >> 12) & 0x0f) | 0xe0);
        utf8[1] = (char)(((codepoint >> 6) & 0x3f) | 0x80);
        utf8[2] = (char)((codepoint & 0x3f) | 0x80);
        size = 3;
    }
    else if (codepoint <= 0x10ffff)
    {
        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        utf8[0] = (char)(((codepoint >> 18) & 0x07) | 0xf0);
        utf8[1] = (char)(((codepoint >> 12) & 0x3f) | 0x80);
        utf8[2] = (char)(((codepoint >> 6) & 0x3f) | 0x80);
        utf8[3] = (char)((codepoint & 0x3f) | 0x80);
        size = 4;
    }

    *utf8Size = size;

    return utf8;
}
# endif / / SUPPORT_TEXT_MANIPULATION
/ / Get next codepoint in a UTF - 8 encoded text , scanning until ' \0 ' is found
@ -1748,7 +1754,7 @@ int GetCodepointCount(const char *text)
/ / Total number of bytes processed are returned as a parameter
/ / NOTE : The standard says U + FFFD should be returned in case of errors
/ / but that character is not supported by the default font in raylib
int GetCodepoint ( const char * text , int * bytesProcessed )
int GetCodepoint ( const char * text , int * codepointSize )
{
/*
UTF - 8 specs from https : / / www . ietf . org / rfc / rfc3629 . txt
@ -1763,14 +1769,14 @@ int GetCodepoint(const char *text, int *bytesProcessed)
*/
/ / NOTE : on decode errors we return as soon as possible
int code = 0x3f ; / / Codepoint ( defaults to ' ? ' )
int codepoint = 0x3f ; / / Codepoint ( defaults to ' ? ' )
int octet = ( unsigned char ) ( text [ 0 ] ) ; / / The first UTF8 octet
* bytesProcessed = 1 ;
* codepointSize = 1 ;
if ( octet < = 0x7f )
{
/ / Only one octet ( ASCII range x00 - 7F )
code = text [ 0 ] ;
codepoint = text [ 0 ] ;
}
else if ( ( octet & 0xe0 ) = = 0xc0 )
{
@ -1779,12 +1785,12 @@ int GetCodepoint(const char *text, int *bytesProcessed)
/ / [ 0 ] xC2 - DF [ 1 ] UTF8 - tail ( x80 - BF )
unsigned char octet1 = text [ 1 ] ;
if ( ( octet1 = = ' \0 ' ) | | ( ( octet1 > > 6 ) ! = 2 ) ) { * bytesProcessed = 2 ; return code ; } / / Unexpected sequence
if ( ( octet1 = = ' \0 ' ) | | ( ( octet1 > > 6 ) ! = 2 ) ) { * codepointSize = 2 ; return codepoint ; } / / Unexpected sequence
if ( ( octet > = 0xc2 ) & & ( octet < = 0xdf ) )
{
code = ( ( octet & 0x1f ) < < 6 ) | ( octet1 & 0x3f ) ;
* bytesProcessed = 2 ;
codepoint = ( ( octet & 0x1f ) < < 6 ) | ( octet1 & 0x3f ) ;
* codepointSize = 2 ;
}
}
else if ( ( octet & 0xf0 ) = = 0xe0 )
@ -1793,11 +1799,11 @@ int GetCodepoint(const char *text, int *bytesProcessed)
unsigned char octet1 = text [ 1 ] ;
unsigned char octet2 = ' \0 ' ;
if ( ( octet1 = = ' \0 ' ) | | ( ( octet1 > > 6 ) ! = 2 ) ) { * bytesProcessed = 2 ; return code ; } / / Unexpected sequence
if ( ( octet1 = = ' \0 ' ) | | ( ( octet1 > > 6 ) ! = 2 ) ) { * codepointSize = 2 ; return codepoint ; } / / Unexpected sequence
octet2 = text [ 2 ] ;
if ( ( octet2 = = ' \0 ' ) | | ( ( octet2 > > 6 ) ! = 2 ) ) { * bytesProcessed = 3 ; return code ; } / / Unexpected sequence
if ( ( octet2 = = ' \0 ' ) | | ( ( octet2 > > 6 ) ! = 2 ) ) { * codepointSize = 3 ; return codepoint ; } / / Unexpected sequence
/ / [ 0 ] xE0 [ 1 ] xA0 - BF [ 2 ] UTF8 - tail ( x80 - BF )
/ / [ 0 ] xE1 - EC [ 1 ] UTF8 - tail [ 2 ] UTF8 - tail ( x80 - BF )
@ -1805,50 +1811,105 @@ int GetCodepoint(const char *text, int *bytesProcessed)
/ / [ 0 ] xEE - EF [ 1 ] UTF8 - tail [ 2 ] UTF8 - tail ( x80 - BF )
if ( ( ( octet = = 0xe0 ) & & ! ( ( octet1 > = 0xa0 ) & & ( octet1 < = 0xbf ) ) ) | |
( ( octet = = 0xed ) & & ! ( ( octet1 > = 0x80 ) & & ( octet1 < = 0x9f ) ) ) ) { * bytesProcessed = 2 ; return code ; }
( ( octet = = 0xed ) & & ! ( ( octet1 > = 0x80 ) & & ( octet1 < = 0x9f ) ) ) ) { * codepointSize = 2 ; return codepoint ; }
if ( ( octet > = 0xe0 ) & & ( octet < = 0xef ) )
{
code = ( ( octet & 0xf ) < < 12 ) | ( ( octet1 & 0x3f ) < < 6 ) | ( octet2 & 0x3f ) ;
* bytesProcessed = 3 ;
codepoint = ( ( octet & 0xf ) < < 12 ) | ( ( octet1 & 0x3f ) < < 6 ) | ( octet2 & 0x3f ) ;
* codepointSize = 3 ;
}
}
else if ( ( octet & 0xf8 ) = = 0xf0 )
{
/ / Four octets
if ( octet > 0xf4 ) return code ;
if ( octet > 0xf4 ) return codepoint ;
unsigned char octet1 = text [ 1 ] ;
unsigned char octet2 = ' \0 ' ;
unsigned char octet3 = ' \0 ' ;
if ( ( octet1 = = ' \0 ' ) | | ( ( octet1 > > 6 ) ! = 2 ) ) { * bytesProcessed = 2 ; return code ; } / / Unexpected sequence
if ( ( octet1 = = ' \0 ' ) | | ( ( octet1 > > 6 ) ! = 2 ) ) { * codepointSize = 2 ; return codepoint ; } / / Unexpected sequence
octet2 = text [ 2 ] ;
if ( ( octet2 = = ' \0 ' ) | | ( ( octet2 > > 6 ) ! = 2 ) ) { * bytesProcessed = 3 ; return code ; } / / Unexpected sequence
if ( ( octet2 = = ' \0 ' ) | | ( ( octet2 > > 6 ) ! = 2 ) ) { * codepointSize = 3 ; return codepoint ; } / / Unexpected sequence
octet3 = text [ 3 ] ;
if ( ( octet3 = = ' \0 ' ) | | ( ( octet3 > > 6 ) ! = 2 ) ) { * bytesProcessed = 4 ; return code ; } / / Unexpected sequence
if ( ( octet3 = = ' \0 ' ) | | ( ( octet3 > > 6 ) ! = 2 ) ) { * codepointSize = 4 ; return codepoint ; } / / Unexpected sequence
/ / [ 0 ] xF0 [ 1 ] x90 - BF [ 2 ] UTF8 - tail [ 3 ] UTF8 - tail
/ / [ 0 ] xF1 - F3 [ 1 ] UTF8 - tail [ 2 ] UTF8 - tail [ 3 ] UTF8 - tail
/ / [ 0 ] xF4 [ 1 ] x80 - 8F [ 2 ] UTF8 - tail [ 3 ] UTF8 - tail
if ( ( ( octet = = 0xf0 ) & & ! ( ( octet1 > = 0x90 ) & & ( octet1 < = 0xbf ) ) ) | |
( ( octet = = 0xf4 ) & & ! ( ( octet1 > = 0x80 ) & & ( octet1 < = 0x8f ) ) ) ) { * bytesProcessed = 2 ; return code ; } / / Unexpected sequence
( ( octet = = 0xf4 ) & & ! ( ( octet1 > = 0x80 ) & & ( octet1 < = 0x8f ) ) ) ) { * codepointSize = 2 ; return codepoint ; } / / Unexpected sequence
if ( octet > = 0xf0 )
{
code = ( ( octet & 0x7 ) < < 18 ) | ( ( octet1 & 0x3f ) < < 12 ) | ( ( octet2 & 0x3f ) < < 6 ) | ( octet3 & 0x3f ) ;
* bytesProcessed = 4 ;
codepoint = ( ( octet & 0x7 ) < < 18 ) | ( ( octet1 & 0x3f ) < < 12 ) | ( ( octet2 & 0x3f ) < < 6 ) | ( octet3 & 0x3f ) ;
* codepointSize = 4 ;
}
}
if ( code > 0x10ffff ) code = 0x3f ; / / Codepoints after U + 10ff ff are invalid
if ( codepoint > 0x10ffff ) codepoint = 0x3f ; / / Codepoints after U + 10ff ff are invalid
return codepoint ;
}
// Get next codepoint in a UTF-8 encoded byte sequence, byte size returned through codepointSize
// NOTE: Only the lead byte pattern and the continuation bytes (10xxxxxx) are checked,
// overlong encodings, surrogates and the U+10FFFF upper bound are not validated here
// NOTE: On an invalid lead byte or a missing/invalid continuation byte, 0x3f ('?') is
// returned and codepointSize is set to 1, so a caller advancing by codepointSize
// skips exactly one byte and can never step over the string terminator
int GetCodepointNext(const char *text, int *codepointSize)
{
    // Bytes are read as unsigned to avoid sign extension of values >= 0x80
    const unsigned char *ptr = (const unsigned char *)text;
    int codepoint = 0x3f;       // Codepoint (defaults to '?')
    *codepointSize = 1;

    // Get current codepoint and bytes processed
    if ((ptr[0] & 0xf8) == 0xf0)
    {
        // 4 byte UTF-8 codepoint: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        // Continuation bytes are checked in order and '\0' fails the check,
        // so bytes past the end of the string are never read
        if (((ptr[1] & 0xc0) != 0x80) || ((ptr[2] & 0xc0) != 0x80) || ((ptr[3] & 0xc0) != 0x80)) return codepoint;

        codepoint = ((ptr[0] & 0x07) << 18) | ((ptr[1] & 0x3f) << 12) | ((ptr[2] & 0x3f) << 6) | (ptr[3] & 0x3f);
        *codepointSize = 4;
    }
    else if ((ptr[0] & 0xf0) == 0xe0)
    {
        // 3 byte UTF-8 codepoint: 1110xxxx 10xxxxxx 10xxxxxx
        if (((ptr[1] & 0xc0) != 0x80) || ((ptr[2] & 0xc0) != 0x80)) return codepoint;

        codepoint = ((ptr[0] & 0x0f) << 12) | ((ptr[1] & 0x3f) << 6) | (ptr[2] & 0x3f);
        *codepointSize = 3;
    }
    else if ((ptr[0] & 0xe0) == 0xc0)
    {
        // 2 byte UTF-8 codepoint: 110xxxxx 10xxxxxx
        if ((ptr[1] & 0xc0) != 0x80) return codepoint;

        codepoint = ((ptr[0] & 0x1f) << 6) | (ptr[1] & 0x3f);
        *codepointSize = 2;
    }
    else if ((ptr[0] & 0x80) == 0x00)
    {
        // 1 byte UTF-8 codepoint: 0xxxxxxx (includes the terminator '\0')
        codepoint = ptr[0];
        *codepointSize = 1;
    }
    // Any other lead byte (stray continuation byte or 0xf8-0xff) is invalid: '?' with size 1

    return codepoint;
}
// Get previous codepoint in a UTF-8 encoded byte sequence, byte size returned through codepointSize
// WARNING: Bytes located before text are read and no lower bound is checked,
// text must point past the start of its buffer (at least one codepoint in)
// NOTE: codepointSize is left at 0 when the decoded codepoint is 0
int GetCodepointPrevious(const char *text, int *codepointSize)
{
    const char *ptr = text;
    int codepoint = 0x3f;       // Codepoint (defaults to '?')
    int cpSize = 0;
    *codepointSize = 0;

    // Move back to the lead byte of the previous codepoint,
    // stepping over continuation bytes (10xxxxxx)
    do ptr--;
    while ((0xc0 & ptr[0]) == 0x80);

    codepoint = GetCodepointNext(ptr, &cpSize);

    if (codepoint != 0) *codepointSize = cpSize;

    return codepoint;
}
/ / - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -