aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRay <raysan5@gmail.com>2019-10-27 23:56:48 +0100
committerRay <raysan5@gmail.com>2019-10-27 23:56:48 +0100
commitf6df47dfe50fd3d760255fa1326be82200a8a84c (patch)
treeb7419bbbafbb8cffabd4fb8d4035624163046186
parent3f12fa54f7c9d9b278cf50dbc2b0aa01689f1072 (diff)
downloadraylib-f6df47dfe50fd3d760255fa1326be82200a8a84c.tar.gz
raylib-f6df47dfe50fd3d760255fa1326be82200a8a84c.zip
ADDED: TextToUtf8() -WIP-
RENAMED: TextCountCodepoints() -> GetCodepointsCount()
-rw-r--r--src/raylib.h11
-rw-r--r--src/text.c349
2 files changed, 201 insertions, 159 deletions
diff --git a/src/raylib.h b/src/raylib.h
index 9bc7c2fa..6c1e36f0 100644
--- a/src/raylib.h
+++ b/src/raylib.h
@@ -1188,14 +1188,11 @@ RLAPI void DrawTextRecEx(Font font, const char *text, Rectangle rec, float fontS
RLAPI int MeasureText(const char *text, int fontSize); // Measure string width for default font
RLAPI Vector2 MeasureTextEx(Font font, const char *text, float fontSize, float spacing); // Measure string size for Font
RLAPI int GetGlyphIndex(Font font, int character); // Get index position for a unicode character on font
-RLAPI int GetNextCodepoint(const char *text, int *bytesProcessed); // Returns next codepoint in a UTF8 encoded string; 0x3f('?') is returned on failure
-RLAPI int *GetCodepoints(const char *text, int *count); // Get all codepoints in a string, codepoints count returned by parameters
-// Text strings management functions
+// Text strings management functions (no utf8 strings, only byte chars)
// NOTE: Some strings allocate memory internally for returned strings, just be careful!
RLAPI bool TextIsEqual(const char *text1, const char *text2); // Check if two text string are equal
RLAPI unsigned int TextLength(const char *text); // Get text length, checks for '\0' ending
-RLAPI unsigned int TextCountCodepoints(const char *text); // Get total number of characters (codepoints) in a UTF8 encoded string
RLAPI const char *TextFormat(const char *text, ...); // Text formatting with variables (sprintf style)
RLAPI const char *TextSubtext(const char *text, int position, int length); // Get a piece of a text string
RLAPI char *TextReplace(char *text, const char *replace, const char *by); // Replace text string (memory should be freed!)
@@ -1208,6 +1205,12 @@ RLAPI const char *TextToUpper(const char *text); // Get upp
RLAPI const char *TextToLower(const char *text); // Get lower case version of provided string
RLAPI const char *TextToPascal(const char *text); // Get Pascal case notation version of provided string
RLAPI int TextToInteger(const char *text); // Get integer value from text (negative values not supported)
+RLAPI const char *TextToUtf8(int codepoint, int *byteLength); // Encode codepoint into utf8 text (char array length returned as parameter)
+
+// UTF8 text strings management functions
+RLAPI int *GetCodepoints(const char *text, int *count); // Get all codepoints in a string, codepoints count returned by parameters
+RLAPI int GetCodepointsCount(const char *text); // Get total number of characters (codepoints) in a UTF8 encoded string
+RLAPI int GetNextCodepoint(const char *text, int *bytesProcessed); // Returns next codepoint in a UTF8 encoded string; 0x3f('?') is returned on failure
//------------------------------------------------------------------------------------
// Basic 3d Shapes Drawing Functions (Module: models)
diff --git a/src/text.c b/src/text.c
index 09295ab7..66d3a725 100644
--- a/src/text.c
+++ b/src/text.c
@@ -777,140 +777,6 @@ void DrawFPS(int posX, int posY)
DrawText(TextFormat("%2i FPS", fps), posX, posY, 20, LIME);
}
-// Returns next codepoint in a UTF8 encoded text, scanning until '\0' is found
-// When a invalid UTF8 byte is encountered we exit as soon as possible and a '?'(0x3f) codepoint is returned
-// Total number of bytes processed are returned as a parameter
-// NOTE: the standard says U+FFFD should be returned in case of errors
-// but that character is not supported by the default font in raylib
-// TODO: optimize this code for speed!!
-int GetNextCodepoint(const char *text, int *bytesProcessed)
-{
-/*
- UTF8 specs from https://www.ietf.org/rfc/rfc3629.txt
-
- Char. number range | UTF-8 octet sequence
- (hexadecimal) | (binary)
- --------------------+---------------------------------------------
- 0000 0000-0000 007F | 0xxxxxxx
- 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
- 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
- 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-*/
-
- // NOTE: on decode errors we return as soon as possible
-
- int code = 0x3f; // Codepoint (defaults to '?')
- int octet = (unsigned char)(text[0]); // The first UTF8 octet
- *bytesProcessed = 1;
-
- if (octet <= 0x7f)
- {
- // Only one octet (ASCII range x00-7F)
- code = text[0];
- }
- else if ((octet & 0xe0) == 0xc0)
- {
- // Two octets
- // [0]xC2-DF [1]UTF8-tail(x80-BF)
- unsigned char octet1 = text[1];
-
- if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence
-
- if ((octet >= 0xc2) && (octet <= 0xdf))
- {
- code = ((octet & 0x1f) << 6) | (octet1 & 0x3f);
- *bytesProcessed = 2;
- }
- }
- else if ((octet & 0xf0) == 0xe0)
- {
- // Three octets
- unsigned char octet1 = text[1];
- unsigned char octet2 = '\0';
-
- if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence
-
- octet2 = text[2];
-
- if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { *bytesProcessed = 3; return code; } // Unexpected sequence
-
- /*
- [0]xE0 [1]xA0-BF [2]UTF8-tail(x80-BF)
- [0]xE1-EC [1]UTF8-tail [2]UTF8-tail(x80-BF)
- [0]xED [1]x80-9F [2]UTF8-tail(x80-BF)
- [0]xEE-EF [1]UTF8-tail [2]UTF8-tail(x80-BF)
- */
-
- if (((octet == 0xe0) && !((octet1 >= 0xa0) && (octet1 <= 0xbf))) ||
- ((octet == 0xed) && !((octet1 >= 0x80) && (octet1 <= 0x9f)))) { *bytesProcessed = 2; return code; }
-
- if ((octet >= 0xe0) && (0 <= 0xef))
- {
- code = ((octet & 0xf) << 12) | ((octet1 & 0x3f) << 6) | (octet2 & 0x3f);
- *bytesProcessed = 3;
- }
- }
- else if ((octet & 0xf8) == 0xf0)
- {
- // Four octets
- if (octet > 0xf4) return code;
-
- unsigned char octet1 = text[1];
- unsigned char octet2 = '\0';
- unsigned char octet3 = '\0';
-
- if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence
-
- octet2 = text[2];
-
- if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { *bytesProcessed = 3; return code; } // Unexpected sequence
-
- octet3 = text[3];
-
- if ((octet3 == '\0') || ((octet3 >> 6) != 2)) { *bytesProcessed = 4; return code; } // Unexpected sequence
-
- /*
- [0]xF0 [1]x90-BF [2]UTF8-tail [3]UTF8-tail
- [0]xF1-F3 [1]UTF8-tail [2]UTF8-tail [3]UTF8-tail
- [0]xF4 [1]x80-8F [2]UTF8-tail [3]UTF8-tail
- */
-
- if (((octet == 0xf0) && !((octet1 >= 0x90) && (octet1 <= 0xbf))) ||
- ((octet == 0xf4) && !((octet1 >= 0x80) && (octet1 <= 0x8f)))) { *bytesProcessed = 2; return code; } // Unexpected sequence
-
- if (octet >= 0xf0)
- {
- code = ((octet & 0x7) << 18) | ((octet1 & 0x3f) << 12) | ((octet2 & 0x3f) << 6) | (octet3 & 0x3f);
- *bytesProcessed = 4;
- }
- }
-
- if (code > 0x10ffff) code = 0x3f; // Codepoints after U+10ffff are invalid
-
- return code;
-}
-
-// Get all codepoints in a string, codepoints count returned by parameters
-int *GetCodepoints(const char *text, int *count)
-{
- static int codepoints[MAX_TEXT_UNICODE_CHARS] = { 0 };
- memset(codepoints, 0, MAX_TEXT_UNICODE_CHARS*sizeof(int));
-
- int bytesProcessed = 0;
- int textLength = strlen(text);
- int codepointsCount = 0;
-
- for (int i = 0; i < textLength; codepointsCount++)
- {
- codepoints[codepointsCount] = GetNextCodepoint(text + i, &bytesProcessed);
- i += bytesProcessed;
- }
-
- *count = codepointsCount;
-
- return codepoints;
-}
-
// Draw text (using default font)
// NOTE: fontSize work like in any drawing program but if fontSize is lower than font-base-size, then font-base-size is used
// NOTE: chars spacing is proportional to fontSize
@@ -1235,27 +1101,6 @@ unsigned int TextLength(const char *text)
return length;
}
-// Returns total number of characters(codepoints) in a UTF8 encoded text, until '\0' is found
-// NOTE: If an invalid UTF8 sequence is encountered a '?'(0x3f) codepoint is counted instead
-unsigned int TextCountCodepoints(const char *text)
-{
- unsigned int len = 0;
- char *ptr = (char *)&text[0];
-
- while (*ptr != '\0')
- {
- int next = 0;
- int letter = GetNextCodepoint(ptr, &next);
-
- if (letter == 0x3f) ptr += 1;
- else ptr += next;
-
- len++;
- }
-
- return len;
-}
-
// Formatting of text with variables to 'embed'
const char *TextFormat(const char *text, ...)
{
@@ -1531,6 +1376,200 @@ int TextToInteger(const char *text)
return result;
}
+
+// Encode codepoint into utf8 text (char array length returned as parameter)
+// NOTE: The returned pointer refers to an internal static buffer that is overwritten on every call,
+// copy the bytes if they must survive the next call
+// NOTE: The buffer is always '\0' terminated right after the encoded bytes, so it can be used as a C string
+// NOTE: Invalid codepoints (negative or above U+10FFFF) encode nothing: byteLength is 0 and text is empty
+const char *TextToUtf8(int codepoint, int *byteLength)
+{
+    static char utf8[6] = { 0 };
+    int length = 0;
+
+    if (codepoint < 0)
+    {
+        // Negative values are not codepoints, nothing to encode
+        length = 0;
+    }
+    else if (codepoint <= 0x7f)
+    {
+        // 0xxxxxxx
+        utf8[0] = (char)codepoint;
+        length = 1;
+    }
+    else if (codepoint <= 0x7ff)
+    {
+        // 110xxxxx 10xxxxxx
+        utf8[0] = (char)(((codepoint >> 6) & 0x1f) | 0xc0);
+        utf8[1] = (char)((codepoint & 0x3f) | 0x80);
+        length = 2;
+    }
+    else if (codepoint <= 0xffff)
+    {
+        // 1110xxxx 10xxxxxx 10xxxxxx
+        utf8[0] = (char)(((codepoint >> 12) & 0x0f) | 0xe0);
+        utf8[1] = (char)(((codepoint >> 6) & 0x3f) | 0x80);
+        utf8[2] = (char)((codepoint & 0x3f) | 0x80);
+        length = 3;
+    }
+    else if (codepoint <= 0x10ffff)
+    {
+        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+        utf8[0] = (char)(((codepoint >> 18) & 0x07) | 0xf0);
+        utf8[1] = (char)(((codepoint >> 12) & 0x3f) | 0x80);
+        utf8[2] = (char)(((codepoint >> 6) & 0x3f) | 0x80);
+        utf8[3] = (char)((codepoint & 0x3f) | 0x80);
+        length = 4;
+    }
+
+    // Terminate after the bytes just written, the static buffer may still hold
+    // trailing bytes from a previous (longer) encoding
+    utf8[length] = '\0';
+
+    *byteLength = length;
+
+    return utf8;
+}
+
+
+// Get all codepoints in a string, codepoints count returned by parameters
+// NOTE: The returned pointer refers to an internal static array that is cleared and refilled on every call
+// NOTE: At most MAX_TEXT_UNICODE_CHARS codepoints are stored, any remaining text is ignored
+int *GetCodepoints(const char *text, int *count)
+{
+    static int codepoints[MAX_TEXT_UNICODE_CHARS] = { 0 };
+    memset(codepoints, 0, MAX_TEXT_UNICODE_CHARS*sizeof(int));
+
+    int bytesProcessed = 0;
+    int textLength = (int)strlen(text);
+    int codepointsCount = 0;
+
+    // Stop when the text is exhausted or the static array is full (avoids writing out of bounds)
+    for (int i = 0; (i < textLength) && (codepointsCount < MAX_TEXT_UNICODE_CHARS); codepointsCount++)
+    {
+        codepoints[codepointsCount] = GetNextCodepoint(text + i, &bytesProcessed);
+        i += bytesProcessed;
+    }
+
+    *count = codepointsCount;
+
+    return codepoints;
+}
+
+// Returns total number of characters(codepoints) in a UTF8 encoded text, until '\0' is found
+// NOTE: If an invalid UTF8 sequence is encountered a '?'(0x3f) codepoint is counted instead
+int GetCodepointsCount(const char *text)
+{
+    int count = 0;              // Same type as the return value, no implicit unsigned->int conversion
+    const char *ptr = text;     // Text is only read, no need to drop the const qualifier
+
+    while (*ptr != '\0')
+    {
+        int next = 0;
+        int letter = GetNextCodepoint(ptr, &next);
+
+        // A '?' result is either a literal '?' or a decode error; on errors more than one byte
+        // may be reported as processed, so advance a single byte and re-examine what follows
+        if (letter == 0x3f) ptr += 1;
+        else ptr += next;
+
+        count++;
+    }
+
+    return count;
+}
+
+
+// Returns next codepoint in a UTF8 encoded text, scanning until '\0' is found
+// When an invalid UTF8 byte is encountered we exit as soon as possible and a '?'(0x3f) codepoint is returned
+// Total number of bytes processed are returned as a parameter; on errors this is the number of
+// bytes examined before giving up, which can include the offending byte
+// NOTE: the standard says U+FFFD should be returned in case of errors
+// but that character is not supported by the default font in raylib
+// TODO: optimize this code for speed!!
+int GetNextCodepoint(const char *text, int *bytesProcessed)
+{
+/*
+    UTF8 specs from https://www.ietf.org/rfc/rfc3629.txt
+
+    Char. number range  |        UTF-8 octet sequence
+      (hexadecimal)     |              (binary)
+    --------------------+---------------------------------------------
+    0000 0000-0000 007F | 0xxxxxxx
+    0000 0080-0000 07FF | 110xxxxx 10xxxxxx
+    0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+    0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+*/
+    // NOTE: on decode errors we return as soon as possible
+
+    int code = 0x3f;                        // Codepoint (defaults to '?')
+    int octet = (unsigned char)(text[0]);   // The first UTF8 octet
+    *bytesProcessed = 1;
+
+    if (octet <= 0x7f)
+    {
+        // Only one octet (ASCII range x00-7F)
+        code = text[0];
+    }
+    else if ((octet & 0xe0) == 0xc0)
+    {
+        // Two octets
+        // [0]xC2-DF    [1]UTF8-tail(x80-BF)
+        unsigned char octet1 = text[1];
+
+        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence
+
+        // Lead bytes xC0 and xC1 would be overlong encodings, they keep the default '?' and 1 byte processed
+        if ((octet >= 0xc2) && (octet <= 0xdf))
+        {
+            code = ((octet & 0x1f) << 6) | (octet1 & 0x3f);
+            *bytesProcessed = 2;
+        }
+    }
+    else if ((octet & 0xf0) == 0xe0)
+    {
+        // Three octets
+        unsigned char octet1 = text[1];
+        unsigned char octet2 = '\0';
+
+        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence
+
+        // Only read the next octet once the previous one is known not to be the terminator
+        octet2 = text[2];
+
+        if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { *bytesProcessed = 3; return code; } // Unexpected sequence
+
+        /*
+            [0]xE0    [1]xA0-BF       [2]UTF8-tail(x80-BF)
+            [0]xE1-EC [1]UTF8-tail    [2]UTF8-tail(x80-BF)
+            [0]xED    [1]x80-9F       [2]UTF8-tail(x80-BF)
+            [0]xEE-EF [1]UTF8-tail    [2]UTF8-tail(x80-BF)
+        */
+
+        if (((octet == 0xe0) && !((octet1 >= 0xa0) && (octet1 <= 0xbf))) ||
+            ((octet == 0xed) && !((octet1 >= 0x80) && (octet1 <= 0x9f)))) { *bytesProcessed = 2; return code; }
+
+        if ((octet >= 0xe0) && (octet <= 0xef))
+        {
+            code = ((octet & 0xf) << 12) | ((octet1 & 0x3f) << 6) | (octet2 & 0x3f);
+            *bytesProcessed = 3;
+        }
+    }
+    else if ((octet & 0xf8) == 0xf0)
+    {
+        // Four octets
+        if (octet > 0xf4) return code;      // Lead bytes xF5-F7 would encode values above U+10FFFF
+
+        unsigned char octet1 = text[1];
+        unsigned char octet2 = '\0';
+        unsigned char octet3 = '\0';
+
+        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence
+
+        octet2 = text[2];
+
+        if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { *bytesProcessed = 3; return code; } // Unexpected sequence
+
+        octet3 = text[3];
+
+        if ((octet3 == '\0') || ((octet3 >> 6) != 2)) { *bytesProcessed = 4; return code; } // Unexpected sequence
+
+        /*
+            [0]xF0       [1]x90-BF       [2]UTF8-tail  [3]UTF8-tail
+            [0]xF1-F3    [1]UTF8-tail    [2]UTF8-tail  [3]UTF8-tail
+            [0]xF4       [1]x80-8F       [2]UTF8-tail  [3]UTF8-tail
+        */
+
+        if (((octet == 0xf0) && !((octet1 >= 0x90) && (octet1 <= 0xbf))) ||
+            ((octet == 0xf4) && !((octet1 >= 0x80) && (octet1 <= 0x8f)))) { *bytesProcessed = 2; return code; } // Unexpected sequence
+
+        if (octet >= 0xf0)
+        {
+            code = ((octet & 0x7) << 18) | ((octet1 & 0x3f) << 12) | ((octet2 & 0x3f) << 6) | (octet3 & 0x3f);
+            *bytesProcessed = 4;
+        }
+    }
+
+    // Stray continuation bytes (x80-BF) and lead bytes xF8-FF match no branch above:
+    // they keep the default '?' with a single byte processed
+
+    if (code > 0x10ffff) code = 0x3f;     // Codepoints after U+10ffff are invalid
+
+    return code;
+}
//----------------------------------------------------------------------------------
//----------------------------------------------------------------------------------