// values for character encoding
#define TEXT_ENCODING_UNKNOWN 0
#define TEXT_ENCODING_ASCII 1
#define TEXT_ENCODING_UTF_8 2

int getTextEncoding(char *);

// Return the total number of bytes (1 to 6) in the sequence introduced by the
// given lead byte, or 0 if this byte cannot start a sequence.
static int getUTF8SequenceLength(unsigned char lead_byte)
{
  if (lead_byte < 128)          // 0xxxxxxx: 7-bit ASCII character
    return 1;
  if (lead_byte < 192)          // 10xxxxxx: continuation byte without lead byte
    return 0;
  if (lead_byte < 224)          // 110xxxxx
    return 2;
  if (lead_byte < 240)          // 1110xxxx
    return 3;
  if (lead_byte < 248)          // 11110xxx
    return 4;
  if (lead_byte < 252)          // 111110xx
    return 5;
  if (lead_byte < 254)          // 1111110x
    return 6;

  return 0;                     // 0xfe and 0xff are never valid lead bytes
}

// Guess the character encoding of a zero-terminated text string.
//
// Returns:
// - TEXT_ENCODING_ASCII   if the text only contains bytes below 128
//                         (this includes the empty string)
// - TEXT_ENCODING_UTF_8   if the text contains at least one multi-byte
//                         sequence and every sequence is well-formed
// - TEXT_ENCODING_UNKNOWN if the text is NULL or contains a byte that does
//                         not fit into a well-formed sequence
//
// Only the structure of each sequence is checked (lead byte followed by the
// right number of continuation bytes); sequences of up to six bytes are
// accepted, and decoded code point values are not inspected.
int getTextEncoding(char *text)
{
  unsigned char *src = (unsigned char *)text;
  int encoding = TEXT_ENCODING_ASCII;   // default: assume encoding is ASCII

  if (text == NULL)                     // no text at all: nothing to detect
    return TEXT_ENCODING_UNKNOWN;

  while (*src)
  {
    int length = getUTF8SequenceLength(*src);
    int i;

    if (length == 0)
      return TEXT_ENCODING_UNKNOWN;     // invalid lead byte: unknown encoding

    // Bytes are checked strictly in order and the loop stops at the first
    // mismatch, so the terminating zero byte (which is not a continuation
    // byte) is never read past.
    for (i = 1; i < length; i++)
      if (src[i] < 128 || src[i] >= 192)
        return TEXT_ENCODING_UNKNOWN;   // truncated or malformed sequence

    if (length > 1)
      encoding = TEXT_ENCODING_UTF_8;   // non-ASCII character: assume UTF-8

    src += length;
  }

  return encoding;
}