diff --git a/Marlin/Configuration_adv.h b/Marlin/Configuration_adv.h index ba56862d5d..b308b05e19 100644 --- a/Marlin/Configuration_adv.h +++ b/Marlin/Configuration_adv.h @@ -1256,6 +1256,10 @@ // Note: Only affects SCROLL_LONG_FILENAMES with SDSORT_CACHE_NAMES but not SDSORT_DYNAMIC_RAM. #endif + // Allow international symbols in long filenames. To display correctly, the + // LCD's font must contain the characters. Check your selected LCD language. + #define UTF_FILENAME_SUPPORT + // This allows hosts to request long names for files and folders with M33 //#define LONG_FILENAME_HOST_SUPPORT diff --git a/Marlin/src/lcd/fontutils.cpp b/Marlin/src/lcd/fontutils.cpp index 22b54c72de..4aaf621844 100644 --- a/Marlin/src/lcd/fontutils.cpp +++ b/Marlin/src/lcd/fontutils.cpp @@ -9,6 +9,8 @@ #include "../inc/MarlinConfig.h" +#define MAX_UTF8_CHAR_SIZE 4 + #if HAS_WIRED_LCD #include "marlinui.h" #include "../MarlinCore.h" @@ -79,6 +81,8 @@ uint8_t* get_utf8_value_cb(uint8_t *pstart, read_byte_cb_t cb_read_byte, wchar_t uint32_t val = 0; uint8_t *p = pstart; + #define NEXT_6_BITS() do{ val <<= 6; p++; valcur = cb_read_byte(p); val |= (valcur & 0x3F); }while(0) + uint8_t valcur = cb_read_byte(p); if (0 == (0x80 & valcur)) { val = valcur; @@ -86,74 +90,51 @@ uint8_t* get_utf8_value_cb(uint8_t *pstart, read_byte_cb_t cb_read_byte, wchar_t } else if (0xC0 == (0xE0 & valcur)) { val = valcur & 0x1F; - val <<= 6; - p++; - valcur = cb_read_byte(p); - val |= (valcur & 0x3F); - p++; - } - else if (0xE0 == (0xF0 & valcur)) { - val = valcur & 0x0F; - val <<= 6; p++; - valcur = cb_read_byte(p); - val |= (valcur & 0x3F); - val <<= 6; p++; - valcur = cb_read_byte(p); - val |= (valcur & 0x3F); - p++; - } - else if (0xF0 == (0xF8 & valcur)) { - val = valcur & 0x07; - val <<= 6; p++; - valcur = cb_read_byte(p); - val |= (valcur & 0x3F); - val <<= 6; p++; - valcur = cb_read_byte(p); - val |= (valcur & 0x3F); - val <<= 6; p++; - valcur = cb_read_byte(p); - val |= (valcur & 0x3F); - p++; - } - else if (0xF8 == (0xFC & valcur)) { - val = valcur & 0x03; - val <<= 6; p++; - valcur = cb_read_byte(p); - val |= (valcur & 0x3F); - val <<= 6; p++; - valcur = cb_read_byte(p); - val |= (valcur & 0x3F); - val <<= 6; p++; - valcur = cb_read_byte(p); - val |= (valcur & 0x3F); - val <<= 6; p++; - valcur = cb_read_byte(p); - val |= (valcur & 0x3F); - p++; - } - else if (0xFC == (0xFE & valcur)) { - val = valcur & 0x01; - val <<= 6; p++; - valcur = cb_read_byte(p); - val |= (valcur & 0x3F); - val <<= 6; p++; - valcur = cb_read_byte(p); - val |= (valcur & 0x3F); - val <<= 6; p++; - valcur = cb_read_byte(p); - val |= (valcur & 0x3F); - val <<= 6; p++; - valcur = cb_read_byte(p); - val |= (valcur & 0x3F); - val <<= 6; p++; - valcur = cb_read_byte(p); - val |= (valcur & 0x3F); + NEXT_6_BITS(); p++; } + #if MAX_UTF8_CHAR_SIZE >= 3 + else if (0xE0 == (0xF0 & valcur)) { + val = valcur & 0x0F; + NEXT_6_BITS(); + NEXT_6_BITS(); + p++; + } + #endif + #if MAX_UTF8_CHAR_SIZE >= 4 + else if (0xF0 == (0xF8 & valcur)) { + val = valcur & 0x07; + NEXT_6_BITS(); + NEXT_6_BITS(); + NEXT_6_BITS(); + p++; + } + #endif + #if MAX_UTF8_CHAR_SIZE >= 5 + else if (0xF8 == (0xFC & valcur)) { + val = valcur & 0x03; + NEXT_6_BITS(); + NEXT_6_BITS(); + NEXT_6_BITS(); + NEXT_6_BITS(); + p++; + } + #endif + #if MAX_UTF8_CHAR_SIZE >= 6 + else if (0xFC == (0xFE & valcur)) { + val = valcur & 0x01; + NEXT_6_BITS(); + NEXT_6_BITS(); + NEXT_6_BITS(); + NEXT_6_BITS(); + NEXT_6_BITS(); + p++; + } + #endif else if (0x80 == (0xC0 & valcur)) for (; 0x80 == (0xC0 & valcur); ) { p++; valcur = cb_read_byte(p); } else - for (; ((0xFE & valcur) > 0xFC); ) { p++; valcur = cb_read_byte(p); } + for (; 0xFC < (0xFE & valcur); ) { p++; valcur = cb_read_byte(p); } if (pval) *pval = val; diff --git a/Marlin/src/sd/SdBaseFile.cpp b/Marlin/src/sd/SdBaseFile.cpp index 46ed9372ab..acc5ba17f2 100644 --- a/Marlin/src/sd/SdBaseFile.cpp +++ b/Marlin/src/sd/SdBaseFile.cpp @@ -1103,19 +1103,67 @@ int8_t SdBaseFile::readDir(dir_t* dir, char* longFilename) { if (WITHIN(seq, 1, MAX_VFAT_ENTRIES)) { // TODO: Store the filename checksum to verify if a long-filename-unaware system modified the file table. n = (seq - 1) * (FILENAME_LENGTH); - LOOP_L_N(i, FILENAME_LENGTH) - longFilename[n + i] = (i < 5) ? VFAT->name1[i] : (i < 11) ? VFAT->name2[i - 5] : VFAT->name3[i - 11]; + LOOP_L_N(i, FILENAME_LENGTH) { + uint16_t utf16_ch = (i < 5) ? VFAT->name1[i] : (i < 11) ? VFAT->name2[i - 5] : VFAT->name3[i - 11]; + #if ENABLED(UTF_FILENAME_SUPPORT) + // We can't reconvert to UTF-8 here as UTF-8 is variable-size encoding, but joining LFN blocks + // needs static bytes addressing. So here just store full UTF-16LE words to re-convert later. + uint16_t idx = (n + i) * 2; // This is fixed as FAT LFN always contain UTF-16LE encoding + longFilename[idx] = utf16_ch & 0xFF; + longFilename[idx+1] = (utf16_ch >> 8) & 0xFF; + #else + // Replace all multibyte characters to '_' + longFilename[n + i] = (utf16_ch > 0xFF) ? '_' : (utf16_ch & 0xFF); + #endif + } // If this VFAT entry is the last one, add a NUL terminator at the end of the string - if (VFAT->sequenceNumber & 0x40) longFilename[n + FILENAME_LENGTH] = '\0'; + if (VFAT->sequenceNumber & 0x40) longFilename[(n + FILENAME_LENGTH) * LONG_FILENAME_CHARSIZE] = '\0'; } } } + // Return if normal file or subdirectory - if (DIR_IS_FILE_OR_SUBDIR(dir)) return n; + if (DIR_IS_FILE_OR_SUBDIR(dir)) { + #if ENABLED(UTF_FILENAME_SUPPORT) + // Convert filename from utf-16 to utf-8 as Marlin expects + #if LONG_FILENAME_CHARSIZE > 2 + // Add warning for developers for currently not supported 3-byte cases (Conversion series of 2-byte + // codepoints to 3-byte in-place will break the rest of filename) + #error "Currently filename re-encoding is done in-place. It may break the remaining chars to use 3-byte codepoints." + #endif + uint16_t currentPos = 0; + LOOP_L_N(i, (LONG_FILENAME_LENGTH / 2)) { + uint16_t idx = i * 2; // This is fixed as FAT LFN always contain UTF-16LE encoding + + uint16_t utf16_ch = longFilename[idx] | (longFilename[idx + 1] << 8); + if (0xD800 == (utf16_ch & 0xF800)) // Surrogate pair - encode as '_' + longFilename[currentPos++] = '_'; + else if (0 == (utf16_ch & 0xFF80)) // Encode as 1-byte utf-8 char + longFilename[currentPos++] = utf16_ch & 0x007F; + else if (0 == (utf16_ch & 0xF800)) { // Encode as 2-byte utf-8 char + longFilename[currentPos++] = 0xC0 | ((utf16_ch >> 6) & 0x1F); + longFilename[currentPos++] = 0x80 | (utf16_ch & 0x3F); + } + else { + #if LONG_FILENAME_CHARSIZE > 2 // Encode as 3-byte utf-8 char + longFilename[currentPos++] = 0xE0 | ((utf16_ch >> 12) & 0x0F); + longFilename[currentPos++] = 0xC0 | ((utf16_ch >> 6) & 0x3F); + longFilename[currentPos++] = 0xC0 | (utf16_ch & 0x3F); + #else // Encode as '_' + longFilename[currentPos++] = '_'; + #endif + } + + if (0 == utf16_ch) break; // End of filename + } + return currentPos; + #else + return n; + #endif + } } } - // Read next directory entry into the cache // Assumes file is correctly positioned dir_t* SdBaseFile::readDirCache() { diff --git a/Marlin/src/sd/SdFatConfig.h b/Marlin/src/sd/SdFatConfig.h index 8f0596c5dd..13ac3a7487 100644 --- a/Marlin/src/sd/SdFatConfig.h +++ b/Marlin/src/sd/SdFatConfig.h @@ -103,5 +103,10 @@ #define FILENAME_LENGTH 13 // Number of UTF-16 characters per entry +// UTF-8 may use up to 3 bytes to represent single UTF-16 code point. +// We discard 3-byte characters allowing only 2-bytes +// or 1-byte if UTF_FILENAME_SUPPORT disabled. +#define LONG_FILENAME_CHARSIZE TERN(UTF_FILENAME_SUPPORT, 2, 1) + // Total bytes needed to store a single long filename -#define LONG_FILENAME_LENGTH (FILENAME_LENGTH * MAX_VFAT_ENTRIES + 1) +#define LONG_FILENAME_LENGTH (FILENAME_LENGTH * LONG_FILENAME_CHARSIZE * MAX_VFAT_ENTRIES + 1) diff --git a/buildroot/share/fonts/genpages.c b/buildroot/share/fonts/genpages.c index 2a87b19d47..c855ceac50 100644 --- a/buildroot/share/fonts/genpages.c +++ b/buildroot/share/fonts/genpages.c @@ -71,63 +71,49 @@ uint8_t* get_utf8_value(uint8_t *pstart, wchar_t *pval) { assert(NULL != pstart); + #define NEXT_6_BITS() do{ val <<= 6; p++; val |= (*p & 0x3F); }while(0) + if (0 == (0x80 & *p)) { val = (size_t)*p; p++; } else if (0xC0 == (0xE0 & *p)) { val = *p & 0x1F; - val <<= 6; - p++; - val |= (*p & 0x3F); + NEXT_6_BITS(); p++; assert((wchar_t)val == get_val_utf82uni(pstart)); } else if (0xE0 == (0xF0 & *p)) { val = *p & 0x0F; - val <<= 6; p++; - val |= (*p & 0x3F); - val <<= 6; p++; - val |= (*p & 0x3F); + NEXT_6_BITS(); + NEXT_6_BITS(); p++; assert((wchar_t)val == get_val_utf82uni(pstart)); } else if (0xF0 == (0xF8 & *p)) { val = *p & 0x07; - val <<= 6; p++; - val |= (*p & 0x3F); - val <<= 6; p++; - val |= (*p & 0x3F); - val <<= 6; p++; - val |= (*p & 0x3F); + NEXT_6_BITS(); + NEXT_6_BITS(); + NEXT_6_BITS(); p++; assert((wchar_t)val == get_val_utf82uni(pstart)); } else if (0xF8 == (0xFC & *p)) { val = *p & 0x03; - val <<= 6; p++; - val |= (*p & 0x3F); - val <<= 6; p++; - val |= (*p & 0x3F); - val <<= 6; p++; - val |= (*p & 0x3F); - val <<= 6; p++; - val |= (*p & 0x3F); + NEXT_6_BITS(); + NEXT_6_BITS(); + NEXT_6_BITS(); + NEXT_6_BITS(); p++; assert((wchar_t)val == get_val_utf82uni(pstart)); } else if (0xFC == (0xFE & *p)) { val = *p & 0x01; - val <<= 6; p++; - val |= (*p & 0x3F); - val <<= 6; p++; - val |= (*p & 0x3F); - val <<= 6; p++; - val |= (*p & 0x3F); - val <<= 6; p++; - val |= (*p & 0x3F); - val <<= 6; p++; - val |= (*p & 0x3F); + NEXT_6_BITS(); + NEXT_6_BITS(); + NEXT_6_BITS(); + NEXT_6_BITS(); + NEXT_6_BITS(); p++; assert((wchar_t)val == get_val_utf82uni(pstart)); }