|
@ -75,6 +75,11 @@ int pf_bsearch_r(void *userdata, size_t num_data, pf_bsearch_cb_comp_t cb_comp, |
|
|
return -1; |
|
|
return -1; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/* Returns true if passed byte is first byte of UTF-8 char sequence */ |
|
|
|
|
|
static inline bool utf8_is_start_byte_of_char(const uint8_t b) { |
|
|
|
|
|
return 0x80 != (b & 0xC0); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
/* This function gets the character at the pstart position, interpreting UTF8 multibyte sequences
|
|
|
/* This function gets the character at the pstart position, interpreting UTF8 multibyte sequences
|
|
|
and returns the pointer to the next character */ |
|
|
and returns the pointer to the next character */ |
|
|
uint8_t* get_utf8_value_cb(uint8_t *pstart, read_byte_cb_t cb_read_byte, wchar_t *pval) { |
|
|
uint8_t* get_utf8_value_cb(uint8_t *pstart, read_byte_cb_t cb_read_byte, wchar_t *pval) { |
|
@ -131,8 +136,8 @@ uint8_t* get_utf8_value_cb(uint8_t *pstart, read_byte_cb_t cb_read_byte, wchar_t |
|
|
p++; |
|
|
p++; |
|
|
} |
|
|
} |
|
|
#endif |
|
|
#endif |
|
|
else if (0x80 == (0xC0 & valcur)) |
|
|
else if (!utf8_is_start_byte_of_char(valcur)) |
|
|
for (; 0x80 == (0xC0 & valcur); ) { p++; valcur = cb_read_byte(p); } |
|
|
for (; !utf8_is_start_byte_of_char(valcur); ) { p++; valcur = cb_read_byte(p); } |
|
|
else |
|
|
else |
|
|
for (; 0xFC < (0xFE & valcur); ) { p++; valcur = cb_read_byte(p); } |
|
|
for (; 0xFC < (0xFE & valcur); ) { p++; valcur = cb_read_byte(p); } |
|
|
|
|
|
|
|
@ -143,12 +148,12 @@ uint8_t* get_utf8_value_cb(uint8_t *pstart, read_byte_cb_t cb_read_byte, wchar_t |
|
|
|
|
|
|
|
|
static inline uint8_t utf8_strlen_cb(const char *pstart, read_byte_cb_t cb_read_byte) { |
|
|
static inline uint8_t utf8_strlen_cb(const char *pstart, read_byte_cb_t cb_read_byte) { |
|
|
uint8_t cnt = 0; |
|
|
uint8_t cnt = 0; |
|
|
uint8_t *pnext = (uint8_t *)pstart; |
|
|
uint8_t *p = (uint8_t *)pstart; |
|
|
for (;;) { |
|
|
for (;;) { |
|
|
wchar_t ch; |
|
|
const uint8_t b = cb_read_byte(p); |
|
|
pnext = get_utf8_value_cb(pnext, cb_read_byte, &ch); |
|
|
if (!b) break; |
|
|
if (!ch) break; |
|
|
if (utf8_is_start_byte_of_char(b)) cnt++; |
|
|
cnt++; |
|
|
p++; |
|
|
} |
|
|
} |
|
|
return cnt; |
|
|
return cnt; |
|
|
} |
|
|
} |
|
@ -160,3 +165,26 @@ uint8_t utf8_strlen(const char *pstart) { |
|
|
uint8_t utf8_strlen_P(PGM_P pstart) { |
|
|
uint8_t utf8_strlen_P(PGM_P pstart) { |
|
|
return utf8_strlen_cb(pstart, read_byte_rom); |
|
|
return utf8_strlen_cb(pstart, read_byte_rom); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static inline uint8_t utf8_byte_pos_by_char_num_cb(const char *pstart, read_byte_cb_t cb_read_byte, const uint8_t charnum) { |
|
|
|
|
|
uint8_t *p = (uint8_t *)pstart; |
|
|
|
|
|
uint8_t char_idx = 0; |
|
|
|
|
|
uint8_t byte_idx = 0; |
|
|
|
|
|
for (;;) { |
|
|
|
|
|
const uint8_t b = cb_read_byte(p + byte_idx); |
|
|
|
|
|
if (!b) return byte_idx; // Termination byte of string
|
|
|
|
|
|
if (utf8_is_start_byte_of_char(b)) { |
|
|
|
|
|
char_idx++; |
|
|
|
|
|
if (char_idx == charnum + 1) return byte_idx; |
|
|
|
|
|
} |
|
|
|
|
|
byte_idx++; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
uint8_t utf8_byte_pos_by_char_num(const char *pstart, const uint8_t charnum) { |
|
|
|
|
|
return utf8_byte_pos_by_char_num_cb(pstart, read_byte_ram, charnum); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
uint8_t utf8_byte_pos_by_char_num_P(PGM_P pstart, const uint8_t charnum) { |
|
|
|
|
|
return utf8_byte_pos_by_char_num_cb(pstart, read_byte_rom, charnum); |
|
|
|
|
|
} |
|
|