You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
208 lines
4.3 KiB
208 lines
4.3 KiB
7 years ago
|
/**
 * @file genpages.c
 * @brief generate required font page files
 * @author Yunhui Fu (yhfudev@gmail.com)
 * @version 1.0
 * @date 2015-02-19
 * @copyright Yunhui Fu (2015)
 */
|
||
|
|
||
|
#include <stdio.h>
|
||
|
#include <stdint.h> /* uint8_t */
|
||
|
#include <stdlib.h> /* size_t */
|
||
|
#include <string.h>
|
||
|
#include <assert.h>
|
||
|
#include "getline.h"
|
||
|
|
||
|
/**
 * @brief Decode the UTF-8 sequence starting at pstart into one code value.
 *
 * @param pstart : pointer to the lead byte of the sequence
 *
 * @return the byte itself when its high bit is clear; otherwise the value
 *         assembled from the lead byte and its trailing bytes; 0 when the
 *         lead byte matches none of the 2- to 6-byte lead patterns
 *
 * Trailing bytes are not validated: the number announced by the lead byte is
 * always read, and only the low 6 bits of each one are used.
 */
wchar_t get_val_utf82uni(uint8_t *pstart) {
    /* (lead & mask) == tag announces `extra` trailing bytes */
    static const struct {
        uint8_t mask;
        uint8_t tag;
        uint8_t extra;
    } layouts[] = {
        { 0xE0, 0xC0, 1 },
        { 0xF0, 0xE0, 2 },
        { 0xF8, 0xF0, 3 },
        { 0xFC, 0xF8, 4 },
        { 0xFE, 0xFC, 5 },
    };
    const uint8_t lead = *pstart;
    const uint8_t *cur = pstart + 1;
    wchar_t code = 0;
    size_t remaining = 0;
    size_t i;

    if ((lead & 0x80) == 0) {
        return lead;
    }

    for (i = 0; i < sizeof(layouts) / sizeof(layouts[0]); i++) {
        if ((lead & layouts[i].mask) == layouts[i].tag) {
            remaining = layouts[i].extra;
            /* payload bits are whatever the mask does not cover */
            code = lead & ~layouts[i].mask;
            break;
        }
    }

    /* an unmatched lead leaves remaining == 0 and code == 0 */
    while (remaining > 0) {
        code = (code << 6) | (*cur & 0x3F);
        cur++;
        remaining--;
    }
    return code;
}
|
||
|
|
||
|
/**
 * @brief Convert one UTF-8 encoded character to a native Unicode character (wchar_t)
 *
 * @param pstart : pointer to the UTF-8 encoded bytes (must not be NULL)
 * @param pval : address where the decoded Unicode character is stored; may be NULL
 *
 * @return the position of the next UTF-8 character
 *
 * Handles the 1- to 6-byte forms. The value stored is 0 for anything that is
 * not a complete sequence:
 *  - a run of stray continuation bytes (10xxxxxx) is skipped as a whole;
 *  - a run of 0xFE/0xFF bytes is skipped as a whole;
 *  - a lead byte followed by too few continuation bytes consumes only the
 *    bytes that were valid, and the returned pointer addresses the first
 *    offending byte. Because a NUL terminator is never a continuation byte,
 *    decoding cannot run past the end of a NUL-terminated string.
 *
 * The returned pointer is always at least pstart + 1. Overlong encodings and
 * surrogate values are not rejected.
 */
uint8_t* get_utf8_value(uint8_t *pstart, wchar_t *pval) {
    uint32_t val = 0;
    uint8_t *p = pstart;
    size_t cntleft = 0;
    int complete = 1;

    assert(NULL != pstart);

    if (0 == (0x80 & *p)) {
        /* plain 7-bit byte */
        val = *p;
        p++;
    }
    else if (0x80 == (0xC0 & *p)) {
        /* continuation byte without a lead byte: skip the whole run */
        for (; 0x80 == (0xC0 & *p); p++);
    }
    else if (0xFE == (0xFE & *p)) {
        /* 0xFE and 0xFF never occur in UTF-8: skip the whole run */
        for (; 0xFE == (0xFE & *p); p++);
    }
    else {
        /* lead byte: pick up its payload bits and the trailing byte count */
        if (0xC0 == (0xE0 & *p)) {
            cntleft = 1;
            val = *p & 0x1F;
        }
        else if (0xE0 == (0xF0 & *p)) {
            cntleft = 2;
            val = *p & 0x0F;
        }
        else if (0xF0 == (0xF8 & *p)) {
            cntleft = 3;
            val = *p & 0x07;
        }
        else if (0xF8 == (0xFC & *p)) {
            cntleft = 4;
            val = *p & 0x03;
        }
        else {
            /* only 1111110x is left at this point */
            cntleft = 5;
            val = *p & 0x01;
        }
        p++;

        for (; cntleft > 0; cntleft--) {
            if (0x80 != (0xC0 & *p)) {
                /* truncated sequence: stop here so the caller resumes on
                 * this byte and nothing beyond it is read */
                complete = 0;
                val = 0;
                break;
            }
            val = (val << 6) | (uint32_t)(*p & 0x3F);
            p++;
        }

        if (complete) {
            /* debug cross-check against the second decoder; skipped for
             * truncated input because that decoder does not validate
             * trailing bytes */
            assert((wchar_t)val == get_val_utf82uni(pstart));
        }
    }

    if (pval) *pval = val;

    return p;
}
|
||
|
|
||
|
/**
 * @brief Print a short usage message to stderr.
 *
 * @param progname : name shown after "Usage: "
 */
void usage(char* progname) {
    fprintf(stderr, "Usage: %s\n", progname);
    fputs(" read data from stdin\n", stderr);
}
|
||
|
|
||
|
/**
 * @brief Scan a UTF-8 buffer and report every character with a value >= 256.
 *
 * @param msg : buffer holding UTF-8 text
 * @param len : number of bytes of msg to scan
 *
 * For each reported character one line is written to stdout:
 * "<value / 128> <value % 128> <the raw bytes of the character>".
 * Characters below 256 are skipped silently. Scanning stops early when the
 * decoder returns NULL, makes no progress, or reports a sequence that
 * extends beyond msg + len (an incomplete trailing character).
 */
void utf8_parse(const char* msg, unsigned int len) {
    /* get_utf8_value() takes a non-const pointer, hence the cast */
    uint8_t *pre = (uint8_t *)msg;
    uint8_t *pend = pre + len;

    while (pre < pend) {
        wchar_t val = 0;
        uint8_t *p = get_utf8_value(pre, &val);

        if (NULL == p || p <= pre) {
            break;
        }
        if (p > pend) {
            /* never echo bytes outside the caller's buffer */
            break;
        }

        if (val >= 256) {
            /* wchar_t is not int everywhere, so convert explicitly for %d */
            int page = (int)(val / 128);
            int offset = (int)(val % 128);

            fprintf(stdout, "%d %d ", page, offset);
            for (; pre < p; pre++) {
                fputc(*pre, stdout);
            }
            fputc('\n', stdout);
        }
        pre = p;
    }
}
|
||
|
|
||
|
/**
 * @brief Read fp line by line and pass each line to utf8_parse().
 *
 * @param fp : stream to read from
 *
 * @return 0 once reading stops, -1 if the initial line buffer cannot be
 *         allocated
 *
 * Reading stops at the first getline() call that does not return a positive
 * value. The length handed to utf8_parse() is strlen() of the line.
 * NOTE(review): getline() comes from the project's getline.h and is
 * presumably POSIX-like (may grow the buffer) -- confirm.
 */
int load_file(FILE *fp) {
    size_t capacity = 10000;
    char *line = (char*)malloc(capacity);

    if (line == NULL) {
        return -1;
    }

    for (;;) {
        if (!(getline(&line, &capacity, fp) > 0)) {
            break;
        }
        utf8_parse((const char*)line, (unsigned int)strlen(line));
    }

    free(line);
    return 0;
}
|
||
|
|
||
|
/**
 * @brief Program entry: process stdin and report characters via load_file().
 *
 * @param argc : argument count; any argument beyond the program name is an error
 * @param argv : argument vector; argv[0] is shown in the usage message
 *
 * @return 0 on success; exit status 1 when arguments are given or when
 *         load_file() reports a failure
 */
int main(int argc, char * argv[]) {
    if (argc > 1) {
        usage(argv[0]);
        exit(1);
    }

    /* do not report success when the input could not be processed */
    if (load_file(stdin) != 0) {
        fputs("error: failed to process input\n", stderr);
        return 1;
    }
    return 0;
}
|