/* 44 lines, 1.4 KiB, C++ */
|
#include "utf8.hpp"
|
||
|
|
||
|
/*
 * Returns the number of bytes utf8_write_codepoint-style UTF-8 encoding
 * needs for `codepoint`.
 *
 *   codepoint <= 0x7f    -> 1
 *   codepoint <= 0x7ff   -> 2
 *   codepoint <= 0xffff  -> 3
 *   anything larger      -> 4
 *
 * No validation is performed: every value above 0xffff reports 4, and
 * values in the 3-byte range are not checked for being surrogates.
 */
size_t utf8_codepoint_size(char32_t codepoint) {
    /* Each mask selects the bits that must be clear for the value to fit
     * in the payload of the corresponding encoding length. */
    if (0 == ((char32_t)0xffffff80 & codepoint)) {
        return 1; /* fits in 7 bits */
    }
    if (0 == ((char32_t)0xfffff800 & codepoint)) {
        return 2; /* fits in 11 bits */
    }
    if (0 == ((char32_t)0xffff0000 & codepoint)) {
        return 3; /* fits in 16 bits */
    }
    return 4;
}
|
||
|
|
||
|
/*
 * Encodes `codepoint` as UTF-8 into the buffer at `str` and returns a
 * pointer to the byte just past the last one written.
 *
 * Between 1 and 4 bytes are written, so `str` must have room for up to
 * 4 bytes. No NUL terminator is appended.
 *
 * No validation is performed: values in the 3-byte range are not checked
 * for being surrogates, and for values above 0x1fffff only the low 21
 * bits end up in the 4-byte sequence.
 */
char *utf8_write_codepoint(char *str, char32_t codepoint) {
    if (0 == ((char32_t)0xffffff80 & codepoint)) {
        /* 1-byte/7-bit ascii
         * (0b0xxxxxxx) */
        str[0] = (char)codepoint;
        return str + 1;
    }

    if (0 == ((char32_t)0xfffff800 & codepoint)) {
        /* 2-byte/11-bit utf8 code point
         * (0b110xxxxx 0b10xxxxxx) */
        str[0] = (char)(0xc0 | ((codepoint >> 6) & 0x1f));
        str[1] = (char)(0x80 | (codepoint & 0x3f));
        return str + 2;
    }

    if (0 == ((char32_t)0xffff0000 & codepoint)) {
        /* 3-byte/16-bit utf8 code point
         * (0b1110xxxx 0b10xxxxxx 0b10xxxxxx) */
        str[0] = (char)(0xe0 | ((codepoint >> 12) & 0x0f));
        str[1] = (char)(0x80 | ((codepoint >> 6) & 0x3f));
        str[2] = (char)(0x80 | (codepoint & 0x3f));
        return str + 3;
    }

    /* 4-byte/21-bit utf8 code point
     * (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx)
     * Bits above bit 20 are discarded by the 0x07 mask on the lead byte. */
    str[0] = (char)(0xf0 | ((codepoint >> 18) & 0x07));
    str[1] = (char)(0x80 | ((codepoint >> 12) & 0x3f));
    str[2] = (char)(0x80 | ((codepoint >> 6) & 0x3f));
    str[3] = (char)(0x80 | (codepoint & 0x3f));
    return str + 4;
}
|