valeri/src/utf8.cpp

43 lines
1.4 KiB
C++

#include "utf8.hpp"
/**
 * Report how many bytes the UTF-8 encoding of `codepoint` occupies.
 *
 * The answer is chosen purely by magnitude: values below 0x80 take one
 * byte, below 0x800 two, below 0x10000 three, and everything else four.
 * No validity checking is done, so surrogate values and values above
 * U+10FFFF are sized like any other number in their range.
 *
 * @param codepoint  the value to measure
 * @return           a byte count in the range 1..4
 */
size_t utf8_codepoint_size(char32_t codepoint) {
  if (codepoint < 0x80) {
    return 1; /* fits in 7 bits */
  }
  if (codepoint < 0x800) {
    return 2; /* fits in 11 bits */
  }
  if (codepoint < 0x10000) {
    return 3; /* fits in 16 bits */
  }
  return 4;
}
/**
 * Encode `codepoint` as UTF-8 starting at `str`.
 *
 * Between one and four bytes are stored, selected by the magnitude of
 * `codepoint` (below 0x80, below 0x800, below 0x10000, otherwise four).
 * No terminating NUL is written and the input is not validated: in the
 * four-byte form only the low 21 bits of `codepoint` are encoded, and
 * surrogate values are emitted as ordinary three-byte sequences.
 *
 * @param str        destination; the caller must provide room for the
 *                   bytes written (at most four)
 * @param codepoint  the value to encode
 * @return           pointer to the byte immediately after the last one
 *                   written
 */
char *utf8_write_codepoint(char *str, char32_t codepoint) {
  /* Marker bits of the first byte, and the payload bits it can carry,
   * indexed by (sequence length - 1):
   *   0b0xxxxxxx, 0b110xxxxx, 0b1110xxxx, 0b11110xxx */
  static const unsigned char lead_marker[4] = {0x00, 0xc0, 0xe0, 0xf0};
  static const unsigned char lead_payload[4] = {0x7f, 0x1f, 0x0f, 0x07};

  size_t length = 4;
  if (codepoint < 0x80) {
    length = 1;
  } else if (codepoint < 0x800) {
    length = 2;
  } else if (codepoint < 0x10000) {
    length = 3;
  }

  /* Continuation bytes (0b10xxxxxx) each take the next six low bits, so
   * fill them from the end of the sequence towards the front. */
  char32_t remaining = codepoint;
  for (size_t index = length - 1; index > 0; --index) {
    str[index] = (char)(0x80 | (remaining & 0x3f));
    remaining >>= 6;
  }

  /* Whatever is left belongs in the lead byte, under its length marker. */
  str[0] = (char)(lead_marker[length - 1] |
                  (remaining & lead_payload[length - 1]));

  return str + length;
}