#include "utf8.hpp"

/*
 * Number of bytes utf8_write_codepoint() emits for `codepoint`.
 *
 * The answer depends only on the magnitude of the value:
 *   0x0000  .. 0x007f  -> 1
 *   0x0080  .. 0x07ff  -> 2
 *   0x0800  .. 0xffff  -> 3
 *   anything larger    -> 4
 *
 * No validation is performed here: surrogate values and values above
 * 0x10ffff are sized like any other value in their range.
 */
size_t utf8_codepoint_size(char32_t codepoint)
{
    if (codepoint < 0x80) {
        return 1;
    }
    if (codepoint < 0x800) {
        return 2;
    }
    if (codepoint < 0x10000) {
        return 3;
    }
    return 4;
}

/*
 * Encode `codepoint` into the buffer at `str` and return a pointer to the
 * byte immediately after the last one written.
 *
 * Between one and four bytes are stored (see utf8_codepoint_size()); no
 * terminator is appended and the buffer size is not checked, so the caller
 * must provide enough room.
 *
 * The input is not validated. In the 4-byte form only the low 21 bits of
 * `codepoint` are encoded; any higher bits are dropped.
 */
char *utf8_write_codepoint(char *str, char32_t codepoint)
{
    switch (utf8_codepoint_size(codepoint)) {
    case 1:
        /* 7 bits: 0b0xxxxxxx */
        *str++ = (char)codepoint;
        break;

    case 2:
        /* 11 bits: 0b110xxxxx 0b10xxxxxx */
        *str++ = (char)(0xc0 | ((codepoint >> 6) & 0x1f));
        *str++ = (char)(0x80 | (codepoint & 0x3f));
        break;

    case 3:
        /* 16 bits: 0b1110xxxx 0b10xxxxxx 0b10xxxxxx */
        *str++ = (char)(0xe0 | ((codepoint >> 12) & 0x0f));
        *str++ = (char)(0x80 | ((codepoint >> 6) & 0x3f));
        *str++ = (char)(0x80 | (codepoint & 0x3f));
        break;

    default:
        /* 21 bits: 0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx */
        *str++ = (char)(0xf0 | ((codepoint >> 18) & 0x07));
        *str++ = (char)(0x80 | ((codepoint >> 12) & 0x3f));
        *str++ = (char)(0x80 | ((codepoint >> 6) & 0x3f));
        *str++ = (char)(0x80 | (codepoint & 0x3f));
        break;
    }

    return str;
}