roo_io
API Documentation for roo_io
Loading...
Searching...
No Matches
u8c.h
Go to the documentation of this file.
1/*
2** (C) by Remo Dentato (rdentato@gmail.com)
3**
4** This software is distributed under the terms of the MIT license:
5** https://opensource.org/licenses/MIT
6**
7** *** UTF-8 encode/decode ***
8**
9** int u8next(char *s [, int *c]) --> Returns the number of bytes encoding the first
10** codepoint in the string s.
11** If c is not NULL, stores the codepoint in *c.
12** If the encoding is not valid, returns -1 and stores
13** in *c the first byte of the string.
14** Note that c is optional (implies NULL).
15**
16** int u8strlen(char *s) --> Returns the number of codepoints in the string s.
17** Does NOT validate the string encoding!
18**
19** int u8codepoint(char *s) --> Returns the codepoint encoded in s or the first
20** byte if the encoding is not valid.
21**
22** int u8encode(int c[, char *s]) --> Stores the encoding of codepoint c in s and
23** places a string terminator ('\0') at the end.
24** There must be *at least* 5 bytes allocated in the
25** string s. s can be NULL (or omitted entirely).
26** Returns the length of the encoding in bytes.
27**
28**
29** The decoding function is based on the work of Bjoern Hoehrmann:
30** http://bjoern.hoehrmann.de/utf-8/decoder/dfa
31**
32** with the following differences:
33** - the implementation is faster than what is offered on the original site.
34** - the code is clearer to read and to relate to Bjoern's state machines
35** - it has been extended to include C0 80 as the encoding for U+0000.
36** (see https://en.wikipedia.org/wiki/UTF-8#Modified_UTF-8 )
37**
38** To use it, include u8c.h in your code and link against u8c.cpp
39*/
40
41#ifndef U8C_H__
42#define U8C_H__
43
44#include <string.h>
45
46// #define u8_exp(x) x
47// #define u8_1(x,...) x
48// #define u8_2(x,y,...) y
49
50namespace u8c {
51
52// size_t u8strlen(const char *s);
53size_t u8next_(const char *start, const char* end, char32_t& ch);
54
55} // namespace u8c
56
57#endif
Definition u8c.cpp:64
size_t u8next_(const char *start, const char *end, char32_t &val)
Definition u8c.cpp:66