diff options
author | james <> | 2008-03-02 10:38:18 +0000 |
---|---|---|
committer | james <> | 2008-03-02 10:38:18 +0000 |
commit | ef21e0368adf8ae0e557e9c02fe776f69818ace6 (patch) | |
tree | cdfceba6afd3c2570cfe6d3e0c4f8fb46f182d15 /src/utf8.c | |
parent | f23e17f2eac3f506afe9a1e44302112cc363b59f (diff) | |
download | sympathy-ef21e0368adf8ae0e557e9c02fe776f69818ace6.tar.gz sympathy-ef21e0368adf8ae0e557e9c02fe776f69818ace6.tar.bz2 sympathy-ef21e0368adf8ae0e557e9c02fe776f69818ace6.zip |
*** empty log message ***
Diffstat (limited to 'src/utf8.c')
-rw-r--r-- | src/utf8.c | 187 |
1 file changed, 83 insertions, 104 deletions
@@ -10,6 +10,9 @@ static char rcsid[] = "$Id$"; /* * $Log$ + * Revision 1.10 2008/03/02 10:37:56 james + * *** empty log message *** + * * Revision 1.9 2008/02/27 01:31:14 james * *** empty log message *** * @@ -48,25 +51,24 @@ utf8_flush (Context * c) UTF8 *u = c->u; int i; - switch (u->utf_ptr) - { - case 1: - log_f (c->l, "<invalid utf-8 sequence: \\%03o>", u->utf_buf[0]); - break; - case 2: - log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o>", - u->utf_buf[0], u->utf_buf[1]); - break; - case 3: - log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o \\%03o>", - u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]); - break; - case 4: - log_f (c->l, - "<invalid utf-8 sequence: \\%03o \\%03o \\%03o \\%03o>", - u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]); - break; - } + switch (u->utf_ptr) { + case 1: + log_f (c->l, "<invalid utf-8 sequence: \\%03o>", u->utf_buf[0]); + break; + case 2: + log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o>", + u->utf_buf[0], u->utf_buf[1]); + break; + case 3: + log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o \\%03o>", + u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]); + break; + case 4: + log_f (c->l, + "<invalid utf-8 sequence: \\%03o \\%03o \\%03o \\%03o>", + u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]); + break; + } for (i = 0; i < u->utf_ptr; ++i) vt102_parse_char (c, u->utf_buf[i]); @@ -80,68 +82,52 @@ utf8_parse (Context * c, uint32_t ch) { UTF8 *u = c->u; - if (ch == SYM_CHAR_RESET) - { - u->in_utf8 = 0; - vt102_parse_char (c, ch); - return; - } + if (ch == SYM_CHAR_RESET) { + u->in_utf8 = 0; + vt102_parse_char (c, ch); + return; + } - if (!u->in_utf8) - { - /*FIXME: for the moment we bodge utf8 support - need to do */ - /* L->R and R->L and double width characters */ - if (ch == 0xb9) - { /*CSI, not a valid utf8 start char */ - vt102_parse_char (c, ch); - } - else if ((ch & 0xe0) == 0xc0) - { /*Start of two byte unicode sequence */ - u->in_utf8 = 1; - u->utf_ptr = 0; - u->utf_buf[u->utf_ptr++] = 
ch; - u->ch = (ch & 0x1f) << 6; - u->sh = 0; - } - else if ((ch & 0xf0) == 0xe0) - { /*Start of three byte unicode sequence */ - u->in_utf8 = 2; - u->utf_ptr = 0; - u->utf_buf[u->utf_ptr++] = ch; - u->ch = (ch & 0x0f) << 12; - u->sh = 6; - } - else if ((ch & 0xf8) == 0xf0) - { - u->in_utf8 = 3; - u->utf_ptr = 0; - u->utf_buf[u->utf_ptr++] = ch; - u->ch = (ch & 0x07) << 18; - u->sh = 12; - } - else - { - vt102_parse_char (c, ch); - } + if (!u->in_utf8) { + /*FIXME: for the moment we bodge utf8 support - need to do */ + /* L->R and R->L and double width characters */ + if (ch == 0xb9) { /*CSI, not a valid utf8 start char */ + vt102_parse_char (c, ch); + } else if ((ch & 0xe0) == 0xc0) { /*Start of two byte unicode sequence */ + u->in_utf8 = 1; + u->utf_ptr = 0; + u->utf_buf[u->utf_ptr++] = ch; + u->ch = (ch & 0x1f) << 6; + u->sh = 0; + } else if ((ch & 0xf0) == 0xe0) { /*Start of three byte unicode sequence */ + u->in_utf8 = 2; + u->utf_ptr = 0; + u->utf_buf[u->utf_ptr++] = ch; + u->ch = (ch & 0x0f) << 12; + u->sh = 6; + } else if ((ch & 0xf8) == 0xf0) { + u->in_utf8 = 3; + u->utf_ptr = 0; + u->utf_buf[u->utf_ptr++] = ch; + u->ch = (ch & 0x07) << 18; + u->sh = 12; + } else { + vt102_parse_char (c, ch); } - else - { - if ((ch & 0xc0) != 0x80) - { - utf8_flush (c); - vt102_parse_char (c, ch); - } - else - { - u->utf_buf[u->utf_ptr++] = ch; - u->ch |= (ch & 0x3f) << u->sh; - u->sh -= 6; - u->in_utf8--; - - if (!u->in_utf8) - vt102_parse_char (c, u->ch); - } + } else { + if ((ch & 0xc0) != 0x80) { + utf8_flush (c); + vt102_parse_char (c, ch); + } else { + u->utf_buf[u->utf_ptr++] = ch; + u->ch |= (ch & 0x3f) << u->sh; + u->sh -= 6; + u->in_utf8--; + + if (!u->in_utf8) + vt102_parse_char (c, u->ch); } + } } @@ -161,32 +147,25 @@ int utf8_encode (char *ptr, int ch) { - if (ch < 0x80) - { - ptr[0] = ch; - return 1; - } - else if (ch < 0x800) - { - ptr[0] = 0xc0 | (ch >> 6); - ptr[1] = 0x80 | (ch & 0x3f); - return 2; - } - else if (ch < 0x10000) - { - ptr[0] = 0xe0 | (ch >> 
12); - ptr[1] = 0x80 | ((ch >> 6) & 0x3f); - ptr[2] = 0x80 | (ch & 0x3f); - return 3; - } - else if (ch < 0x1fffff) - { - ptr[0] = 0xf0 | (ch >> 18); - ptr[1] = 0x80 | ((ch >> 12) & 0x3f); - ptr[2] = 0x80 | ((ch >> 6) & 0x3f); - ptr[3] = 0x80 | (ch & 0x3f); - return 4; - } + if (ch < 0x80) { + ptr[0] = ch; + return 1; + } else if (ch < 0x800) { + ptr[0] = 0xc0 | (ch >> 6); + ptr[1] = 0x80 | (ch & 0x3f); + return 2; + } else if (ch < 0x10000) { + ptr[0] = 0xe0 | (ch >> 12); + ptr[1] = 0x80 | ((ch >> 6) & 0x3f); + ptr[2] = 0x80 | (ch & 0x3f); + return 3; + } else if (ch < 0x1fffff) { + ptr[0] = 0xf0 | (ch >> 18); + ptr[1] = 0x80 | ((ch >> 12) & 0x3f); + ptr[2] = 0x80 | ((ch >> 6) & 0x3f); + ptr[3] = 0x80 | (ch & 0x3f); + return 4; + } return 0; } |