From c945112e89222e697c3733c69eab685f606a5be5 Mon Sep 17 00:00:00 2001 From: james <> Date: Fri, 22 Jun 2012 10:22:25 +0000 Subject: *** empty log message *** --- src/utf8.c | 204 +++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 116 insertions(+), 88 deletions(-) (limited to 'src/utf8.c') diff --git a/src/utf8.c b/src/utf8.c index d4c9248..89ccd04 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -10,6 +10,9 @@ static char rcsid[] = "$Id$"; /* * $Log$ + * Revision 1.17 2012/06/22 10:22:25 james + * *** empty log message *** + * * Revision 1.16 2010/07/27 14:49:35 james * add support for byte logging * @@ -70,24 +73,25 @@ utf8_flush (Context * c) int i; int err = 0; - switch (u->utf_ptr) { - case 1: - log_f (c->l, "", u->utf_buf[0]); - break; - case 2: - log_f (c->l, "", - u->utf_buf[0], u->utf_buf[1]); - break; - case 3: - log_f (c->l, "", - u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]); - break; - case 4: - log_f (c->l, - "", - u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]); - break; - } + switch (u->utf_ptr) + { + case 1: + log_f (c->l, "", u->utf_buf[0]); + break; + case 2: + log_f (c->l, "", + u->utf_buf[0], u->utf_buf[1]); + break; + case 3: + log_f (c->l, "", + u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]); + break; + case 4: + log_f (c->l, + "", + u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]); + break; + } for (i = 0; i < u->utf_ptr; ++i) err += vt102_parse_char (c, u->utf_buf[i]); @@ -105,59 +109,75 @@ utf8_parse (Context * c, uint32_t ch) UTF8 *u = c->u; int err = 0; - if (ch == SYM_CHAR_RESET) { - u->in_utf8 = 0; - err += vt102_parse_char (c, ch); - return err; - } - - if (c->l && c->byte_logging) { - uint8_t ch8=(uint8_t) ch; - c->l->log_bytes(c->l,&ch8,1); - } - - if (!u->in_utf8) { - /* FIXME: for the moment we bodge utf8 support - need to do */ - /* L->R and R->L and double width characters */ - if (ch == 0xb9) // FIXME - OTHER 8 bit control chars - { /* CSI, not a valid utf8 start char */ - err += vt102_parse_char (c, ch); - } else if ((ch & 0xe0) == 0xc0) { /* Start of two byte unicode sequence */ - u->in_utf8 = 1; - u->utf_ptr = 0; - u->utf_buf[u->utf_ptr++] = ch; - u->ch = (ch & 0x1f) << 6; - u->sh = 0; - } else if ((ch & 0xf0) == 0xe0) { /* Start of three byte unicode sequence - */ - u->in_utf8 = 2; - u->utf_ptr = 0; - u->utf_buf[u->utf_ptr++] = ch; - u->ch = (ch & 0x0f) << 12; - u->sh = 6; - } else if ((ch & 0xf8) == 0xf0) { - u->in_utf8 = 3; - u->utf_ptr = 0; - u->utf_buf[u->utf_ptr++] = ch; - u->ch = (ch & 0x07) << 18; - u->sh = 12; - } else { + if (ch == SYM_CHAR_RESET) + { + u->in_utf8 = 0; err += vt102_parse_char (c, ch); + return err; } - } else { - if ((ch & 0xc0) != 0x80) { - err += utf8_flush (c); - err += vt102_parse_char (c, ch); - } else { - u->utf_buf[u->utf_ptr++] = ch; - u->ch |= (ch & 0x3f) << u->sh; - u->sh -= 6; - u->in_utf8--; - - if (!u->in_utf8) - err += vt102_parse_char (c, u->ch); + + if (c->l && c->byte_logging) + { + uint8_t ch8 = (uint8_t) ch; + c->l->log_bytes (c->l, &ch8, 1); + } + + if (!u->in_utf8) + { + /* FIXME: for the moment we bodge utf8 support - need to do */ + /* L->R and R->L and double width characters */ + if (ch == 0xb9) // FIXME - OTHER 8 bit control chars + { /* CSI, not a valid utf8 start char */ + err += vt102_parse_char (c, ch); + } + else if ((ch & 0xe0) == 0xc0) + { /* Start of two byte unicode sequence */ + u->in_utf8 = 1; + u->utf_ptr = 0; + u->utf_buf[u->utf_ptr++] = ch; + u->ch = (ch & 0x1f) << 6; + u->sh = 0; + } + else if ((ch & 0xf0) == 0xe0) + { /* Start of three byte unicode sequence + */ + u->in_utf8 = 2; + u->utf_ptr = 0; + u->utf_buf[u->utf_ptr++] = ch; + u->ch = (ch & 0x0f) << 12; + u->sh = 6; + } + else if ((ch & 0xf8) == 0xf0) + { + u->in_utf8 = 3; + u->utf_ptr = 0; + u->utf_buf[u->utf_ptr++] = ch; + u->ch = (ch & 0x07) << 18; + u->sh = 12; + } + else + { + err += vt102_parse_char (c, ch); + } + } + else + { + if ((ch & 0xc0) != 0x80) + { + err += utf8_flush (c); + err += vt102_parse_char (c, ch); + } + else + { + u->utf_buf[u->utf_ptr++] = ch; + u->ch |= (ch & 0x3f) << u->sh; + u->sh -= 6; + u->in_utf8--; + + if (!u->in_utf8) + err += vt102_parse_char (c, u->ch); + } } - } return err; } @@ -172,31 +192,39 @@ utf8_new (void) ret->in_utf8 = 0; + return ret; } int -utf8_encode (char *ptr, int ch) +utf8_encode (uint8_t * ptr, int ch) { - if (ch < 0x80) { - ptr[0] = ch; - return 1; - } else if (ch < 0x800) { - ptr[0] = 0xc0 | (ch >> 6); - ptr[1] = 0x80 | (ch & 0x3f); - return 2; - } else if (ch < 0x10000) { - ptr[0] = 0xe0 | (ch >> 12); - ptr[1] = 0x80 | ((ch >> 6) & 0x3f); - ptr[2] = 0x80 | (ch & 0x3f); - return 3; - } else if (ch < 0x1fffff) { - ptr[0] = 0xf0 | (ch >> 18); - ptr[1] = 0x80 | ((ch >> 12) & 0x3f); - ptr[2] = 0x80 | ((ch >> 6) & 0x3f); - ptr[3] = 0x80 | (ch & 0x3f); - return 4; - } + if (ch < 0x80) + { + ptr[0] = ch; + return 1; + } + else if (ch < 0x800) + { + ptr[0] = 0xc0 | (ch >> 6); + ptr[1] = 0x80 | (ch & 0x3f); + return 2; + } + else if (ch < 0x10000) + { + ptr[0] = 0xe0 | (ch >> 12); + ptr[1] = 0x80 | ((ch >> 6) & 0x3f); + ptr[2] = 0x80 | (ch & 0x3f); + return 3; + } + else if (ch < 0x1fffff) + { + ptr[0] = 0xf0 | (ch >> 18); + ptr[1] = 0x80 | ((ch >> 12) & 0x3f); + ptr[2] = 0x80 | ((ch >> 6) & 0x3f); + ptr[3] = 0x80 | (ch & 0x3f); + return 4; + } return 0; } -- cgit v1.2.3