about | summary | refs | log | tree | commit | diff | stats
path: root/src/utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/utf8.c')
-rw-r--r-- src/utf8.c 204
1 files changed, 116 insertions, 88 deletions
diff --git a/src/utf8.c b/src/utf8.c
index d4c9248..89ccd04 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -10,6 +10,9 @@ static char rcsid[] = "$Id$";
/*
* $Log$
+ * Revision 1.17 2012/06/22 10:22:25 james
+ * *** empty log message ***
+ *
* Revision 1.16 2010/07/27 14:49:35 james
* add support for byte logging
*
@@ -70,24 +73,25 @@ utf8_flush (Context * c)
int i;
int err = 0;
- switch (u->utf_ptr) {
- case 1:
- log_f (c->l, "<invalid utf-8 sequence: \\%03o>", u->utf_buf[0]);
- break;
- case 2:
- log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o>",
- u->utf_buf[0], u->utf_buf[1]);
- break;
- case 3:
- log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o \\%03o>",
- u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]);
- break;
- case 4:
- log_f (c->l,
- "<invalid utf-8 sequence: \\%03o \\%03o \\%03o \\%03o>",
- u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]);
- break;
- }
+ switch (u->utf_ptr)
+ {
+ case 1:
+ log_f (c->l, "<invalid utf-8 sequence: \\%03o>", u->utf_buf[0]);
+ break;
+ case 2:
+ log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o>",
+ u->utf_buf[0], u->utf_buf[1]);
+ break;
+ case 3:
+ log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o \\%03o>",
+ u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]);
+ break;
+ case 4:
+ log_f (c->l,
+ "<invalid utf-8 sequence: \\%03o \\%03o \\%03o \\%03o>",
+ u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]);
+ break;
+ }
for (i = 0; i < u->utf_ptr; ++i)
err += vt102_parse_char (c, u->utf_buf[i]);
@@ -105,59 +109,75 @@ utf8_parse (Context * c, uint32_t ch)
UTF8 *u = c->u;
int err = 0;
- if (ch == SYM_CHAR_RESET) {
- u->in_utf8 = 0;
- err += vt102_parse_char (c, ch);
- return err;
- }
-
- if (c->l && c->byte_logging) {
- uint8_t ch8=(uint8_t) ch;
- c->l->log_bytes(c->l,&ch8,1);
- }
-
- if (!u->in_utf8) {
- /* FIXME: for the moment we bodge utf8 support - need to do */
- /* L->R and R->L and double width characters */
- if (ch == 0xb9) // FIXME - OTHER 8 bit control chars
- { /* CSI, not a valid utf8 start char */
- err += vt102_parse_char (c, ch);
- } else if ((ch & 0xe0) == 0xc0) { /* Start of two byte unicode sequence */
- u->in_utf8 = 1;
- u->utf_ptr = 0;
- u->utf_buf[u->utf_ptr++] = ch;
- u->ch = (ch & 0x1f) << 6;
- u->sh = 0;
- } else if ((ch & 0xf0) == 0xe0) { /* Start of three byte unicode sequence
- */
- u->in_utf8 = 2;
- u->utf_ptr = 0;
- u->utf_buf[u->utf_ptr++] = ch;
- u->ch = (ch & 0x0f) << 12;
- u->sh = 6;
- } else if ((ch & 0xf8) == 0xf0) {
- u->in_utf8 = 3;
- u->utf_ptr = 0;
- u->utf_buf[u->utf_ptr++] = ch;
- u->ch = (ch & 0x07) << 18;
- u->sh = 12;
- } else {
+ if (ch == SYM_CHAR_RESET)
+ {
+ u->in_utf8 = 0;
err += vt102_parse_char (c, ch);
+ return err;
}
- } else {
- if ((ch & 0xc0) != 0x80) {
- err += utf8_flush (c);
- err += vt102_parse_char (c, ch);
- } else {
- u->utf_buf[u->utf_ptr++] = ch;
- u->ch |= (ch & 0x3f) << u->sh;
- u->sh -= 6;
- u->in_utf8--;
-
- if (!u->in_utf8)
- err += vt102_parse_char (c, u->ch);
+
+ if (c->l && c->byte_logging)
+ {
+ uint8_t ch8 = (uint8_t) ch;
+ c->l->log_bytes (c->l, &ch8, 1);
+ }
+
+ if (!u->in_utf8)
+ {
+ /* FIXME: for the moment we bodge utf8 support - need to do */
+ /* L->R and R->L and double width characters */
+ if (ch == 0xb9) // FIXME - OTHER 8 bit control chars
+ { /* CSI, not a valid utf8 start char */
+ err += vt102_parse_char (c, ch);
+ }
+ else if ((ch & 0xe0) == 0xc0)
+ { /* Start of two byte unicode sequence */
+ u->in_utf8 = 1;
+ u->utf_ptr = 0;
+ u->utf_buf[u->utf_ptr++] = ch;
+ u->ch = (ch & 0x1f) << 6;
+ u->sh = 0;
+ }
+ else if ((ch & 0xf0) == 0xe0)
+ { /* Start of three byte unicode sequence
+ */
+ u->in_utf8 = 2;
+ u->utf_ptr = 0;
+ u->utf_buf[u->utf_ptr++] = ch;
+ u->ch = (ch & 0x0f) << 12;
+ u->sh = 6;
+ }
+ else if ((ch & 0xf8) == 0xf0)
+ {
+ u->in_utf8 = 3;
+ u->utf_ptr = 0;
+ u->utf_buf[u->utf_ptr++] = ch;
+ u->ch = (ch & 0x07) << 18;
+ u->sh = 12;
+ }
+ else
+ {
+ err += vt102_parse_char (c, ch);
+ }
+ }
+ else
+ {
+ if ((ch & 0xc0) != 0x80)
+ {
+ err += utf8_flush (c);
+ err += vt102_parse_char (c, ch);
+ }
+ else
+ {
+ u->utf_buf[u->utf_ptr++] = ch;
+ u->ch |= (ch & 0x3f) << u->sh;
+ u->sh -= 6;
+ u->in_utf8--;
+
+ if (!u->in_utf8)
+ err += vt102_parse_char (c, u->ch);
+ }
}
- }
return err;
}
@@ -172,31 +192,39 @@ utf8_new (void)
ret->in_utf8 = 0;
+ return ret;
}
int
-utf8_encode (char *ptr, int ch)
+utf8_encode (uint8_t * ptr, int ch) /* Encode ch as UTF-8 into ptr; returns the number of bytes written (1-4), or 0 when no branch accepts ch. */
{ /* Writes as far as ptr[3], so ptr needs room for 4 bytes; no terminator is appended. */
- if (ch < 0x80) {
- ptr[0] = ch;
- return 1;
- } else if (ch < 0x800) {
- ptr[0] = 0xc0 | (ch >> 6);
- ptr[1] = 0x80 | (ch & 0x3f);
- return 2;
- } else if (ch < 0x10000) {
- ptr[0] = 0xe0 | (ch >> 12);
- ptr[1] = 0x80 | ((ch >> 6) & 0x3f);
- ptr[2] = 0x80 | (ch & 0x3f);
- return 3;
- } else if (ch < 0x1fffff) {
- ptr[0] = 0xf0 | (ch >> 18);
- ptr[1] = 0x80 | ((ch >> 12) & 0x3f);
- ptr[2] = 0x80 | ((ch >> 6) & 0x3f);
- ptr[3] = 0x80 | (ch & 0x3f);
- return 4;
- }
+ if (ch < 0x80) /* One byte, stored as-is. NOTE(review): ch is a signed int, so a negative ch also takes this branch - confirm callers never pass one. */
+ {
+ ptr[0] = ch;
+ return 1;
+ }
+ else if (ch < 0x800) /* Two bytes: 110xxxxx 10xxxxxx. */
+ {
+ ptr[0] = 0xc0 | (ch >> 6);
+ ptr[1] = 0x80 | (ch & 0x3f);
+ return 2;
+ }
+ else if (ch < 0x10000) /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx. NOTE(review): 0xd800-0xdfff (surrogates) are not rejected here. */
+ {
+ ptr[0] = 0xe0 | (ch >> 12);
+ ptr[1] = 0x80 | ((ch >> 6) & 0x3f);
+ ptr[2] = 0x80 | (ch & 0x3f);
+ return 3;
+ }
+ else if (ch < 0x1fffff) /* Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. NOTE(review): the bound excludes 0x1fffff itself and is above the Unicode maximum 0x10ffff - confirm intent. */
+ {
+ ptr[0] = 0xf0 | (ch >> 18);
+ ptr[1] = 0x80 | ((ch >> 12) & 0x3f);
+ ptr[2] = 0x80 | ((ch >> 6) & 0x3f);
+ ptr[3] = 0x80 | (ch & 0x3f);
+ return 4;
+ }
return 0; /* ch was not below any bound above: nothing has been written to ptr. */
}