1 files changed, 116 insertions, 88 deletions
diff --git a/src/utf8.c b/src/utf8.c
index d4c9248..89ccd04 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -10,6 +10,9 @@ static char rcsid[] = "$Id$";
 
 /* 
  * $Log$
+ * Revision 1.17  2012/06/22 10:22:25  james
+ * *** empty log message ***
+ *
  * Revision 1.16  2010/07/27 14:49:35  james
  * add support for byte logging
  *
@@ -70,24 +73,25 @@ utf8_flush (Context * c)
   int i;
   int err = 0;
 
-  switch (u->utf_ptr) {
-  case 1:
-    log_f (c->l, "<invalid utf-8 sequence: \\%03o>", u->utf_buf[0]);
-    break;
-  case 2:
-    log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o>",
-           u->utf_buf[0], u->utf_buf[1]);
-    break;
-  case 3:
-    log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o \\%03o>",
-           u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]);
-    break;
-  case 4:
-    log_f (c->l,
-           "<invalid utf-8 sequence: \\%03o \\%03o \\%03o \\%03o>",
-           u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]);
-    break;
-  }
+  switch (u->utf_ptr)
+    {
+    case 1:
+      log_f (c->l, "<invalid utf-8 sequence: \\%03o>", u->utf_buf[0]);
+      break;
+    case 2:
+      log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o>",
+             u->utf_buf[0], u->utf_buf[1]);
+      break;
+    case 3:
+      log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o \\%03o>",
+             u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]);
+      break;
+    case 4:
+      log_f (c->l,
+             "<invalid utf-8 sequence: \\%03o \\%03o \\%03o \\%03o>",
+             u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]);
+      break;
+    }
 
   for (i = 0; i < u->utf_ptr; ++i)
     err += vt102_parse_char (c, u->utf_buf[i]);
@@ -105,59 +109,75 @@ utf8_parse (Context * c, uint32_t ch)
   UTF8 *u = c->u;
   int err = 0;
 
-  if (ch == SYM_CHAR_RESET) {
-    u->in_utf8 = 0;
-    err += vt102_parse_char (c, ch);
-    return err;
-  }
-
-  if (c->l && c->byte_logging) {
-	uint8_t ch8=(uint8_t) ch;
-	c->l->log_bytes(c->l,&ch8,1);
-  }
-
-  if (!u->in_utf8) {
-    /* FIXME: for the moment we bodge utf8 support - need to do */
-    /* L->R and R->L and double width characters */
-    if (ch == 0xb9)             // FIXME - OTHER 8 bit control chars
-    {                           /* CSI, not a valid utf8 start char */
-      err += vt102_parse_char (c, ch);
-    } else if ((ch & 0xe0) == 0xc0) { /* Start of two byte unicode sequence */
-      u->in_utf8 = 1;
-      u->utf_ptr = 0;
-      u->utf_buf[u->utf_ptr++] = ch;
-      u->ch = (ch & 0x1f) << 6;
-      u->sh = 0;
-    } else if ((ch & 0xf0) == 0xe0) { /* Start of three byte unicode sequence 
-                                       */
-      u->in_utf8 = 2;
-      u->utf_ptr = 0;
-      u->utf_buf[u->utf_ptr++] = ch;
-      u->ch = (ch & 0x0f) << 12;
-      u->sh = 6;
-    } else if ((ch & 0xf8) == 0xf0) {
-      u->in_utf8 = 3;
-      u->utf_ptr = 0;
-      u->utf_buf[u->utf_ptr++] = ch;
-      u->ch = (ch & 0x07) << 18;
-      u->sh = 12;
-    } else {
+  if (ch == SYM_CHAR_RESET)
+    {
+      u->in_utf8 = 0;
       err += vt102_parse_char (c, ch);
+      return err;
     }
-  } else {
-    if ((ch & 0xc0) != 0x80) {
-      err += utf8_flush (c);
-      err += vt102_parse_char (c, ch);
-    } else {
-      u->utf_buf[u->utf_ptr++] = ch;
-      u->ch |= (ch & 0x3f) << u->sh;
-      u->sh -= 6;
-      u->in_utf8--;
-
-      if (!u->in_utf8)
-        err += vt102_parse_char (c, u->ch);
+
+  if (c->l && c->byte_logging)
+    {
+      uint8_t ch8 = (uint8_t) ch;
+      c->l->log_bytes (c->l, &ch8, 1);
+    }
+
+  if (!u->in_utf8)
+    {
+      /* FIXME: for the moment we bodge utf8 support - need to do */
+      /* L->R and R->L and double width characters */
+      if (ch == 0xb9)           // FIXME - OTHER 8 bit control chars
+        {                       /* CSI, not a valid utf8 start char */
+          err += vt102_parse_char (c, ch);
+        }
+      else if ((ch & 0xe0) == 0xc0)
+        {                       /* Start of two byte unicode sequence */
+          u->in_utf8 = 1;
+          u->utf_ptr = 0;
+          u->utf_buf[u->utf_ptr++] = ch;
+          u->ch = (ch & 0x1f) << 6;
+          u->sh = 0;
+        }
+      else if ((ch & 0xf0) == 0xe0)
+        {                       /* Start of three byte unicode sequence 
+                                 */
+          u->in_utf8 = 2;
+          u->utf_ptr = 0;
+          u->utf_buf[u->utf_ptr++] = ch;
+          u->ch = (ch & 0x0f) << 12;
+          u->sh = 6;
+        }
+      else if ((ch & 0xf8) == 0xf0)
+        {
+          u->in_utf8 = 3;
+          u->utf_ptr = 0;
+          u->utf_buf[u->utf_ptr++] = ch;
+          u->ch = (ch & 0x07) << 18;
+          u->sh = 12;
+        }
+      else
+        {
+          err += vt102_parse_char (c, ch);
+        }
+    }
+  else
+    {
+      if ((ch & 0xc0) != 0x80)
+        {
+          err += utf8_flush (c);
+          err += vt102_parse_char (c, ch);
+        }
+      else
+        {
+          u->utf_buf[u->utf_ptr++] = ch;
+          u->ch |= (ch & 0x3f) << u->sh;
+          u->sh -= 6;
+          u->in_utf8--;
+
+          if (!u->in_utf8)
+            err += vt102_parse_char (c, u->ch);
+        }
     }
-  }
   return err;
 }
 
@@ -172,31 +192,39 @@ utf8_new (void)
 
   ret->in_utf8 = 0;
 
+  return ret;
 }
 
 int
-utf8_encode (char *ptr, int ch)
+utf8_encode (uint8_t * ptr, int ch) /* returns bytes written, 0 if none */
 {
 
-  if (ch < 0x80) {
-    ptr[0] = ch;
-    return 1;
-  } else if (ch < 0x800) {
-    ptr[0] = 0xc0 | (ch >> 6);
-    ptr[1] = 0x80 | (ch & 0x3f);
-    return 2;
-  } else if (ch < 0x10000) {
-    ptr[0] = 0xe0 | (ch >> 12);
-    ptr[1] = 0x80 | ((ch >> 6) & 0x3f);
-    ptr[2] = 0x80 | (ch & 0x3f);
-    return 3;
-  } else if (ch < 0x1fffff) {
-    ptr[0] = 0xf0 | (ch >> 18);
-    ptr[1] = 0x80 | ((ch >> 12) & 0x3f);
-    ptr[2] = 0x80 | ((ch >> 6) & 0x3f);
-    ptr[3] = 0x80 | (ch & 0x3f);
-    return 4;
-  }
+  if ((unsigned) ch < 0x80)    /* unsigned: negative ch matches no range */
+    {                           /* 7 bits: 0xxxxxxx */
+      ptr[0] = ch;
+      return 1;
+    }
+  else if ((unsigned) ch < 0x800)
+    {                           /* 11 bits: 110xxxxx 10xxxxxx */
+      ptr[0] = 0xc0 | (ch >> 6);
+      ptr[1] = 0x80 | (ch & 0x3f);
+      return 2;
+    }
+  else if ((unsigned) ch < 0x10000)
+    {                           /* 16 bits: 1110xxxx 10xxxxxx 10xxxxxx */
+      ptr[0] = 0xe0 | (ch >> 12);
+      ptr[1] = 0x80 | ((ch >> 6) & 0x3f);
+      ptr[2] = 0x80 | (ch & 0x3f);
+      return 3;
+    }
+  else if ((unsigned) ch < 0x200000)
+    {                           /* 21 bits, 0x1fffff inclusive; ptr needs 4 */
+      ptr[0] = 0xf0 | (ch >> 18);
+      ptr[1] = 0x80 | ((ch >> 12) & 0x3f);
+      ptr[2] = 0x80 | ((ch >> 6) & 0x3f);
+      ptr[3] = 0x80 | (ch & 0x3f);
+      return 4;
+    }
   return 0;
 }