1 files changed, 87 insertions, 106 deletions
diff --git a/src/utf8.c b/src/utf8.c
index fd34fbf..baa03cb 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -1,4 +1,4 @@
-/*
+/* 
  * utf8.c:
  *
  * Copyright (c) 2008 James McKenzie <james@fishsoup.dhs.org>,
@@ -8,8 +8,11 @@
 
 static char rcsid[] = "$Id$";
 
-/*
+/* 
  * $Log$
+ * Revision 1.14  2008/03/07 12:37:04  james
+ * *** empty log message ***
+ *
  * Revision 1.13  2008/03/06 16:49:39  james
  * *** empty log message ***
  *
@@ -61,25 +64,24 @@ utf8_flush (Context * c)
   int i;
   int err = 0;
 
-  switch (u->utf_ptr)
-    {
-    case 1:
-      log_f (c->l, "<invalid utf-8 sequence: \\%03o>", u->utf_buf[0]);
-      break;
-    case 2:
-      log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o>",
-             u->utf_buf[0], u->utf_buf[1]);
-      break;
-    case 3:
-      log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o \\%03o>",
-             u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]);
-      break;
-    case 4:
-      log_f (c->l,
-             "<invalid utf-8 sequence: \\%03o \\%03o \\%03o \\%03o>",
-             u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]);
-      break;
-    }
+  switch (u->utf_ptr) {
+  case 1:
+    log_f (c->l, "<invalid utf-8 sequence: \\%03o>", u->utf_buf[0]);
+    break;
+  case 2:
+    log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o>",
+           u->utf_buf[0], u->utf_buf[1]);
+    break;
+  case 3:
+    log_f (c->l, "<invalid utf-8 sequence: \\%03o \\%03o \\%03o>",
+           u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]);
+    break;
+  case 4:
+    log_f (c->l,
+           "<invalid utf-8 sequence: \\%03o \\%03o \\%03o \\%03o>",
+           u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]);
+    break;
+  }
 
   for (i = 0; i < u->utf_ptr; ++i)
     err += vt102_parse_char (c, u->utf_buf[i]);
@@ -97,68 +99,54 @@ utf8_parse (Context * c, uint32_t ch)
   UTF8 *u = c->u;
   int err = 0;
 
-  if (ch == SYM_CHAR_RESET)
-    {
-      u->in_utf8 = 0;
+  if (ch == SYM_CHAR_RESET) {
+    u->in_utf8 = 0;
+    err += vt102_parse_char (c, ch);
+    return err;
+  }
+
+  if (!u->in_utf8) {
+    /* FIXME: for the moment we bodge utf8 support - need to do */
+    /* L->R and R->L and double width characters */
+    if (ch == 0xb9)             // FIXME - OTHER 8 bit control chars
+    {                           /* CSI, not a valid utf8 start char */
+      err += vt102_parse_char (c, ch);
+    } else if ((ch & 0xe0) == 0xc0) { /* Start of two byte unicode sequence */
+      u->in_utf8 = 1;
+      u->utf_ptr = 0;
+      u->utf_buf[u->utf_ptr++] = ch;
+      u->ch = (ch & 0x1f) << 6;
+      u->sh = 0;
+    } else if ((ch & 0xf0) == 0xe0) {
+      /* Start of three byte unicode sequence */
+      u->in_utf8 = 2;
+      u->utf_ptr = 0;
+      u->utf_buf[u->utf_ptr++] = ch;
+      u->ch = (ch & 0x0f) << 12;
+      u->sh = 6;
+    } else if ((ch & 0xf8) == 0xf0) { /* Start of four byte unicode sequence */
+      u->in_utf8 = 3;
+      u->utf_ptr = 0;
+      u->utf_buf[u->utf_ptr++] = ch;
+      u->ch = (ch & 0x07) << 18;
+      u->sh = 12;
+    } else {
       err += vt102_parse_char (c, ch);
-      return err;
-    }
-
-  if (!u->in_utf8)
-    {
-      /*FIXME: for the moment we bodge utf8 support - need to do */
-      /* L->R and R->L and double width characters */
-      if (ch == 0xb9)           //FIXME - OTHER 8 bit control chars
-        {                       /*CSI, not a valid utf8 start char */
-          err += vt102_parse_char (c, ch);
-        }
-      else if ((ch & 0xe0) == 0xc0)
-        {                       /*Start of two byte unicode sequence */
-          u->in_utf8 = 1;
-          u->utf_ptr = 0;
-          u->utf_buf[u->utf_ptr++] = ch;
-          u->ch = (ch & 0x1f) << 6;
-          u->sh = 0;
-        }
-      else if ((ch & 0xf0) == 0xe0)
-        {                       /*Start of three byte unicode sequence */
-          u->in_utf8 = 2;
-          u->utf_ptr = 0;
-          u->utf_buf[u->utf_ptr++] = ch;
-          u->ch = (ch & 0x0f) << 12;
-          u->sh = 6;
-        }
-      else if ((ch & 0xf8) == 0xf0)
-        {
-          u->in_utf8 = 3;
-          u->utf_ptr = 0;
-          u->utf_buf[u->utf_ptr++] = ch;
-          u->ch = (ch & 0x07) << 18;
-          u->sh = 12;
-        }
-      else
-        {
-          err += vt102_parse_char (c, ch);
-        }
     }
-  else
-    {
-      if ((ch & 0xc0) != 0x80)
-        {
-          err += utf8_flush (c);
-          err += vt102_parse_char (c, ch);
-        }
-      else
-        {
-          u->utf_buf[u->utf_ptr++] = ch;
-          u->ch |= (ch & 0x3f) << u->sh;
-          u->sh -= 6;
-          u->in_utf8--;
-
-          if (!u->in_utf8)
-            err += vt102_parse_char (c, u->ch);
-        }
+  } else {
+    if ((ch & 0xc0) != 0x80) {
+      err += utf8_flush (c);
+      err += vt102_parse_char (c, ch);
+    } else {
+      u->utf_buf[u->utf_ptr++] = ch;
+      u->ch |= (ch & 0x3f) << u->sh;
+      u->sh -= 6;
+      u->in_utf8--;
+
+      if (!u->in_utf8)
+        err += vt102_parse_char (c, u->ch);
     }
+  }
   return err;
 }
 
@@ -179,32 +167,25 @@ int
 utf8_encode (char *ptr, int ch)
 {
 
-  if (ch < 0x80)
-    {
-      ptr[0] = ch;
-      return 1;
-    }
-  else if (ch < 0x800)
-    {
-      ptr[0] = 0xc0 | (ch >> 6);
-      ptr[1] = 0x80 | (ch & 0x3f);
-      return 2;
-    }
-  else if (ch < 0x10000)
-    {
-      ptr[0] = 0xe0 | (ch >> 12);
-      ptr[1] = 0x80 | ((ch >> 6) & 0x3f);
-      ptr[2] = 0x80 | (ch & 0x3f);
-      return 3;
-    }
-  else if (ch < 0x1fffff)
-    {
-      ptr[0] = 0xf0 | (ch >> 18);
-      ptr[1] = 0x80 | ((ch >> 12) & 0x3f);
-      ptr[2] = 0x80 | ((ch >> 6) & 0x3f);
-      ptr[3] = 0x80 | (ch & 0x3f);
-      return 4;
-    }
+  if (ch < 0x80) {
+    ptr[0] = ch;
+    return 1;
+  } else if (ch < 0x800) {
+    ptr[0] = 0xc0 | (ch >> 6);
+    ptr[1] = 0x80 | (ch & 0x3f);
+    return 2;
+  } else if (ch < 0x10000) {
+    ptr[0] = 0xe0 | (ch >> 12);
+    ptr[1] = 0x80 | ((ch >> 6) & 0x3f);
+    ptr[2] = 0x80 | (ch & 0x3f);
+    return 3;
+  } else if (ch < 0x1fffff) {
+    ptr[0] = 0xf0 | (ch >> 18);
+    ptr[1] = 0x80 | ((ch >> 12) & 0x3f);
+    ptr[2] = 0x80 | ((ch >> 6) & 0x3f);
+    ptr[3] = 0x80 | (ch & 0x3f);
+    return 4;
+  }
   return 0;
 }