about summary refs log tree commit diff stats
path: root/src/utf8.c
diff options
context:
space:
mode:
author james <> 2008-02-23 11:49:23 +0000
committer james <> 2008-02-23 11:49:23 +0000
commit fc338d9b30500f311d9555545477e4ba1775b403 (patch)
tree a860ad9af01332c74527e49bac77662af3e99f3b /src/utf8.c
parent 029a61812e82b58803c618ed53df42180106412f (diff)
download sympathy-fc338d9b30500f311d9555545477e4ba1775b403.tar.gz
sympathy-fc338d9b30500f311d9555545477e4ba1775b403.tar.bz2
sympathy-fc338d9b30500f311d9555545477e4ba1775b403.zip
*** empty log message ***
Diffstat (limited to 'src/utf8.c')
-rw-r--r-- src/utf8.c 146
1 file changed, 103 insertions, 43 deletions
diff --git a/src/utf8.c b/src/utf8.c
index 09066db..9d04d19 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -10,6 +10,9 @@ static char rcsid[] = "$Id$";
/*
* $Log$
+ * Revision 1.3 2008/02/23 11:48:37 james
+ * *** empty log message ***
+ *
* Revision 1.2 2008/02/22 23:39:27 james
* *** empty log message ***
*
@@ -21,61 +24,118 @@ static char rcsid[] = "$Id$";
#include "project.h"
-void utf8_flush(Context *c)
+void
+utf8_flush (Context * c)
{
-UTF8 *u=c->u;
-int i;
+ UTF8 *u = c->u;
+ int i;
-for (i=0;i<u->utf_ptr;++i)
- vt102_parse(c,u->utf_buf[i]);
+ switch (u->utf_ptr)
+ {
+ case 1:
+ log_f (c->l, "%s:%d invalid utf-8 sequence: \\%03o",
+ __FILE__, __LINE__, u->utf_buf[0]);
+ break;
+ case 2:
+ log_f (c->l, "%s:%d invalid utf-8 sequence: \\%03o \\%03o",
+ __FILE__, __LINE__, u->utf_buf[0], u->utf_buf[1]);
+ break;
+ case 3:
+ log_f (c->l, "%s:%d invalid utf-8 sequence: \\%03o \\%03o \\%03o",
+ __FILE__, __LINE__, u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]);
+ break;
+ case 4:
+ log_f (c->l,
+ "%s:%d invalid utf-8 sequence: \\%03o \\%03o \\%03o \\%03o",
+ __FILE__, __LINE__,
+ u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]);
+ break;
+ }
-u->utf_ptr=0;
-u->in_utf8=0;
+ for (i = 0; i < u->utf_ptr; ++i)
+ vt102_parse_char (c, u->utf_buf[i]);
+
+ u->utf_ptr = 0;
+ u->in_utf8 = 0;
}
-int utf8_parse(Context *c,int ch)
+void
+utf8_parse (Context * c, int ch)
{
-UTF8 *u=&c->u;
-
- if (!u->in_utf8) {
- /*FIXME: for the moment we bodge utf8 support*/
- if (ch==0xb9) { /*CSI, not a valid utf8 start char*/
- vt102_parse(c,ch);
- } else if ((ch & 0xe0) == 0xc0) { /*Start of two byte unicode sequence*/
- u->in_utf8=2;
- u->utf_ptr=0;
- u->utf_buf[u->utf_ptr++]=ch;
- } else if ((ch & 0xf0) ==0xe0) { /*Start of three byte unicode sequence*/
- u->in_utf8=3;
- u->utf_ptr=0;
- u->utf_buf[u->utf_ptr++]=ch;
- } else if ((ch & 0xf8) ==0xf0) {
- u->in_utf8=4;
- u->utf_ptr=0;
- u->utf_buf[u->utf_ptr++]=ch;
- } else {
- vt102_parse(c,ch);
- }
- } else {
- if ((ch & 0xc0) != 0x80) {
- utf8_flush(c);
- vt102_parse(c,ch);
- } else {
- u->utf_buf[u->utf_ptr++]=ch;
- u->in_utf8--;
- }
- }
+ UTF8 *u = c->u;
+
+ if (ch == SYM_CHAR_RESET)
+ {
+ u->in_utf8 = 0;
+ vt102_parse_char (c, ch);
+ return;
+ }
+
+ if (!u->in_utf8)
+ {
+ /*FIXME: for the moment we bodge utf8 support */
+ if (ch == 0xb9)
+ { /*CSI, not a valid utf8 start char */
+ vt102_parse_char (c, ch);
+ }
+ else if ((ch & 0xe0) == 0xc0)
+ { /*Start of two byte unicode sequence */
+ u->in_utf8 = 1;
+ u->utf_ptr = 0;
+ u->utf_buf[u->utf_ptr++] = ch;
+ u->ch = (ch & 0x1f) << 6;
+ u->sh = 0;
+ }
+ else if ((ch & 0xf0) == 0xe0)
+ { /*Start of three byte unicode sequence */
+ u->in_utf8 = 2;
+ u->utf_ptr = 0;
+ u->utf_buf[u->utf_ptr++] = ch;
+ u->ch = (ch & 0x0f) << 12;
+ u->sh = 6;
+ }
+ else if ((ch & 0xf8) == 0xf0)
+ {
+ u->in_utf8 = 3;
+ u->utf_ptr = 0;
+ u->utf_buf[u->utf_ptr++] = ch;
+ u->ch = (ch & 0x07) << 18;
+ u->sh = 12;
+ }
+ else
+ {
+ vt102_parse_char (c, ch);
+ }
+ }
+ else
+ {
+ if ((ch & 0xc0) != 0x80)
+ {
+ utf8_flush (c);
+ vt102_parse_char (c, ch);
+ }
+ else
+ {
+ u->utf_buf[u->utf_ptr++] = ch;
+ u->ch |= (ch & 0x3f) << u->sh;
+ u->sh -= 6;
+ u->in_utf8--;
+
+ if (!u->in_utf8)
+ vt102_parse_char (c, ch);
+ }
+ }
}
-UTF8 *utf8_new(void)
+UTF8 *
+utf8_new (void)
{
-UTF8 *ret;
+ UTF8 *ret;
-ret=(UTF8 *) malloc(sizeof(UTF8));
+ ret = (UTF8 *) malloc (sizeof (UTF8));
-ret->in_utf8=0;
+ ret->in_utf8 = 0;
}
-