diff options
author | james <> | 2008-02-23 11:49:23 +0000 |
---|---|---|
committer | james <> | 2008-02-23 11:49:23 +0000 |
commit | fc338d9b30500f311d9555545477e4ba1775b403 (patch) | |
tree | a860ad9af01332c74527e49bac77662af3e99f3b /src/utf8.c | |
parent | 029a61812e82b58803c618ed53df42180106412f (diff) | |
download | sympathy-fc338d9b30500f311d9555545477e4ba1775b403.tar.gz sympathy-fc338d9b30500f311d9555545477e4ba1775b403.tar.bz2 sympathy-fc338d9b30500f311d9555545477e4ba1775b403.zip |
*** empty log message ***
Diffstat (limited to 'src/utf8.c')
-rw-r--r-- | src/utf8.c | 146 |
1 files changed, 103 insertions, 43 deletions
@@ -10,6 +10,9 @@ static char rcsid[] = "$Id$";
 
 /*
  * $Log$
+ * Revision 1.3 2008/02/23 11:48:37 james
+ * *** empty log message ***
+ *
  * Revision 1.2 2008/02/22 23:39:27 james
  * *** empty log message ***
  *
@@ -21,61 +24,118 @@ static char rcsid[] = "$Id$";
 
 #include "project.h"
 
-void utf8_flush(Context *c)
+void
+utf8_flush (Context * c)
 {
-UTF8 *u=c->u;
-int i;
+  UTF8 *u = c->u;
+  int i;
 
-for (i=0;i<u->utf_ptr;++i)
-  vt102_parse(c,u->utf_buf[i]);
+  switch (u->utf_ptr)
+    {
+    case 1:
+      log_f (c->l, "%s:%d invalid utf-8 sequence: \\%03o",
+             __FILE__, __LINE__, u->utf_buf[0]);
+      break;
+    case 2:
+      log_f (c->l, "%s:%d invalid utf-8 sequence: \\%03o \\%03o",
+             __FILE__, __LINE__, u->utf_buf[0], u->utf_buf[1]);
+      break;
+    case 3:
+      log_f (c->l, "%s:%d invalid utf-8 sequence: \\%03o \\%03o \\%03o",
+             __FILE__, __LINE__, u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]);
+      break;
+    case 4:
+      log_f (c->l,
+             "%s:%d invalid utf-8 sequence: \\%03o \\%03o \\%03o \\%03o",
+             __FILE__, __LINE__,
+             u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]);
+      break;
+    }
 
-u->utf_ptr=0;
-u->in_utf8=0;
+  for (i = 0; i < u->utf_ptr; ++i)
+    vt102_parse_char (c, u->utf_buf[i]);
+
+  u->utf_ptr = 0;
+  u->in_utf8 = 0;
 }
 
-int utf8_parse(Context *c,int ch)
+void
+utf8_parse (Context * c, int ch)
 {
-UTF8 *u=&c->u;
-
-  if (!u->in_utf8) {
-    /*FIXME: for the moment we bodge utf8 support*/
-    if (ch==0xb9) { /*CSI, not a valid utf8 start char*/
-      vt102_parse(c,ch);
-    } else if ((ch & 0xe0) == 0xc0) { /*Start of two byte unicode sequence*/
-      u->in_utf8=2;
-      u->utf_ptr=0;
-      u->utf_buf[u->utf_ptr++]=ch;
-    } else if ((ch & 0xf0) ==0xe0) { /*Start of three byte unicode sequence*/
-      u->in_utf8=3;
-      u->utf_ptr=0;
-      u->utf_buf[u->utf_ptr++]=ch;
-    } else if ((ch & 0xf8) ==0xf0) {
-      u->in_utf8=4;
-      u->utf_ptr=0;
-      u->utf_buf[u->utf_ptr++]=ch;
-    } else {
-      vt102_parse(c,ch);
-    }
-  } else {
-    if ((ch & 0xc0) != 0x80) {
-      utf8_flush(c);
-      vt102_parse(c,ch);
-    } else {
-      u->utf_buf[u->utf_ptr++]=ch;
-      u->in_utf8--;
-    }
-  }
+  UTF8 *u = c->u;
+
+  if (ch == SYM_CHAR_RESET)
+    {
+      u->in_utf8 = 0;
+      vt102_parse_char (c, ch);
+      return;
+    }
+
+  if (!u->in_utf8)
+    {
+      /*FIXME: for the moment we bodge utf8 support */
+      if (ch == 0xb9)
+        { /*CSI, not a valid utf8 start char */
+          vt102_parse_char (c, ch);
+        }
+      else if ((ch & 0xe0) == 0xc0)
+        { /*Start of two byte unicode sequence */
+          u->in_utf8 = 1;
+          u->utf_ptr = 0;
+          u->utf_buf[u->utf_ptr++] = ch;
+          u->ch = (ch & 0x1f) << 6;
+          u->sh = 0;
+        }
+      else if ((ch & 0xf0) == 0xe0)
+        { /*Start of three byte unicode sequence */
+          u->in_utf8 = 2;
+          u->utf_ptr = 0;
+          u->utf_buf[u->utf_ptr++] = ch;
+          u->ch = (ch & 0x0f) << 12;
+          u->sh = 6;
+        }
+      else if ((ch & 0xf8) == 0xf0)
+        {
+          u->in_utf8 = 3;
+          u->utf_ptr = 0;
+          u->utf_buf[u->utf_ptr++] = ch;
+          u->ch = (ch & 0x07) << 18;
+          u->sh = 12;
+        }
+      else
+        {
+          vt102_parse_char (c, ch);
+        }
+    }
+  else
+    {
+      if ((ch & 0xc0) != 0x80)
+        {
+          utf8_flush (c);
+          vt102_parse_char (c, ch);
+        }
+      else
+        {
+          u->utf_buf[u->utf_ptr++] = ch;
+          u->ch |= (ch & 0x3f) << u->sh;
+          u->sh -= 6;
+          u->in_utf8--;
+
+          if (!u->in_utf8)
+            vt102_parse_char (c, ch);
+        }
+    }
 }
 
-UTF8 *utf8_new(void)
+UTF8 *
+utf8_new (void)
 {
-UTF8 *ret;
+  UTF8 *ret;
 
-ret=(UTF8 *) malloc(sizeof(UTF8));
+  ret = (UTF8 *) malloc (sizeof (UTF8));
 
-ret->in_utf8=0;
+  ret->in_utf8 = 0;
 }
-