/* * utf8.c: * * Copyright (c) 2008 James McKenzie , * All rights reserved. * */ static char rcsid[] = "$Id$"; /* * $Log$ * Revision 1.17 2012/06/22 10:22:25 james * *** empty log message *** * * Revision 1.16 2010/07/27 14:49:35 james * add support for byte logging * * Revision 1.15 2008/03/07 13:16:02 james * *** empty log message *** * * Revision 1.14 2008/03/07 12:37:04 james * *** empty log message *** * * Revision 1.13 2008/03/06 16:49:39 james * *** empty log message *** * * Revision 1.12 2008/03/06 16:49:05 james * *** empty log message *** * * Revision 1.11 2008/03/03 06:04:42 james * *** empty log message *** * * Revision 1.10 2008/03/02 10:37:56 james * *** empty log message *** * * Revision 1.9 2008/02/27 01:31:14 james * *** empty log message *** * * Revision 1.8 2008/02/27 00:54:16 james * *** empty log message *** * * Revision 1.7 2008/02/26 23:56:12 james * *** empty log message *** * * Revision 1.6 2008/02/26 23:23:17 james * *** empty log message *** * * Revision 1.5 2008/02/24 00:42:53 james * *** empty log message *** * * Revision 1.4 2008/02/23 13:05:58 staffcvs * *** empty log message *** * * Revision 1.3 2008/02/23 11:48:37 james * *** empty log message *** * * Revision 1.2 2008/02/22 23:39:27 james * *** empty log message *** * * Revision 1.1 2008/02/22 19:12:05 james * *** empty log message *** * */ #include "project.h" int utf8_flush (Context * c) { UTF8 *u = c->u; int i; int err = 0; switch (u->utf_ptr) { case 1: log_f (c->l, "", u->utf_buf[0]); break; case 2: log_f (c->l, "", u->utf_buf[0], u->utf_buf[1]); break; case 3: log_f (c->l, "", u->utf_buf[0], u->utf_buf[1], u->utf_buf[2]); break; case 4: log_f (c->l, "", u->utf_buf[0], u->utf_buf[1], u->utf_buf[2], u->utf_buf[3]); break; } for (i = 0; i < u->utf_ptr; ++i) err += vt102_parse_char (c, u->utf_buf[i]); u->utf_ptr = 0; u->in_utf8 = 0; return err; } int utf8_parse (Context * c, uint32_t ch) { UTF8 *u = c->u; int err = 0; if (ch == SYM_CHAR_RESET) { u->in_utf8 = 0; err += vt102_parse_char (c, ch); return err; } if (c->l && c->byte_logging) { uint8_t ch8 = (uint8_t) ch; c->l->log_bytes (c->l, &ch8, 1); } if (!u->in_utf8) { /* FIXME: for the moment we bodge utf8 support - need to do */ /* L->R and R->L and double width characters */ if (ch == 0xb9) // FIXME - OTHER 8 bit control chars { /* CSI, not a valid utf8 start char */ err += vt102_parse_char (c, ch); } else if ((ch & 0xe0) == 0xc0) { /* Start of two byte unicode sequence */ u->in_utf8 = 1; u->utf_ptr = 0; u->utf_buf[u->utf_ptr++] = ch; u->ch = (ch & 0x1f) << 6; u->sh = 0; } else if ((ch & 0xf0) == 0xe0) { /* Start of three byte unicode sequence */ u->in_utf8 = 2; u->utf_ptr = 0; u->utf_buf[u->utf_ptr++] = ch; u->ch = (ch & 0x0f) << 12; u->sh = 6; } else if ((ch & 0xf8) == 0xf0) { u->in_utf8 = 3; u->utf_ptr = 0; u->utf_buf[u->utf_ptr++] = ch; u->ch = (ch & 0x07) << 18; u->sh = 12; } else { err += vt102_parse_char (c, ch); } } else { if ((ch & 0xc0) != 0x80) { err += utf8_flush (c); err += vt102_parse_char (c, ch); } else { u->utf_buf[u->utf_ptr++] = ch; u->ch |= (ch & 0x3f) << u->sh; u->sh -= 6; u->in_utf8--; if (!u->in_utf8) err += vt102_parse_char (c, u->ch); } } return err; } UTF8 * utf8_new (void) { UTF8 *ret; ret = (UTF8 *) xmalloc (sizeof (UTF8)); ret->in_utf8 = 0; return ret; } int utf8_encode (uint8_t * ptr, int ch) { if (ch < 0x80) { ptr[0] = ch; return 1; } else if (ch < 0x800) { ptr[0] = 0xc0 | (ch >> 6); ptr[1] = 0x80 | (ch & 0x3f); return 2; } else if (ch < 0x10000) { ptr[0] = 0xe0 | (ch >> 12); ptr[1] = 0x80 | ((ch >> 6) & 0x3f); ptr[2] = 0x80 | (ch & 0x3f); return 3; } else if (ch < 0x1fffff) { ptr[0] = 0xf0 | (ch >> 18); ptr[1] = 0x80 | ((ch >> 12) & 0x3f); ptr[2] = 0x80 | ((ch >> 6) & 0x3f); ptr[3] = 0x80 | (ch & 0x3f); return 4; } return 0; } int utf8_emit (TTY * t, int ch) { uint8_t buf[4]; int i; i = utf8_encode (buf, ch); if (!i) return 0; if (t->xmit (t, buf, i) != i) return -1; return 0; }