about summary refs log tree commit diff stats
path: root/src/utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/utf8.c')
-rw-r--r-- src/utf8.c 73
1 files changed, 58 insertions, 15 deletions
diff --git a/src/utf8.c b/src/utf8.c
index 18ca3eb..09066db 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -10,6 +10,9 @@ static char rcsid[] = "$Id$";
/*
* $Log$
+ * Revision 1.2 2008/02/22 23:39:27 james
+ * *** empty log message ***
+ *
* Revision 1.1 2008/02/22 19:12:05 james
* *** empty log message ***
*
@@ -18,21 +21,61 @@ static char rcsid[] = "$Id$";
#include "project.h"
- /*FIXME: for the moment we bodge utf8 support*/
- if ((ch>=0xc0) && (ch<0xe0)) /*Start of two byte unicode sequence*/
- {
- p->in_utf8=2;
- } else if ((ch>=0xe0) && (ch<0xf0)) /*Start of three byte unicode sequence*/
- {
- p->in_utf8=3;
- } else if ((ch>=0xf0) && (ch<0xf7)) /*Start of four byte unicode sequence*/
- p->in_utf8=4;
+/*
+ * Hand every byte currently held in the pending-sequence buffer to
+ * vt102_parse(), oldest first, then mark the buffer empty and clear the
+ * in-sequence flag.
+ */
+void utf8_flush(Context *c)
+{
+UTF8 *state=c->u;
+int n=0;
+
+while (n<state->utf_ptr) {
+ vt102_parse(c,state->utf_buf[n]);
+ n++;
+}
+
+state->in_utf8=0;
+state->utf_ptr=0;
+}
+
+/*
+ * Feed one input byte through the UTF-8 framing state machine.
+ *
+ * Bytes that are not part of a multi-byte sequence go straight to
+ * vt102_parse().  A lead byte (110xxxxx, 1110xxxx or 11110xxx) is stored
+ * in utf_buf and in_utf8 is set to the number of continuation bytes
+ * (10xxxxxx) still expected.  Continuation bytes are appended; as soon as
+ * the last one arrives the buffered bytes are handed on via utf8_flush().
+ * If a sequence is interrupted by a non-continuation byte, the partial
+ * sequence is flushed first and the interrupting byte is then passed on.
+ *
+ * c:  context whose UTF8 state (c->u) is updated
+ * ch: the input byte
+ *
+ * Always returns 0.
+ */
+int utf8_parse(Context *c,int ch)
+{
+UTF8 *u=c->u; /*same access as utf8_flush(): c->u is the UTF8 pointer*/
+
+ if (!u->in_utf8) {
+ /*FIXME: for the moment we bodge utf8 support*/
+ if (ch==0x9b) { /*C1 CSI, not a valid utf8 start char*/
+ vt102_parse(c,ch);
+ } else if ((ch & 0xe0) == 0xc0) { /*Start of two byte unicode sequence*/
+ u->in_utf8=1; /*one continuation byte to come*/
+ u->utf_ptr=0;
+ u->utf_buf[u->utf_ptr++]=ch;
+ } else if ((ch & 0xf0) ==0xe0) { /*Start of three byte unicode sequence*/
+ u->in_utf8=2; /*two continuation bytes to come*/
+ u->utf_ptr=0;
+ u->utf_buf[u->utf_ptr++]=ch;
+ } else if ((ch & 0xf8) ==0xf0) { /*Start of four byte unicode sequence*/
+ u->in_utf8=3; /*three continuation bytes to come*/
+ u->utf_ptr=0;
+ u->utf_buf[u->utf_ptr++]=ch;
+ } else {
+ vt102_parse(c,ch);
+ }
+ } else {
+ if ((ch & 0xc0) != 0x80) {
+ /*Sequence cut short: emit what was buffered, then this byte*/
+ utf8_flush(c);
+ vt102_parse(c,ch);
+ } else {
+ u->utf_buf[u->utf_ptr++]=ch;
+ u->in_utf8--;
+ if (!u->in_utf8) {
+ /*Sequence complete: deliver it instead of leaving it to be overwritten*/
+ utf8_flush(c);
+ }
+ }
 }
+
+return 0;
+}
+
+
+
+/*
+ * Allocate a UTF8 parser state with no sequence in progress and an empty
+ * byte buffer.
+ *
+ * Returns the new state, obtained from malloc(), or a null pointer if the
+ * allocation fails.
+ */
+UTF8 *utf8_new(void)
+{
+UTF8 *ret;
+
+ret=malloc(sizeof *ret);
+
+if (ret) {
+ ret->in_utf8=0;
+ ret->utf_ptr=0; /*utf8_flush() reads this, so it must start defined*/
+}
+
+return ret;
- if (p->utf_8) {
- p->in_utf8--;
- ch='?';
- }
+}
- if (!p->utf_8) {
- /*Not first or last byte in sequence*/