diff options
author | fishsoupisgood <github@madingley.org> | 2019-05-27 02:41:51 +0100 |
---|---|---|
committer | fishsoupisgood <github@madingley.org> | 2019-05-27 02:41:51 +0100 |
commit | 333b605b2afd472b823aeda0adf0e8b1ea9843c0 (patch) | |
tree | bc8f581317897e2e53f278f1716b4471fcdccd4f /findhyphen.c | |
download | asl-master.tar.gz asl-master.tar.bz2 asl-master.zip |
Diffstat (limited to 'findhyphen.c')
-rw-r--r-- | findhyphen.c | 270 |
1 files changed, 270 insertions, 0 deletions
diff --git a/findhyphen.c b/findhyphen.c new file mode 100644 index 0000000..25dfd1b --- /dev/null +++ b/findhyphen.c @@ -0,0 +1,270 @@ +/* findhyphen.c */ +/*****************************************************************************/ +/* AS-Portierung */ +/* */ +/* Zerlegung von Worten in Silben gemaess dem TeX-Algorithmus */ +/* */ +/* Historie: 17.2.1998 Grundsteinlegung */ +/* */ +/*****************************************************************************/ + +#undef DEBUG + +#include "stdinc.h" +#include <string.h> +#include <ctype.h> + +#include "strutil.h" + +#ifdef DEBUG +#include "ushyph.h" +#endif + +/*****************************************************************************/ + +#define LCNT (26+1+4) +#define SEPCNT 10 + +typedef struct _THyphenNode + { + Byte sepcnts[SEPCNT]; + struct _THyphenNode *Daughters[LCNT]; + } THyphenNode,*PHyphenNode; + +typedef struct _THyphenException + { + struct _THyphenException *next; + char *word; + int poscnt,*posis; + } THyphenException,*PHyphenException; + +#define INTCHR_AE '\344' +#define INTCHR_OE '\366' +#define INTCHR_UE '\374' +#define INTCHR_SZ '\337' + +/*****************************************************************************/ + +static PHyphenNode HyphenRoot=Nil; +static PHyphenException FirstException=Nil; + +/*****************************************************************************/ + + char mytolower(char ch) +BEGIN + return tolower(((unsigned int) ch)&0xff); +END + +#if 0 +char b[10]; + + static void PrintNode(PHyphenNode Node, int Level) +BEGIN + int z; + + for (z=1; z<Level; z++) putchar(' '); + for (z=1; z<=Level; z++) putchar(b[z]); + for (z=0; z<SEPCNT; z++) if (Node->sepcnts[z]>0) break; + if (z<SEPCNT) putchar('!'); printf(" %p",Node); + puts(""); + for (z=0; z<LCNT; z++) + if (Node->Daughters[z]!=Nil) + BEGIN + b[Level+1]=z+'a'-1; PrintNode(Node->Daughters[z],Level+1); + END +END +#endif + + static int GetIndex(char ch) +BEGIN + if ((mytolower(ch)>='a'-1) AND (mytolower(ch)<='z')) return (mytolower(ch)-('a'-1)); + else if (ch=='.') return 0; +#ifndef CHARSET_ASCII7 + else if ((ch==*CH_ae) OR (ch==*CH_Ae) OR (ch==INTCHR_AE)) return 27; + else if ((ch==*CH_oe) OR (ch==*CH_Oe) OR (ch==INTCHR_OE)) return 28; + else if ((ch==*CH_ue) OR (ch==*CH_Ue) OR (ch==INTCHR_UE)) return 29; + else if ((ch==*CH_sz) OR (ch==INTCHR_SZ)) return 30; +#endif + else { printf("unallowed character %d\n",ch); return -1; } +END + + static void InitHyphenNode(PHyphenNode Node) +BEGIN + int z; + + for (z=0; z<LCNT; Node->Daughters[z++]=Nil); + for (z=0; z<SEPCNT; Node->sepcnts[z++]=0); +END + + void BuildTree(char **Patterns) +BEGIN + char **run,ch,*pos,sing[500],*rrun; + Byte RunCnts[SEPCNT]; + int z,l,rc,index; + PHyphenNode Lauf; + + HyphenRoot=(PHyphenNode) malloc(sizeof(THyphenNode)); + InitHyphenNode(HyphenRoot); + + for (run=Patterns; *run!=NULL; run++) + BEGIN + strcpy(sing,*run); rrun=sing; + do + BEGIN + pos=strchr(rrun,' '); if (pos!=Nil) *pos='\0'; + l=strlen(rrun); rc=0; Lauf=HyphenRoot; + for (z=0; z<SEPCNT; RunCnts[z++]=0); + for (z=0; z<l; z++) + BEGIN + ch=rrun[z]; + if ((ch>='0') AND (ch<='9')) RunCnts[rc]=ch-'0'; + else + BEGIN + index=GetIndex(ch); + if (Lauf->Daughters[index]==Nil) + BEGIN + Lauf->Daughters[index]=(PHyphenNode) malloc(sizeof(THyphenNode)); + InitHyphenNode(Lauf->Daughters[index]); + END + Lauf=Lauf->Daughters[index]; rc++; + END + END + memcpy(Lauf->sepcnts,RunCnts,sizeof(Byte)*SEPCNT); + if (pos!=Nil) rrun=pos+1; + END + while (pos!=Nil); + END +END + + void AddException(char *Name) +BEGIN + char tmp[300],*dest,*src; + int pos[100]; + PHyphenException New; + + New=(PHyphenException) malloc(sizeof(THyphenException)); + New->next=FirstException; + New->poscnt=0; dest=tmp; + for (src=Name; *src!='\0'; src++) + if (*src=='-') pos[New->poscnt++]=dest-tmp; + else *(dest++)=*src; + *dest='\0'; + New->word=strdup(tmp); + New->posis=(int *) malloc(sizeof(int)*New->poscnt); + memcpy(New->posis,pos,sizeof(int)*New->poscnt); + FirstException=New; +END + + void DestroyNode(PHyphenNode Node) +BEGIN + int z; + + for (z=0; z<LCNT; z++) + if (Node->Daughters[z]!=Nil) DestroyNode(Node->Daughters[z]); + free(Node); +END + + void DestroyTree(void) +BEGIN + PHyphenException Old; + + if (HyphenRoot!=Nil) DestroyNode(HyphenRoot); HyphenRoot=Nil; + + while (FirstException!=Nil) + BEGIN + Old=FirstException; FirstException=Old->next; + free(Old->word); if (Old->poscnt>0) free(Old->posis); + END +END + + void DoHyphens(char *word, int **posis, int *posicnt) +BEGIN + char Field[300]; + Byte Res[300]; + int z,z2,z3,l; + PHyphenNode Lauf; + PHyphenException Ex; + + for (Ex=FirstException; Ex!=Nil; Ex=Ex->next) + if (strcasecmp(Ex->word,word)==0) + BEGIN + *posis=(int *) malloc(sizeof(int)*Ex->poscnt); + memcpy(*posis,Ex->posis,sizeof(int)*Ex->poscnt); + *posicnt=Ex->poscnt; + return; + END + + l=strlen(word); *posicnt=0; + *Field='a'-1; + for (z=0; z<l; z++) + BEGIN + Field[z+1]=tolower((unsigned int) word[z]); + if (GetIndex(Field[z+1])<=0) return; + END + Field[l+1]='a'-1; l+=2; + for (z=0; z<=l+1; Res[z++]=0); + + if (HyphenRoot==Nil) return; + + for (z=0; z<l; z++) + BEGIN + Lauf=HyphenRoot; + for (z2=z; z2<l; z2++) + BEGIN + Lauf=Lauf->Daughters[GetIndex(Field[z2])]; + if (Lauf==Nil) break; +#ifdef DEBUG + for (z3=0; z3<SEPCNT; z3++) if (Lauf->sepcnts[z3]>0) break; + if (z3<SEPCNT) + BEGIN + printf("Apply pattern "); + for (z3=z; z3<=z2; putchar(Field[z3++])); + printf(" at position %d with values",z); + for (z3=0; z3<SEPCNT; printf(" %d",Lauf->sepcnts[z3++])); + puts(""); + END +#endif + for (z3=0; z3<=z2-z+2; z3++) + if (Lauf->sepcnts[z3]>Res[z+z3]) Res[z+z3]=Lauf->sepcnts[z3]; + END + END + +#ifdef DEBUG + for (z=0; z<l; z++) printf(" %c",Field[z]); puts(""); + for (z=0; z<=l; z++) printf("%d ",Res[z]); puts(""); + for (z=0; z<l-2; z++) + BEGIN + if ((z>0) AND ((Res[z+1])&1)) putchar('-'); + putchar(Field[z+1]); + END + puts(""); +#endif + + *posis=(int *) malloc(sizeof(int)*l); *posicnt=0; + for (z=3; z<l-2; z++) + if ((Res[z]&1)==1) (*posis)[(*posicnt)++]=z-1; + if (*posicnt==0) + BEGIN + free(*posis); *posis=Nil; + END +END + +/*****************************************************************************/ + +#ifdef DEBUG + int main(int argc, char **argv) +BEGIN + int z,z2,cnt,*posis,posicnt; + + BuildTree(USHyphens); + for (z=1; z<argc; z++) + BEGIN + DoHyphens(argv[z],&posis,&cnt); + for (z2=0; z2<cnt; printf("%d ",posis[z2++])); puts(""); + if (posicnt>0) free(posis); + END +/* DoHyphens("hyphenation"); + DoHyphens("concatenation"); + DoHyphens("supercalifragilisticexpialidocous");*/ +END +#endif |