From 2cc44bb18274c1f867bf4e6ab7c1385c9d155211 Mon Sep 17 00:00:00 2001 From: Ron Koenderink Date: Thu, 26 May 2005 01:58:48 +0000 Subject: [PATCH] Support UTF-8 encoded Unicode for user communications. (NF_UTF8, togg): New client flag. (flash, wall): User text input filtering. (parse, prmptrd): Normal text input filtering. (uprmptrd, ugetstring): New, to get user text. (getele, flash, wall): Use them. (getele): No need to filter out funny characters; input filtering takes care of them. (sendmessage, ufindbreak): Work on user text. (uprnf): New, to print user text. (rea, gamedown, show_motd): Use it. (prnf): Unused, remove. (pr, upr_player): Normal text output filtering (with highlighting). (pr_flash): User text output filtering. --- doc/unicode | 102 +++++++++++++++++++++++++++++++ include/misc.h | 3 + include/nat.h | 2 + include/prototypes.h | 19 ++++-- info/Commands/toggle.t | 15 ++++- src/lib/commands/flash.c | 77 ++++++++++++++++------- src/lib/commands/rea.c | 4 +- src/lib/commands/togg.c | 8 +++ src/lib/gen/getstring.c | 13 ++++ src/lib/gen/parse.c | 5 +- src/lib/player/empdis.c | 4 +- src/lib/player/player.c | 2 +- src/lib/subs/getele.c | 17 ++---- src/lib/subs/pr.c | 128 ++++++++++++++++++++++++++++++++++++--- 14 files changed, 343 insertions(+), 56 deletions(-) create mode 100644 doc/unicode diff --git a/doc/unicode b/doc/unicode new file mode 100644 index 000000000..3ebaf307b --- /dev/null +++ b/doc/unicode @@ -0,0 +1,102 @@ +Unicode changes: + +1. toggle UTF-8 + + Add utf8 as a toggle option and store in the nat_flags field in + nation structure. In the future, this should be a login option + rather than a country toggle once the login options are added. + +2. flash and wall + + a. Message as command argument + + Interpret raw command line as message text rather than normal + text. + + b. Multi-line mode + + Read message lines as message text rather than normal text. + + c. Break long lines + + Count the characters using utf8 format. 
+ This works for both ASCII + and UTF8 formatted strings. + + d. Print lines + + Print as message text rather than normal text. + +3. Telexes and telex-like things + + a. read and wire, MOTD and gamedown message + + Print as message text rather than normal text. + + c. tele, anno, pray, turn. + + Read as message text rather than normal text. + +4. Input filtering + + a. Parsing commands (normal text) + + Ignore control and non-ASCII characters when copying argument + strings. + + b. Reading normal text command arguments + + Replace control and non-ASCII characters, except for tab, with + '?'. + + c. Reading message text command arguments + + Support message text arguments, used by 3c. and 2b. Replace + control and, if NF_UTF8 is off, non-ASCII characters with '?'. + +5. Output filtering + + Output filtering assumes that there are no control characters or + invalid characters in the output messages: control characters + and invalid characters are filtered out during input filtering, and + the server itself does not generate control characters or invalid + characters. + + a. Printing normal text + + When NF_UTF8 is on, highlighted text is printed using SO/SI. + + b. Printing message text + + When NF_UTF8 is off, replace UTF8 characters with '?'. + + +Definitions: + +1. Normal Text + For normal text, the following ASCII characters are valid: + CR, LF and 0x20-0x7e. Normally, LF is a termination action + event. Normally, CR is not used except by the server. + Normal Text does not support UTF8 characters. In normal + text, the 8th bit is used as a highlight bit. If the client + has the utf8 nation flag set, the standout bit is removed + and the highlight block is prefixed with SO (ASCII standout) + and suffixed with SI (ASCII standin). + +2. Message Text + For message text, the following ASCII characters are valid: + Tab, CR, LF and 0x20-0x7e. Normally, LF is a termination + action event. Normally, CR is not used except by the server. 
+ Message text also supports UTF8 characters if the utf8 nation + flag is turned on; otherwise only the ASCII characters are + supported. + + +Notes: + +1. Strings that are considered message text are commented. + +2. Both Normal and Message text are char strings in the server. + Care needs to be taken as some compilers consider char + signed and others default to unsigned char. + +3. Unicode functions are prefixed with u. diff --git a/include/misc.h b/include/misc.h index ed9f24b18..a6d4e2d25 100644 --- a/include/misc.h +++ b/include/misc.h @@ -144,6 +144,9 @@ extern s_char *iesplur(int n); extern s_char *plur(int n, s_char *no, s_char *yes); extern char *getstarg(char *input, char *prompt, char buf[]); extern char *getstring(char *prompt, char buf[]); +extern char *ugetstring(char *prompt, char buf[] + /* buf is message text */); + extern s_char *prbuf(s_char *format, ...) ATTRIBUTE((format (printf, 1, 2))); diff --git a/include/nat.h b/include/nat.h index 2d6387776..72663f899 100644 --- a/include/nat.h +++ b/include/nat.h @@ -172,5 +172,7 @@ extern void agecontact(struct natstr *np); #define NF_SONAR bit(4) /* Turn auto-sonar on */ #define NF_TECHLISTS bit(5) /* Sort lists by tech not type */ #define NF_SACKED bit(6) /* Capital was sacked, and hasn't been reset yet */ +#define NF_UTF8 bit(7) /* Whether the client accepts UTF-8 (true) or + ASCII only (false) */ #endif /* _NAT_H_ */ diff --git a/include/prototypes.h b/include/prototypes.h index 78718aff0..74ae8c5e3 100644 --- a/include/prototypes.h +++ b/include/prototypes.h @@ -63,7 +63,8 @@ extern void finish_server(void); */ extern int check_market(void); extern void set_coastal(struct sctstr *); -extern int sendmessage(struct natstr *, struct natstr *, char *, int); +extern int sendmessage(struct natstr *, struct natstr *, char *message + /* message is message text */, int); extern void gift(int, int, s_char *, int, s_char *); extern int display_mark(i_type, int); extern int want_to_abandon(struct sctstr *, i_type, 
int, struct lndstr *); @@ -484,7 +485,7 @@ extern void emp_setbit(int, int, u_char *); extern void emp_setbitmap(int, int, u_char *, int *); extern void bitinit2(struct nstr_sect *, u_char *, int); /* getele.c */ -extern int getele(s_char *, s_char *); +extern int getele(char *, char *); /* land.c */ extern s_char *prland(struct lndstr *); extern int lnd_postread(int, s_char *); @@ -591,29 +592,35 @@ extern int pln_damage(struct plnstr *, coord, coord, s_char, int *, int); extern int pln_identchance(struct plnstr *, int, int); extern void pln_set_tech(struct plnstr *, int); /* pr.c */ -extern void pr(s_char *, ...) ATTRIBUTE((format (printf, 1, 2))); -extern void prnf(s_char *buf); +extern void pr(char *, ...) ATTRIBUTE((format (printf, 1, 2))); +extern void uprnf(char *buf /* buf is message text */); extern void pr_id(struct player *, int, s_char *, ...) ATTRIBUTE((format (printf, 3, 4))); -extern void pr_flash(struct player *, s_char *, ...) +extern void pr_flash(struct player *, char *format + /* format is message text */, ...) ATTRIBUTE((format (printf, 2, 3))); extern void pr_inform(struct player *, s_char *, ...) ATTRIBUTE((format (printf, 2, 3))); extern void pr_wall(s_char *, ...) ATTRIBUTE((format (printf, 1, 2))); extern void pr_player(struct player *pl, int id, s_char *buf); +extern void upr_player(struct player *pl, int id, char *buf + /* buf is message text */); extern void pr_hilite(s_char *buf); extern void prredir(s_char *redir); extern void prexec(s_char *file); extern void prprompt(int min, int btu); extern void showvers(int vers); -extern int prmptrd(s_char *prompt, s_char *str, int size); +extern int prmptrd(char *prompt, char *str, int size); +extern int uprmptrd(char *prompt, char *str /* str is message text */, + int size); extern void prdate(void); extern void prxy(s_char *format, coord x, coord y, natid country); extern void PR(int, s_char *, ...) 
ATTRIBUTE((format (printf, 2, 3))); extern void PRdate(natid cn); extern void pr_beep(void); extern void mpr(int, s_char *, ...) ATTRIBUTE((format (printf, 2, 3))); +extern void prtoascii(char *buf /* buf is message text */); /* radmap.c */ extern int deltx(struct range *, coord); diff --git a/info/Commands/toggle.t b/info/Commands/toggle.t index 616bc649d..594afceca 100644 --- a/info/Commands/toggle.t +++ b/info/Commands/toggle.t @@ -1,7 +1,7 @@ .TH Command TOGGLE .NA toggle "Toggle client flags" .LV Expert -.SY "toggle [inform|flash|beep|coastwatch|sonar|techlists] [on|off]" +.SY "toggle [inform|flash|beep|coastwatch|sonar|techlists|utf8] [on|off]" You use the toggle command to set various user-interface flags for your country. The server will remember which flags you have set between sessions. If you do not specify the second argument ("on" or @@ -39,6 +39,19 @@ Will toggle the "techlists" flag (default off) for your country. When techlists is on, when you do a "show" command, the lists shows will be in order of tech instead of grouping order. .s1 +.EX "toggle utf8" +Will toggle the "utf8" flag (default off) for your country. +When utf8 is on, you can use multiple languages in your user communication. +To use this feature, the client must support the unicode character set +and must encode unicode characters using the UTF8 format +when sending the characters to the server and must decode +the UTF8 characters into +unicode characters when receiving characters from the server. +Also both the sending and receiving clients must support +unicode in order to communicate using unicode characters. +This feature applies to the following commands: +wire, read, announcement, flash, telegram and turn. +.s1 .EX "toggle" Will list all your flags and show whether they are on or off. 
.s1 diff --git a/src/lib/commands/flash.c b/src/lib/commands/flash.c index aad0d19b4..46bd6f8ae 100644 --- a/src/lib/commands/flash.c +++ b/src/lib/commands/flash.c @@ -39,14 +39,17 @@ #include "file.h" #include "commands.h" +static int ufindbreak(char *message /* message is message text */, + int num_chars); + int flash(void) { struct natstr *us; struct natstr *to; - s_char buf[1024]; + char buf[1024]; /* buf is message text */ int tocn; - s_char *sp; + char *sp; /* sp is message text */ us = getnatp(player->cnum); if ((tocn = natarg(player->argp[1], "to which country? ")) < 0) @@ -77,10 +80,17 @@ flash(void) for (sp = &player->combuf[0]; *sp && *sp != ' '; ++sp) ; for (++sp; *sp && *sp != ' '; ++sp) ; sprintf(buf, ":%s", sp); + for(sp = buf; 0 != *sp; ++sp) { + if ((*sp >= 0x0 && *sp < 0x20 && *sp != '\t') || + *sp == 0x7f) + *sp = '?'; + else if (!(us->nat_flags & NF_UTF8) && (*sp & 0x80)) + *sp = '?'; + } sendmessage(us, to, buf, 1); } else { sendmessage(us, to, "...", 1); - while (getstring("> ", buf)) { + while (ugetstring("> ", buf)) { if (*buf == '.') break; sendmessage(us, to, buf, 0); @@ -94,17 +104,24 @@ int wall(void) { struct natstr *us; - s_char buf[1024]; - s_char *sp; + char buf[1024]; /* buf is message text */ + char *sp; /* sp is message text */ us = getnatp(player->cnum); if (player->argp[1]) { for (sp = &player->combuf[0]; *sp && *sp != ' '; ++sp) ; sprintf(buf, ":%s", sp); + for(sp = buf; 0 != *sp; ++sp) { + if ((*sp >= 0x0 && *sp < 0x20 && *sp != '\t') || + *sp == 0x7f) + *sp = '?'; + else if (!(us->nat_flags & NF_UTF8) && (*sp & 0x80)) + *sp = '?'; + } sendmessage(us, 0, buf, 1); } else { sendmessage(us, 0, "...", 1); - while (getstring("> ", buf)) { + while (ugetstring("> ", buf)) { if (*buf == '.') break; sendmessage(us, 0, buf, 0); @@ -115,29 +132,22 @@ wall(void) } int -sendmessage(struct natstr *us, struct natstr *to, char *message, - int oneshot) +sendmessage(struct natstr *us, struct natstr *to, char *message + /* message is message 
text */, int oneshot) { struct player *other; struct tm *tm; - char *p; - char c; time_t now; int sent = 0; struct natstr *wto; + char c; /* c is message text */ + int pos; - for (p = message; 0 != (c = *p); p++) { - if (!isprint(c)) - *p = '*'; - } - if (strlen(message) > 60) { - s_char c = message[60]; - message[60] = '\0'; - sendmessage(us, to, message, oneshot); - message[60] = c; - sendmessage(us, to, &message[60], 0); - return 0; - } + pos = ufindbreak(message, 60); + c = message[pos]; + if (c) + message[pos] = '\0'; + time(&now); tm = localtime(&now); for (other = player_next(0); other != 0; other = player_next(other)) { @@ -188,5 +198,28 @@ sendmessage(struct natstr *us, struct natstr *to, char *message, pr("%s is not accepting flashes\n", to->nat_cnam); } } + if (c) { + message[pos] = c; + sendmessage(us, to, &message[pos], 0); + } return 0; } + +/* + * Return byte-index of the N-th UTF-8 character in UTF-8 string S. + * If S doesn't have that many characters, return its length instead. 
+ */ +int +ufindbreak(char *s /* s is message text */, int n) +{ + int i = 0; + + while (n && s[i]) + { + if ((s[i++] & 0xc0) == 0xc0) + while ((s[i] & 0xc0) == 0x80) + i++; + --n; + } + return i; +} diff --git a/src/lib/commands/rea.c b/src/lib/commands/rea.c index e319eb788..b015d509e 100644 --- a/src/lib/commands/rea.c +++ b/src/lib/commands/rea.c @@ -63,7 +63,7 @@ rea(void) int teles; int size; unsigned int nbytes; - s_char buf[4096]; + s_char buf[4096]; /* buf is message text */ int lasttype; int lastcnum; time_t lastdate; @@ -168,7 +168,7 @@ rea(void) (void)fread(buf, sizeof(s_char), nbytes, telfp); buf[nbytes] = 0; if (readit) - prnf(buf); + uprnf(buf); tgm.tel_length -= nbytes; } } diff --git a/src/lib/commands/togg.c b/src/lib/commands/togg.c index ab5a0ec5d..7230db929 100644 --- a/src/lib/commands/togg.c +++ b/src/lib/commands/togg.c @@ -73,6 +73,10 @@ togg(void) name = "techlists"; flag = NF_TECHLISTS; break; + case 'u': + name = "UTF-8"; + flag = NF_UTF8; + break; default: return RET_SYN; } @@ -116,6 +120,10 @@ togg(void) pr("techlists flag on\n"); else pr("techlists flag off\n"); + if (np->nat_flags & NF_UTF8) + pr("UTF-8 flag on\n"); + else + pr("UTF-8 flag off\n"); } return RET_OK; diff --git a/src/lib/gen/getstring.c b/src/lib/gen/getstring.c index c7077311b..0938cddfc 100644 --- a/src/lib/gen/getstring.c +++ b/src/lib/gen/getstring.c @@ -47,3 +47,16 @@ getstring(char *prompt, char *buf) return 0; return buf; } + +/* + * Print sub-prompt PROMPT, receive a line of UTF8 input into BUF[1024]. + * Return BUF on success, else NULL. 
+ */ +char * +ugetstring(char *prompt, char *buf /* buf is message text */) +{ + *buf = '\0'; + if (uprmptrd(prompt, buf, 1024) < 0) + return 0; + return buf; +} diff --git a/src/lib/gen/parse.c b/src/lib/gen/parse.c index 5ae248c17..814a2b7f8 100644 --- a/src/lib/gen/parse.c +++ b/src/lib/gen/parse.c @@ -79,7 +79,10 @@ parse(register s_char *buf, s_char **argpp, s_char **condp, s_char *space, quoted = !quoted; buf++; } else { - *bp1++ = *buf++; + if (*buf >= 0x20 && *buf <= 0x7e) + *bp1++ = *buf++; + else + buf++; } } *bp1++ = 0; diff --git a/src/lib/player/empdis.c b/src/lib/player/empdis.c index 972f58417..831710a93 100644 --- a/src/lib/player/empdis.c +++ b/src/lib/player/empdis.c @@ -154,7 +154,7 @@ gamedown(void) { FILE *down_fp; struct telstr tgm; - s_char buf[MAXTELSIZE]; + s_char buf[MAXTELSIZE]; /* buf is message text */ if (player->god) return 0; @@ -176,7 +176,7 @@ gamedown(void) return 1; } buf[tgm.tel_length] = 0; - prnf(buf); + uprnf(buf); pr("\nThe game is down\n"); fclose(down_fp); return 1; diff --git a/src/lib/player/player.c b/src/lib/player/player.c index d41cbe7cd..a59873cc4 100644 --- a/src/lib/player/player.c +++ b/src/lib/player/player.c @@ -374,7 +374,7 @@ show_motd(void) return RET_FAIL; } buf[tgm.tel_length] = 0; - prnf(buf); + uprnf(buf); fclose(motd_fp); return RET_OK; } diff --git a/src/lib/subs/getele.c b/src/lib/subs/getele.c index 0ac3d6c39..8c8785ad5 100644 --- a/src/lib/subs/getele.c +++ b/src/lib/subs/getele.c @@ -40,13 +40,12 @@ static int tilde_escape(s_char *s, s_char c); int -getele(s_char *nation, s_char *buf) +getele(char *nation, char *buf /* buf is message text */) { - register s_char *bp; + register char *bp; register int len; - register int c; - s_char buffer[MAXTELSIZE + 2]; - s_char left[MAXTELSIZE + 2]; + char buffer[MAXTELSIZE + 2]; /* buf is message text */ + char left[MAXTELSIZE + 2]; /* buf is message text */ pr("Enter telegram for %s\n", nation); pr("undo last line with ~u, print with ~p, abort with ~q, end 
with ^D or .\n"); @@ -54,7 +53,7 @@ getele(s_char *nation, s_char *buf) while (!player->aborted) { sprintf(left, "%4d left: ", (int)(buf + MAXTELSIZE - bp)); buffer[0] = 0; - if (prmptrd(left, buffer, MAXTELSIZE - 2) <= 0) + if (uprmptrd(left, buffer, MAXTELSIZE - 2) <= 0) break; if (tilde_escape(buffer, 'q')) return -1; @@ -96,12 +95,6 @@ getele(s_char *nation, s_char *buf) return -1; len = bp - buf; buf[len] = 0; - /* Get rid of non-ASCII and control characters. */ - for (bp = buf; 0 != (c = *bp); bp++) { - if (isascii(c) && (isprint(c) || isspace(c))) - continue; - *bp = '?'; - } return len; } diff --git a/src/lib/subs/pr.c b/src/lib/subs/pr.c index e57c1b3a8..340852280 100644 --- a/src/lib/subs/pr.c +++ b/src/lib/subs/pr.c @@ -59,20 +59,32 @@ static void outid(struct player *pl, int n); /*VARARGS*/ void -pr(s_char *format, ...) +pr(char *format, ...) { - s_char buf[4096]; + struct natstr *np = getnatp(player->cnum); + char buf[4096]; va_list ap; va_start(ap, format); (void)vsprintf(buf, format, ap); va_end(ap); - pr_player(player, C_DATA, buf); + if (np->nat_flags & NF_UTF8) + upr_player(player, C_DATA, buf); + else + pr_player(player, C_DATA, buf); } void -prnf(s_char *buf) +uprnf(char *buf /* buf is message text */) { + struct natstr *np = getnatp(player->cnum); + + /* + * Translate to ASCII if the client is not in UTF mode + */ + if (!(np->nat_flags & NF_UTF8)) + prtoascii(buf); + pr_player(player, C_DATA, buf); } @@ -94,9 +106,11 @@ pr_id(struct player *p, int id, s_char *format, ...) } void -pr_flash(struct player *pl, s_char *format, ...) +pr_flash(struct player *pl, char *format + /* format is message text */, ...) { - s_char buf[4096]; + struct natstr *np = getnatp(pl->cnum); + char buf[4096]; /* buf is message text */ va_list ap; if (pl->state != PS_PLAYING) @@ -104,6 +118,11 @@ pr_flash(struct player *pl, s_char *format, ...) 
va_start(ap, format); (void)vsprintf(buf, format, ap); va_end(ap); + /* + * Translate to ASCII if the client is not in UTF mode + */ + if (!(np->nat_flags & NF_UTF8)) + prtoascii(buf); pr_player(pl, C_FLASH, buf); io_output(pl->iop, IO_NOWAIT); } @@ -154,9 +173,8 @@ pr_player(struct player *pl, int id, s_char *buf) io_puts(pl->iop, "\n"); pl->curid = -1; } - if (pl->curid == -1) { + if (pl->curid == -1) outid(pl, id); - } p = strchr(bp, '\n'); if (p != 0) { len = (p - bp) + 1; @@ -173,6 +191,54 @@ pr_player(struct player *pl, int id, s_char *buf) } } +void +upr_player(struct player *pl, int id, char *buf + /* buf is message text */) +{ + register char *bp; /* bp is message text */ + register int standout = 0; + char printbuf[2]; /* bp is message text */ + + printbuf[0] = '\0'; + printbuf[1] = '\0'; + + bp = buf; + while (*bp != '\0') { + if (pl->curid != -1 && pl->curid != id) { + io_puts(pl->iop, "\n"); + pl->curid = -1; + } + if (pl->curid == -1) + outid(pl, id); + + if (*bp < 0) { /* looking for standout bit 0x80 */ + if (standout == 0) { + printbuf[0] = 0x0e; + io_puts(pl->iop, printbuf); + standout = 1; + } + *bp &= 0x7f; + } else { + if (standout == 1) { + printbuf[0] = 0x0f; + io_puts(pl->iop, printbuf); + standout = 0; + } + } + if (*bp == '\n') { + if (pl->command && (pl->command->c_flags & C_MOD)) + io_write(pl->iop, bp, 1, IO_NOWAIT); + else + io_write(pl->iop, bp, 1, IO_WAIT); + pl->curid = -1; + } else { + printbuf[0] = *bp; + io_puts(pl->iop, printbuf); + } + bp++; + } +} + /* * highlighted characters have hex 80 or'ed in * with them to designate their highlightedness @@ -242,9 +308,31 @@ showvers(int vers) } int -prmptrd(s_char *prompt, s_char *str, int size) +prmptrd(char *prompt, char *str, int size) +{ + int r; + char *cp; + + pr_id(player, C_FLUSH, "%s\n", prompt); + if ((r = recvclient(str, size)) < 0) + return r; + time(&player->curup); + if (*str == 0) + return 1; + for(cp = str; 0 != *cp; ++cp) { + if ((*cp >= 0x0 && *cp < 0x20 && *cp != 
'\t') || + *cp == 0x7f || *cp & 0x80) + *cp = '?'; + } + return strlen(str); +} + +int +uprmptrd(char *prompt, char *str /* str is message text */, int size) { int r; + char *cp; /* cp is message text */ + struct natstr *np = getnatp(player->cnum); pr_id(player, C_FLUSH, "%s\n", prompt); if ((r = recvclient(str, size)) < 0) @@ -252,6 +340,14 @@ prmptrd(s_char *prompt, s_char *str, int size) time(&player->curup); if (*str == 0) return 1; + + for(cp = (unsigned char *)str; 0 != *cp; ++cp) { + if ((*cp >= 0x0 && *cp < 0x20 && *cp != '\t') || + *cp == 0x7f) + *cp = '?'; + else if (!(np->nat_flags & NF_UTF8) && (*cp & 0x80)) + *cp = '?'; + } return strlen(str); } @@ -336,3 +432,17 @@ mpr(int cn, s_char *format, ...) pr_player(player, C_DATA, buf); } } + +void +prtoascii(char *buf /* buf is message text */) +{ + char *pbuf; /* pbuf is message text */ + + for(pbuf = buf; *pbuf != 0; pbuf++) + if ((*pbuf & 0xc0) == 0xc0) + *pbuf = '?'; + else if (*pbuf & 0x80) { + memmove(pbuf,pbuf+1,strlen(pbuf)-1); + pbuf--; + } +} -- 2.43.0