aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Christoph Ebersbach <jceb@e-jc.de>2015-05-22 16:06:57 +0200
committerRoberto E. Vargas Caballero <k0ga@shike2.com>2015-05-25 08:35:32 +0200
commitcaa97cc781ccf29f28c3d9e6683a66eb3f70e2bd (patch)
tree98a9dcd8e25fee71514f2a786fdaa785a1aea4bd
parentc03548750b2527a6ddb5edfd945c5799066a6224 (diff)
downloadst-patched-caa97cc781ccf29f28c3d9e6683a66eb3f70e2bd.tar.bz2
st-patched-caa97cc781ccf29f28c3d9e6683a66eb3f70e2bd.tar.xz
st-patched-caa97cc781ccf29f28c3d9e6683a66eb3f70e2bd.zip
Support UTF-8 characters as word delimiters
To make the utf8strchr function more generally useful, it could return the index of the matching UTF-8 character together with its Rune instead of a char *. Since utf8strchr is currently only used by ISDELIM, I didn't bother to add that complexity.
-rw-r--r--st.c18
1 files changed, 17 insertions, 1 deletions
diff --git a/st.c b/st.c
index 0c6b9c3..3460a37 100644
--- a/st.c
+++ b/st.c
@@ -71,7 +71,7 @@ char *argv0;
71#define ISCONTROLC0(c) (BETWEEN(c, 0, 0x1f) || (c) == '\177') 71#define ISCONTROLC0(c) (BETWEEN(c, 0, 0x1f) || (c) == '\177')
72#define ISCONTROLC1(c) (BETWEEN(c, 0x80, 0x9f)) 72#define ISCONTROLC1(c) (BETWEEN(c, 0x80, 0x9f))
73#define ISCONTROL(c) (ISCONTROLC0(c) || ISCONTROLC1(c)) 73#define ISCONTROL(c) (ISCONTROLC0(c) || ISCONTROLC1(c))
74#define ISDELIM(u) (BETWEEN(u, 0, 127) && strchr(worddelimiters, u) != NULL) 74#define ISDELIM(u) (utf8strchr(worddelimiters, u) != NULL)
75#define LIMIT(x, a, b) (x) = (x) < (a) ? (a) : (x) > (b) ? (b) : (x) 75#define LIMIT(x, a, b) (x) = (x) < (a) ? (a) : (x) > (b) ? (b) : (x)
76#define ATTRCMP(a, b) ((a).mode != (b).mode || (a).fg != (b).fg || (a).bg != (b).bg) 76#define ATTRCMP(a, b) ((a).mode != (b).mode || (a).fg != (b).fg || (a).bg != (b).bg)
77#define IS_SET(flag) ((term.mode & (flag)) != 0) 77#define IS_SET(flag) ((term.mode & (flag)) != 0)
@@ -473,6 +473,7 @@ static size_t utf8decode(char *, Rune *, size_t);
473static Rune utf8decodebyte(char, size_t *); 473static Rune utf8decodebyte(char, size_t *);
474static size_t utf8encode(Rune, char *); 474static size_t utf8encode(Rune, char *);
475static char utf8encodebyte(Rune, size_t); 475static char utf8encodebyte(Rune, size_t);
476static char *utf8strchr(char *s, Rune u);
476static size_t utf8validate(Rune *, size_t); 477static size_t utf8validate(Rune *, size_t);
477 478
478static ssize_t xwrite(int, const char *, size_t); 479static ssize_t xwrite(int, const char *, size_t);
@@ -640,6 +641,21 @@ utf8encodebyte(Rune u, size_t i) {
640 return utfbyte[i] | (u & ~utfmask[i]); 641 return utfbyte[i] | (u & ~utfmask[i]);
641} 642}
642 643
/*
 * Search the NUL-terminated string s for the Rune u.
 *
 * The string is walked one decoded Rune at a time: each iteration decodes
 * the sequence starting at s[i] and compares the resulting Rune with u.
 * Returns a pointer to the first byte of the first sequence whose decoded
 * value equals u, or NULL if the end of the string is reached without a
 * match. The scan also ends with NULL as soon as utf8decode() returns 0,
 * because the loop could not advance past that position.
 *
 * NOTE(review): assumes utf8decode() returns the number of bytes it
 * consumed -- confirm against its definition.
 */
644char *
645utf8strchr(char *s, Rune u) {
646 Rune r;
647 size_t i, j, len;
648
649 len = strlen(s);
650 for(i = 0, j = 0; i < len; i += j) {
651 if(!(j = utf8decode(&s[i], &r, len - i)))
652 break;
653 if(r == u)
654 return &(s[i]);
655 }
656 return NULL;
657}
658
643size_t 659size_t
644utf8validate(Rune *u, size_t i) { 660utf8validate(Rune *u, size_t i) {
645 if(!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF)) 661 if(!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF))