summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kaz Kylheku <kaz@kylheku.com> 2009-11-14 18:34:57 -0800
committer Kaz Kylheku <kaz@kylheku.com> 2009-11-14 18:34:57 -0800
commit 19cdd145d345c4560ccc9d841025415e9696b64e (patch)
tree d93b2eebb6fd855774667b0e8a361f2f130f653c
parent d3aceae6d8c4e37e0d7fb8290a9ae58fc9f7a149 (diff)
download txr-19cdd145d345c4560ccc9d841025415e9696b64e.tar.gz
txr-19cdd145d345c4560ccc9d841025415e9696b64e.tar.bz2
txr-19cdd145d345c4560ccc9d841025415e9696b64e.zip
Provide both char * and unsigned char * interfaces in UTF-8 module.
Fix unsigned and plain char * mixing.
-rw-r--r-- ChangeLog 16
-rw-r--r-- lib.c 2
-rw-r--r-- lib.h 2
-rw-r--r-- stream.c 2
-rw-r--r-- utf8.c 38
-rw-r--r-- utf8.h 12
6 files changed, 59 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index 33386b85..8b1a7ea7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,21 @@
2009-11-14 Kaz Kylheku <kkylheku@gmail.com>
+ Provide both char * and unsigned char * interfaces in UTF-8 module.
+ Fix unsigned and plain char * mixing.
+
+ * utf8.c (utf8_from_uc, utf8_to_uc, utf8_dup_from_uc,
+ utf8_dup_to_uc): New functions.
+ (utf8_from): Fix type of backtrack pointer to unsigned char *.
+
+ * utf8.h (utf8_from_uc, utf8_to_uc, utf8_dup_from_uc,
+ utf8_dup_to_uc): Declared.
+
+ * lib.c (string_utf8): Changed to take char * argument.
+
+ * lib.h (string_utf8): Declaration updated.
+
+2009-11-14 Kaz Kylheku <kkylheku@gmail.com>
+
* Makefile (depend): Marked phony and $(PROG) prerequisite dropped.
(clean, distclean, tests, install): Phony targets marked phony.
diff --git a/lib.c b/lib.c
index 10c4f972..25e8198e 100644
--- a/lib.c
+++ b/lib.c
@@ -701,7 +701,7 @@ obj_t *string(const wchar_t *str)
return obj;
}
-obj_t *string_utf8(const unsigned char *str)
+obj_t *string_utf8(const char *str)
{
obj_t *obj = make_obj();
obj->st.type = STR;
diff --git a/lib.h b/lib.h
index 5fb84018..0eab0604 100644
--- a/lib.h
+++ b/lib.h
@@ -240,7 +240,7 @@ obj_t *max2(obj_t *anum, obj_t *bnum);
obj_t *min2(obj_t *anum, obj_t *bnum);
obj_t *string_own(wchar_t *str);
obj_t *string(const wchar_t *str);
-obj_t *string_utf8(const unsigned char *str);
+obj_t *string_utf8(const char *str);
obj_t *mkstring(obj_t *len, obj_t *ch);
obj_t *mkustring(obj_t *len); /* must initialize immediately with init_str! */
obj_t *init_str(obj_t *str, const wchar_t *);
diff --git a/stream.c b/stream.c
index 282a5b94..2ec402bb 100644
--- a/stream.c
+++ b/stream.c
@@ -629,7 +629,7 @@ obj_t *make_string_byte_input_stream(obj_t *string)
{
struct byte_input *bi = (struct byte_input *) chk_malloc(sizeof *bi);
- unsigned char *utf8 = utf8_dup_to(c_str(string));
+ unsigned char *utf8 = utf8_dup_to_uc(c_str(string));
bi->buf = utf8;
bi->size = strlen((char *) utf8);
bi->index = 0;
diff --git a/utf8.c b/utf8.c
index ca2e9016..a5a1fc7e 100644
--- a/utf8.c
+++ b/utf8.c
@@ -31,11 +31,11 @@
#include "lib.h"
#include "utf8.h"
-size_t utf8_from(wchar_t *wdst, const unsigned char *src)
+size_t utf8_from_uc(wchar_t *wdst, const unsigned char *src)
{
size_t nchar = 1;
enum utf8_state state = utf8_init;
- const char *backtrack = 0;
+ const unsigned char *backtrack = 0;
wchar_t wch = 0;
for (;;) {
@@ -101,7 +101,12 @@ size_t utf8_from(wchar_t *wdst, const unsigned char *src)
return nchar;
}
-size_t utf8_to(unsigned char *dst, const wchar_t *wsrc)
+size_t utf8_from(wchar_t *wdst, const char *src)
+{
+ return utf8_from_uc(wdst, (const unsigned char *) src);
+}
+
+size_t utf8_to_uc(unsigned char *dst, const wchar_t *wsrc)
{
size_t nbyte = 1;
wchar_t wch;
@@ -140,7 +145,20 @@ size_t utf8_to(unsigned char *dst, const wchar_t *wsrc)
return nbyte;
}
-wchar_t *utf8_dup_from(const unsigned char *str)
+size_t utf8_to(char *dst, const wchar_t *wsrc)
+{
+ return utf8_to_uc((unsigned char *) dst, wsrc);
+}
+
+wchar_t *utf8_dup_from_uc(const unsigned char *str)
+{
+ size_t nchar = utf8_from_uc(0, str);
+ wchar_t *wstr = chk_malloc(sizeof *wstr * nchar);
+ utf8_from_uc(wstr, str);
+ return wstr;
+}
+
+wchar_t *utf8_dup_from(const char *str)
{
size_t nchar = utf8_from(0, str);
wchar_t *wstr = chk_malloc(sizeof *wstr * nchar);
@@ -148,10 +166,18 @@ wchar_t *utf8_dup_from(const unsigned char *str)
return wstr;
}
-unsigned char *utf8_dup_to(const wchar_t *wstr)
+unsigned char *utf8_dup_to_uc(const wchar_t *wstr)
{
- size_t nbyte = utf8_to(0, wstr);
+ size_t nbyte = utf8_to_uc(0, wstr);
unsigned char *str = chk_malloc(nbyte);
+ utf8_to_uc(str, wstr);
+ return str;
+}
+
+char *utf8_dup_to(const wchar_t *wstr)
+{
+ size_t nbyte = utf8_to(0, wstr);
+ char *str = chk_malloc(nbyte);
utf8_to(str, wstr);
return str;
}
diff --git a/utf8.h b/utf8.h
index 542a84fa..159a7e8a 100644
--- a/utf8.h
+++ b/utf8.h
@@ -24,10 +24,14 @@
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
-size_t utf8_from(wchar_t *, const unsigned char *);
-size_t utf8_to(unsigned char *, const wchar_t *);
-wchar_t *utf8_dup_from(const unsigned char *);
-unsigned char *utf8_dup_to(const wchar_t *);
+size_t utf8_from_uc(wchar_t *, const unsigned char *);
+size_t utf8_from(wchar_t *, const char *);
+size_t utf8_to_uc(unsigned char *, const wchar_t *);
+size_t utf8_to(char *, const wchar_t *);
+wchar_t *utf8_dup_from_uc(const unsigned char *);
+wchar_t *utf8_dup_from(const char *);
+char *utf8_dup_to(const wchar_t *);
+unsigned char *utf8_dup_to_uc(const wchar_t *);
enum utf8_state { utf8_init, utf8_more1, utf8_more2, utf8_more3 };