summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2022-03-21 06:16:43 -0700
committerKaz Kylheku <kaz@kylheku.com>2022-03-21 06:16:43 -0700
commitaf8b7db50fcc3ed34bfe1136d1664d59e89ba1a9 (patch)
tree14a7981e70ecc67f165e848dd4e3d0a7349d8671
parent3ce67812ebbbb8c0a82d77a72f565cda40bdec73 (diff)
downloadtxr-af8b7db50fcc3ed34bfe1136d1664d59e89ba1a9.tar.gz
txr-af8b7db50fcc3ed34bfe1136d1664d59e89ba1a9.tar.bz2
txr-af8b7db50fcc3ed34bfe1136d1664d59e89ba1a9.zip
New FFI types str-s, bstr-s and wstr-s.
These types actually make it possible to receive a string by pointer from a C function, without trying to free it. It is now possible to write a FFI wrapper for strtol or wcstol, which is done in the new test case. * ffi.c (str_s_s, bstr_s_s, wstr_s_s): New symbol variables. (ffi_init_types): Register the types str-s, bstr-s and wstr-s. (ffi_init): Intern the new symbols. * tests/017/str-s.tl: New file. * txr.1: Documented. * stdlib/doc-syms.tl: Updated.
-rw-r--r--ffi.c25
-rw-r--r--stdlib/doc-syms.tl15
-rw-r--r--tests/017/str-s.tl11
-rw-r--r--txr.1146
4 files changed, 143 insertions, 54 deletions
diff --git a/ffi.c b/ffi.c
index 268a7a46..2bb52d29 100644
--- a/ffi.c
+++ b/ffi.c
@@ -135,7 +135,7 @@ val array_s, zarray_s, carray_s;
val union_s;
-val str_d_s, wstr_s, wstr_d_s, bstr_s, bstr_d_s;
+val str_d_s, str_s_s, wstr_s, wstr_d_s, wstr_s_s, bstr_s, bstr_d_s, bstr_s_s;
val buf_d_s;
@@ -4614,6 +4614,26 @@ static void ffi_init_types(void)
ffi_bstr_put, ffi_bstr_d_get,
0, 0));
+ ffi_typedef(str_s_s, make_ffi_type_builtin(str_s_s, str_s, FFI_KIND_PTR,
+ sizeof (mem_t *),
+ alignof (mem_t *),
+ &ffi_type_pointer,
+ ffi_ptr_out_null_put, ffi_str_get,
+ 0, 0));
+
+ ffi_typedef(wstr_s_s, make_ffi_type_builtin(wstr_s_s, str_s, FFI_KIND_PTR,
+ sizeof (mem_t *),
+ alignof (mem_t *),
+ &ffi_type_pointer,
+ ffi_ptr_out_null_put, ffi_wstr_get,
+ 0, 0));
+
+ ffi_typedef(bstr_s_s, make_ffi_type_builtin(bstr_s_s, str_s, FFI_KIND_PTR,
+ sizeof (mem_t *),
+ alignof (mem_t *),
+ &ffi_type_pointer,
+ ffi_ptr_out_null_put, ffi_bstr_get,
+ 0, 0));
{
val iter;
@@ -6772,10 +6792,13 @@ void ffi_init(void)
carray_s = intern(lit("carray"), user_package);
union_s = intern(lit("union"), user_package);
str_d_s = intern(lit("str-d"), user_package);
+ str_s_s = intern(lit("str-s"), user_package);
wstr_s = intern(lit("wstr"), user_package);
wstr_d_s = intern(lit("wstr-d"), user_package);
+ wstr_s_s = intern(lit("wstr-s"), user_package);
bstr_s = intern(lit("bstr"), user_package);
bstr_d_s = intern(lit("bstr-d"), user_package);
+ bstr_s_s = intern(lit("bstr-s"), user_package);
buf_d_s = intern(lit("buf-d"), user_package);
ptr_in_s = intern(lit("ptr-in"), user_package);
ptr_out_s = intern(lit("ptr-out"), user_package);
diff --git a/stdlib/doc-syms.tl b/stdlib/doc-syms.tl
index 179ce5e1..5ce5c0d0 100644
--- a/stdlib/doc-syms.tl
+++ b/stdlib/doc-syms.tl
@@ -168,8 +168,9 @@
("bs0" "N-03BD477F")
("bs1" "N-03BD477F")
("bsdly" "N-03BD477F")
- ("bstr" "N-00C6B7C4")
- ("bstr-d" "N-00C6B7C4")
+ ("bstr" "N-0225F1EF")
+ ("bstr-d" "N-0225F1EF")
+ ("bstr-s" "N-0225F1EF")
("buf" "D-005E")
("buf-alloc-size" "N-013A3727")
("buf-carray" "N-0022F54E")
@@ -1852,13 +1853,14 @@
("static-slot-p" "N-032FD510")
("static-slot-set" "N-0017D1B5")
("stdlib" "N-008E4BC2")
- ("str" "N-00C6B7C4")
+ ("str" "N-01736060")
("str-buf" "N-012BF6AD")
- ("str-d" "N-00C6B7C4")
+ ("str-d" "N-01736060")
("str-in6addr" "N-01FF658D")
("str-in6addr-net" "N-00918411")
("str-inaddr" "N-01FF658D")
("str-inaddr-net" "N-00918411")
+ ("str-s" "N-01736060")
("str-seq" "N-02F0880D")
("str<" "N-01AA954A")
("str<=" "N-01AA954A")
@@ -2195,8 +2197,9 @@
("with-update-expander" "N-006EA023")
("wrap" "N-026DDCEC")
("wrap*" "N-026DDCEC")
- ("wstr" "N-032DB6DC")
- ("wstr-d" "N-032DB6DC")
+ ("wstr" "N-033B8A6D")
+ ("wstr-d" "N-033B8A6D")
+ ("wstr-s" "N-033B8A6D")
("xcase" "N-0072FF5E")
("yield" "N-02AE5C1E")
("yield-from" "N-01556613")
diff --git a/tests/017/str-s.tl b/tests/017/str-s.tl
new file mode 100644
index 00000000..bb9dc38a
--- /dev/null
+++ b/tests/017/str-s.tl
@@ -0,0 +1,11 @@
+(load "../common")
+
+(with-dyn-lib nil
+ (deffi strtol "strtol" long (str (ptr-out (array 1 str-s)) int))
+ (deffi bcstol "strtol" long (bstr (ptr-out (array 1 bstr-s)) int))
+ (deffi wcstol "wcstol" long (wstr (ptr-out (array 1 wstr-s)) int)))
+
+(mtest
+ (let ((v (vec nil))) (list (strtol "-345x" v 0) v)) (-345 #("x"))
+ (let ((v (vec nil))) (list (bcstol "-345x" v 0) v)) (-345 #("x"))
+ (let ((v (vec nil))) (list (wcstol "-345x" v 0) v)) (-345 #("x")))
diff --git a/txr.1 b/txr.1
index e6a2b381..7ffc3d13 100644
--- a/txr.1
+++ b/txr.1
@@ -79078,26 +79078,21 @@ object can be passed as the argument of
.code fclose
to close the stream.
-.coNP FFI types @, str @, bstr @ str-d and @ bstr-d
+.coNP FFI types @, str @ str-d and @ str-s
These FFI types correspond to the C pointer type
.codn "char *" ,
providing automatic conversion between Lisp strings and null-terminated
-C strings. The
-.code str
-and
-.code str-d
-types use UTF-8 encoding. The
-.code bstr
+C strings.
+
+The related types
+.codn bstr ,
+.codn bstr-d ,
+.codn bstr-s ,
+.codn wstr ,
+.code wstr-d
and
-.code bstr-d
-types do not use UTF-8: only Lisp strings which contain strictly
-code points in the range U+0000 to U+00FF may convert to these types;
-out-of-range characters trigger an error exception.
-The
-.code -d
-suffixed types differ from the unsuffixed variants
-in that they denote the transfer of ownership of dynamically allocated memory,
-and thus the responsibility for freeing that memory.
+.code wstr-s
+are also provided; these are described in the following sections.
The
.code str
@@ -79131,17 +79126,19 @@ it deallocates that C string by invoking the C library function
.code free
on it.
-The type
-.code bstr-d
-behaves like
-.code str-d
-with regard to memory management; it differs from
-.code str-d
-in the same way that
-.code str
-differs from
-.codn bstr :
-it doesn't perform UTF-8 encoding or decoding.
+The type
+.code str-s
+is similar to
+.codn str-d ;
+it also has no in-operation, and doesn't deallocate the buffer
+allocated in the put operation.
+Under the get operation, the
+.code str-s
+type does not assume ownership of memory, and therefore does not
+free the pointer received from the foreign function. The
+.code str-s
+type is intended for receiving strings via a pointer-to-pointer
+argument, in situations when the string must not be freed.
Like other types, the string types combine with the
.code ptr
@@ -79151,7 +79148,7 @@ family has memory management semantics, as does the string family,
it is important to understand the memory management implications
of the combination of the two.
-The types
+The derived pointer types
.code "(ptr str-d)"
and
.code "(ptr str)"
@@ -79172,20 +79169,19 @@ pointer, and then pass that pointer to the C
.code free
function.
-To receive a string pointer by pointer from a foreign
-function, one of the types
-.code "(ptr-out str)"
-or
-.code "(ptr-out str-d)"
-should be used, which have different semantics. In either situation, FFI will
-prepare a pointer-sized uninitialized buffer, which the called function fills
-with a
-.code "char *"
-pointer. In the
-.code str
-case, FFI will duplicate that string to a Lisp string. In the
-.code str-d
-case, FFI will also free the string received from the foreign function.
+Receiving a string by pointer from a foreign function is achieved
+by treating the situation as a pointer to an array of one element.
+That is to say, an argument like
+.code "char **pstr"
+can be treated as either
+.code "(ptr-out (array 1 str-d))"
+if the foreign function passes ownership of the string, or else
+.code "(ptr-out (array 1 str-s))"
+if the foreign function retains ownership of the string.
+In either case, the argument is a vector of one element, which
+will be updated to the returned string, or else
+.code nil
+if the function passes back a null pointer.
The type combination
.code "(ptr-in str-d)"
@@ -79197,18 +79193,74 @@ passes the string pointer in the same way, but the foreign module mustn't
use the pointer after returning. FFI will free the pointer that had been
passed.
-.coNP FFI types @ wstr and @ wstr-d
+.coNP FFI types @, bstr @ bstr-d and @ bstr-s
+The
+.code bstr
+family corresponds to null-terminated
+.code "char *"
+C strings, like the
+.code str
+family, and the family members have memory management semantics
+similar to their
+.code str
+counterparts.
+
+The
+.code b
+prefix in the naming denotes "byte". It indicates that unlike the
+.code str
+family, the
+.code bstr
+family does not use UTF-8 encoding; only Lisp strings which contain strictly
+code points in the range U+0000 to U+00FF may convert to these types;
+out-of-range characters trigger an error exception.
+
+Likewise, in the reverse direction, no UTF-8 decoding is performed: every byte value
+turns into the corresponding character code. The byte 0 is interpreted as the
+string terminator.
+
+Note: the
+.code bstr
+type may be advantageous in situations when character handling is known
+to be confined to the ASCII range, since UTF-8 conversion is then
+unnecessary overhead. Because \*(TX strings use wide characters internally,
+converting to and from the
+.code bstr
+type still requires memory management overhead, just like in the case of the
+.code str
+type. The
+.code wstr
+type described in the next section avoids memory management and conversion
+overhead. Thus, even in situations in which characters are confined to the
+ASCII range, if wide functions are available in the foreign API, it may
+be more efficient to use them, particularly if the foreign component
+uses that representation internally.
+
+.coNP FFI types @, wstr @ wstr-d and @ wstr-s
The FFI type
.code wstr
corresponds to the C type
.code "wchar_t *"
pointing to the first character of a null terminated wide string.
It converts between Lisp strings and symbols, and C strings.
-The memory management is similar to the
+The family members of
+.code wstr
+have memory management semantics similar to their
.code str
-and
-.code str-d
-types, except that no UTF-8 conversion takes place.
+counterparts.
+
+Note: because wide characters do not require UTF-8 conversion, the
+.code wstr
+family is more efficient. A
+.code wstr
+string passes into foreign code directly: the Lisp object already contains
+a null-terminated wide character string, and so the pointer to that is
+given directly to the foreign code. Similarly, ownership transfer in
+either direction is a pointer passage with no memory management or conversion
+overheads.
+Whenever some foreign API offers a choice between UTF-8 strings, and wide
+strings, the wide version should be targeted by FFI, particularly if the
+API is known to work with wide strings internally also.
.coNP FFI types @ buf and @ buf-d
The