New FFI types str-s, bstr-s and wstr-s.

These types actually make it possible to receive a string by pointer from a C function, without trying to free it. It is now possible to write a FFI wrapper for strtol or wcstol, which is done in the new test case. * ffi.c (str_s_s, bstr_s_s, wstr_s_s): New symbol variables. (ffi_init_types): Register the types str-s, bstr-s and wstr-s. (ffi_init): Intern the new symbols. * tests/017/str-s.tl: New file. * txr.1: Documented. * stdlib/doc-syms.tl: Updated.
author: Kaz Kylheku <kaz@kylheku.com> 2022-03-21 06:16:43 -0700
committer: Kaz Kylheku <kaz@kylheku.com> 2022-03-21 06:16:43 -0700
commit: af8b7db50fcc3ed34bfe1136d1664d59e89ba1a9 (patch)
tree: 14a7981e70ecc67f165e848dd4e3d0a7349d8671
parent: 3ce67812ebbbb8c0a82d77a72f565cda40bdec73 (diff)
download: txr-af8b7db50fcc3ed34bfe1136d1664d59e89ba1a9.tar.gz
txr-af8b7db50fcc3ed34bfe1136d1664d59e89ba1a9.tar.bz2
txr-af8b7db50fcc3ed34bfe1136d1664d59e89ba1a9.zip
4 files changed, 143 insertions, 54 deletions
diff --git a/ffi.c b/ffi.c
index 268a7a46..2bb52d29 100644
--- a/ffi.c
+++ b/ffi.c
@@ -135,7 +135,7 @@ val array_s, zarray_s, carray_s;
 
 val union_s;
 
-val str_d_s, wstr_s, wstr_d_s, bstr_s, bstr_d_s;
+val str_d_s, str_s_s, wstr_s, wstr_d_s, wstr_s_s, bstr_s, bstr_d_s, bstr_s_s;
 
 val buf_d_s;
 
@@ -4614,6 +4614,26 @@ static void ffi_init_types(void)
                                               ffi_bstr_put, ffi_bstr_d_get,
                                               0, 0));
 
+  ffi_typedef(str_s_s, make_ffi_type_builtin(str_s_s, str_s, FFI_KIND_PTR,
+                                             sizeof (mem_t *),
+                                             alignof (mem_t *),
+                                             &ffi_type_pointer,
+                                             ffi_ptr_out_null_put, ffi_str_get,
+                                             0, 0));
+
+  ffi_typedef(wstr_s_s, make_ffi_type_builtin(wstr_s_s, str_s, FFI_KIND_PTR,
+                                              sizeof (mem_t *),
+                                              alignof (mem_t *),
+                                              &ffi_type_pointer,
+                                              ffi_ptr_out_null_put, ffi_wstr_get,
+                                              0, 0));
+
+  ffi_typedef(bstr_s_s, make_ffi_type_builtin(bstr_s_s, str_s, FFI_KIND_PTR,
+                                              sizeof (mem_t *),
+                                              alignof (mem_t *),
+                                              &ffi_type_pointer,
+                                              ffi_ptr_out_null_put, ffi_bstr_get,
+                                              0, 0));
   {
     val iter;
 
@@ -6772,10 +6792,13 @@ void ffi_init(void)
   carray_s = intern(lit("carray"), user_package);
   union_s = intern(lit("union"), user_package);
   str_d_s = intern(lit("str-d"), user_package);
+  str_s_s = intern(lit("str-s"), user_package);
   wstr_s = intern(lit("wstr"), user_package);
   wstr_d_s = intern(lit("wstr-d"), user_package);
+  wstr_s_s = intern(lit("wstr-s"), user_package);
   bstr_s = intern(lit("bstr"), user_package);
   bstr_d_s = intern(lit("bstr-d"), user_package);
+  bstr_s_s = intern(lit("bstr-s"), user_package);
   buf_d_s = intern(lit("buf-d"), user_package);
   ptr_in_s = intern(lit("ptr-in"), user_package);
   ptr_out_s = intern(lit("ptr-out"), user_package);
diff --git a/stdlib/doc-syms.tl b/stdlib/doc-syms.tl
index 179ce5e1..5ce5c0d0 100644
--- a/stdlib/doc-syms.tl
+++ b/stdlib/doc-syms.tl
@@ -168,8 +168,9 @@
       ("bs0" "N-03BD477F")
       ("bs1" "N-03BD477F")
       ("bsdly" "N-03BD477F")
-      ("bstr" "N-00C6B7C4")
-      ("bstr-d" "N-00C6B7C4")
+      ("bstr" "N-0225F1EF")
+      ("bstr-d" "N-0225F1EF")
+      ("bstr-s" "N-0225F1EF")
       ("buf" "D-005E")
       ("buf-alloc-size" "N-013A3727")
       ("buf-carray" "N-0022F54E")
@@ -1852,13 +1853,14 @@
       ("static-slot-p" "N-032FD510")
       ("static-slot-set" "N-0017D1B5")
       ("stdlib" "N-008E4BC2")
-      ("str" "N-00C6B7C4")
+      ("str" "N-01736060")
       ("str-buf" "N-012BF6AD")
-      ("str-d" "N-00C6B7C4")
+      ("str-d" "N-01736060")
       ("str-in6addr" "N-01FF658D")
       ("str-in6addr-net" "N-00918411")
       ("str-inaddr" "N-01FF658D")
       ("str-inaddr-net" "N-00918411")
+      ("str-s" "N-01736060")
       ("str-seq" "N-02F0880D")
       ("str<" "N-01AA954A")
       ("str<=" "N-01AA954A")
@@ -2195,8 +2197,9 @@
       ("with-update-expander" "N-006EA023")
       ("wrap" "N-026DDCEC")
       ("wrap*" "N-026DDCEC")
-      ("wstr" "N-032DB6DC")
-      ("wstr-d" "N-032DB6DC")
+      ("wstr" "N-033B8A6D")
+      ("wstr-d" "N-033B8A6D")
+      ("wstr-s" "N-033B8A6D")
       ("xcase" "N-0072FF5E")
       ("yield" "N-02AE5C1E")
       ("yield-from" "N-01556613")
diff --git a/tests/017/str-s.tl b/tests/017/str-s.tl
new file mode 100644
index 00000000..bb9dc38a
--- /dev/null
+++ b/tests/017/str-s.tl
@@ -0,0 +1,11 @@
+(load "../common")
+
+(with-dyn-lib nil
+  (deffi strtol "strtol" long (str (ptr-out (array 1 str-s)) int))
+  (deffi bcstol "strtol" long (bstr (ptr-out (array 1 bstr-s)) int))
+  (deffi wcstol "wcstol" long (wstr (ptr-out (array 1 wstr-s)) int)))
+
+(mtest
+  (let ((v (vec nil))) (list (strtol "-345x" v 0) v)) (-345 #("x"))
+  (let ((v (vec nil))) (list (bcstol "-345x" v 0) v)) (-345 #("x"))
+  (let ((v (vec nil))) (list (wcstol "-345x" v 0) v)) (-345 #("x")))
diff --git a/txr.1 b/txr.1
index e6a2b381..7ffc3d13 100644
--- a/txr.1
+++ b/txr.1
@@ -79078,26 +79078,21 @@ object can be passed as the argument of
 .code fclose
 to close the stream.
 
-.coNP FFI types @, str @, bstr @ str-d and @ bstr-d
+.coNP FFI types @, str @ str-d @ and @ str-s
 These FFI types correspond to the C pointer type
 .codn "char *" ,
 providing automatic conversion between Lisp strings and null-terminated
-C strings. The
-.code str
-and
-.code str-d
-types use UTF-8 encoding. The
-.code bstr
+C strings.
+
+The related types
+.codn bstr ,
+.codn bstr-d ,
+.codn bstr-s ,
+.codn wstr ,
+.code wstr-d
 and
-.code bstr-d
-types do not use UTF-8: only Lisp strings which contain strictly
-code points in the range U+0000 to U+00FF may convert to these types;
-out-of-range characters trigger an error exception.
-The
-.code -d
-suffixed types differ from the unsuffixed variants
-in that they denote the transfer of ownership of dynamically allocated memory,
-and thus the responsibility for freeing that memory.
+.code wstr-s
+are also provided; these are described in the following sections.
 
 The
 .code str
@@ -79131,17 +79126,19 @@ it deallocates that C string by invoking the C library function
 .code free
 on it.
 
-The type
-.code bstr-d
-behaves like
-.code str-d
-with regard to memory management; it differs from
-.code str-d
-in the same way that
-.code str
-differs from
-.codn bstr :
-it doesn't perform UTF-8 encoding or decoding.
+The type
+.code str-s
+is similar to
+.codn str-d ;
+it also has no in-operation, and doesn't deallocate the buffer
+allocated in the put operation.
+Under the get operation, the
+.code str-s
+type does not assume ownership of memory, and therefore does not
+free the pointer received from the foreign function. The
+.code str-s
+type is intended for receiving strings via a pointer-to-pointer
+argument, in situations when the string must not be freed.
 
 Like other types, the string types combine with the
 .code ptr
@@ -79151,7 +79148,7 @@ family has memory management semantics, as does the string family,
 it is important to understand the memory management implications
 of the combination of the two.
 
-The types
+The derived pointer types
 .code "(ptr str-d)"
 and
 .code "(ptr str)"
@@ -79172,20 +79169,19 @@ pointer, and then pass that pointer to the C
 .code free
 function.
 
-To receive a string pointer by pointer from a foreign
-function, one of the types
-.code "(ptr-out str)"
-or
-.code "(ptr-out str-d)"
-should be used, which have different semantics. In either situation, FFI will
-prepare a pointer-sized uninitialized buffer, which the called function fills
-with a
-.code "char *"
-pointer. In the
-.code str
-case, FFI will duplicate that string to a Lisp string. In the
-.code str-d
-case, FFI will also free the string received from the foreign function.
+Receiving a string by pointer from a foreign function is achieved
+by treating the situation as a pointer to an array of one element.
+That is to say, an argument like
+.code "char **pstr"
+can be treated as either
+.code "(ptr-out (array 1 str-d))"
+if the foreign function passes ownership of the string, or else
+.code "(ptr-out (array 1 str-s))"
+if the foreign function retains ownership of the string.
+In either case, the argument is a vector of one element, which
+will be updated to the returned string, or else
+.code nil
+if the function passes back a null pointer.
 
 The type combination
 .code "(ptr-in str-d)"
@@ -79197,18 +79193,74 @@ passes the string pointer in the same way, but the foreign module mustn't
 use the pointer after returning. FFI will free the pointer that had been
 passed.
 
-.coNP FFI types @ wstr and @ wstr-d
+.coNP FFI types @, bstr @ bstr-d @ and @ bstr-s
+The
+.code bstr
+family corresponds to null-terminated
+.code "char *"
+C strings, like the
+.code str
+family, and the family members have memory management semantics
+similar to their
+.code str
+counterparts.
+
+The
+.code b
+prefix in the naming denotes "byte". It indicates that unlike the
+.code str
+family, the
+.code bstr
+family does not use UTF-8 encoding; only Lisp strings which contain strictly
+code points in the range U+0000 to U+00FF may convert to these types;
+out-of-range characters trigger an error exception.
+
+Likewise, in the reverse direction, no UTF-8 decoding is performed: every byte value
+turns into the corresponding character code. The byte 0 is interpreted as the
+string terminator.
+
+Note: the
+.code bstr
+type may be advantageous in situations when character handling is known
+to be confined to the ASCII range, since UTF-8 conversion is then
+unnecessary overhead. Because \*(TX strings use wide characters internally,
+converting to and from the
+.code bstr
+type still requires memory management overhead, just like in the case of the
+.code str
+type. The
+.code wstr
+type described in the next section avoids memory management and conversion
+overhead. Thus, even in situations in which characters are confined to the
+ASCII range, if wide functions are available in the foreign API, it may
+be more efficient to use them, particularly if the foreign component
+uses that representation internally.
+
+.coNP FFI types @, wstr @ wstr-d and @ wstr-s
 The FFI type
 .code wstr
 corresponds to the C type
 .code "wchar_t *"
 pointing to the first character of a null terminated wide string.
 It converts between Lisp strings and symbols, and C strings.
-The memory management is similar to the
+The family members of
+.code wstr
+have memory management semantics similar to their
 .code str
-and
-.code str-d
-types, except that no UTF-8 conversion takes place.
+counterparts.
+
+Note: because wide characters do not require UTF-8 conversion, the
+.code wstr
+family is more efficient. A
+.code wstr
+string passes into foreign code directly: the Lisp object already contains
+a null-terminated wide character string, and so the pointer to that is
+given directly to the foreign code. Similarly, ownership transfer in
+either direction is a pointer passage with no memory management or conversion
+overheads.
+Whenever some foreign API offers a choice between UTF-8 strings, and wide
+strings, the wide version should be targeted by FFI, particularly if the
+API is known to work with wide strings internally also.
 
 .coNP FFI types @ buf and @ buf-d
 The
author	Kaz Kylheku <kaz@kylheku.com>	2022-03-21 06:16:43 -0700
committer	Kaz Kylheku <kaz@kylheku.com>	2022-03-21 06:16:43 -0700
commit	af8b7db50fcc3ed34bfe1136d1664d59e89ba1a9 (patch)
tree	14a7981e70ecc67f165e848dd4e3d0a7349d8671
parent	3ce67812ebbbb8c0a82d77a72f565cda40bdec73 (diff)
download	txr-af8b7db50fcc3ed34bfe1136d1664d59e89ba1a9.tar.gz txr-af8b7db50fcc3ed34bfe1136d1664d59e89ba1a9.tar.bz2 txr-af8b7db50fcc3ed34bfe1136d1664d59e89ba1a9.zip