diff options
author | Kaz Kylheku <kaz@kylheku.com> | 2022-03-21 06:16:43 -0700 |
---|---|---|
committer | Kaz Kylheku <kaz@kylheku.com> | 2022-03-21 06:16:43 -0700 |
commit | af8b7db50fcc3ed34bfe1136d1664d59e89ba1a9 (patch) | |
tree | 14a7981e70ecc67f165e848dd4e3d0a7349d8671 | |
parent | 3ce67812ebbbb8c0a82d77a72f565cda40bdec73 (diff) | |
download | txr-af8b7db50fcc3ed34bfe1136d1664d59e89ba1a9.tar.gz txr-af8b7db50fcc3ed34bfe1136d1664d59e89ba1a9.tar.bz2 txr-af8b7db50fcc3ed34bfe1136d1664d59e89ba1a9.zip |
New FFI types str-s, bstr-s and wstr-s.
These types actually make it possible to receive a string by
pointer from a C function, without trying to free it.
It is now possible to write an FFI wrapper for strtol or
wcstol, which is done in the new test case.
* ffi.c (str_s_s, bstr_s_s, wstr_s_s): New symbol variables.
(ffi_init_types): Register the types str-s, bstr-s and wstr-s.
(ffi_init): Intern the new symbols.
* tests/017/str-s.tl: New file.
* txr.1: Documented.
* stdlib/doc-syms.tl: Updated.
-rw-r--r-- | ffi.c | 25 | ||||
-rw-r--r-- | stdlib/doc-syms.tl | 15 | ||||
-rw-r--r-- | tests/017/str-s.tl | 11 | ||||
-rw-r--r-- | txr.1 | 146 |
4 files changed, 143 insertions, 54 deletions
@@ -135,7 +135,7 @@ val array_s, zarray_s, carray_s; val union_s; -val str_d_s, wstr_s, wstr_d_s, bstr_s, bstr_d_s; +val str_d_s, str_s_s, wstr_s, wstr_d_s, wstr_s_s, bstr_s, bstr_d_s, bstr_s_s; val buf_d_s; @@ -4614,6 +4614,26 @@ static void ffi_init_types(void) ffi_bstr_put, ffi_bstr_d_get, 0, 0)); + ffi_typedef(str_s_s, make_ffi_type_builtin(str_s_s, str_s, FFI_KIND_PTR, + sizeof (mem_t *), + alignof (mem_t *), + &ffi_type_pointer, + ffi_ptr_out_null_put, ffi_str_get, + 0, 0)); + + ffi_typedef(wstr_s_s, make_ffi_type_builtin(wstr_s_s, str_s, FFI_KIND_PTR, + sizeof (mem_t *), + alignof (mem_t *), + &ffi_type_pointer, + ffi_ptr_out_null_put, ffi_wstr_get, + 0, 0)); + + ffi_typedef(bstr_s_s, make_ffi_type_builtin(bstr_s_s, str_s, FFI_KIND_PTR, + sizeof (mem_t *), + alignof (mem_t *), + &ffi_type_pointer, + ffi_ptr_out_null_put, ffi_bstr_get, + 0, 0)); { val iter; @@ -6772,10 +6792,13 @@ void ffi_init(void) carray_s = intern(lit("carray"), user_package); union_s = intern(lit("union"), user_package); str_d_s = intern(lit("str-d"), user_package); + str_s_s = intern(lit("str-s"), user_package); wstr_s = intern(lit("wstr"), user_package); wstr_d_s = intern(lit("wstr-d"), user_package); + wstr_s_s = intern(lit("wstr-s"), user_package); bstr_s = intern(lit("bstr"), user_package); bstr_d_s = intern(lit("bstr-d"), user_package); + bstr_s_s = intern(lit("bstr-s"), user_package); buf_d_s = intern(lit("buf-d"), user_package); ptr_in_s = intern(lit("ptr-in"), user_package); ptr_out_s = intern(lit("ptr-out"), user_package); diff --git a/stdlib/doc-syms.tl b/stdlib/doc-syms.tl index 179ce5e1..5ce5c0d0 100644 --- a/stdlib/doc-syms.tl +++ b/stdlib/doc-syms.tl @@ -168,8 +168,9 @@ ("bs0" "N-03BD477F") ("bs1" "N-03BD477F") ("bsdly" "N-03BD477F") - ("bstr" "N-00C6B7C4") - ("bstr-d" "N-00C6B7C4") + ("bstr" "N-0225F1EF") + ("bstr-d" "N-0225F1EF") + ("bstr-s" "N-0225F1EF") ("buf" "D-005E") ("buf-alloc-size" "N-013A3727") ("buf-carray" "N-0022F54E") @@ -1852,13 +1853,14 @@ ("static-slot-p" 
"N-032FD510") ("static-slot-set" "N-0017D1B5") ("stdlib" "N-008E4BC2") - ("str" "N-00C6B7C4") + ("str" "N-01736060") ("str-buf" "N-012BF6AD") - ("str-d" "N-00C6B7C4") + ("str-d" "N-01736060") ("str-in6addr" "N-01FF658D") ("str-in6addr-net" "N-00918411") ("str-inaddr" "N-01FF658D") ("str-inaddr-net" "N-00918411") + ("str-s" "N-01736060") ("str-seq" "N-02F0880D") ("str<" "N-01AA954A") ("str<=" "N-01AA954A") @@ -2195,8 +2197,9 @@ ("with-update-expander" "N-006EA023") ("wrap" "N-026DDCEC") ("wrap*" "N-026DDCEC") - ("wstr" "N-032DB6DC") - ("wstr-d" "N-032DB6DC") + ("wstr" "N-033B8A6D") + ("wstr-d" "N-033B8A6D") + ("wstr-s" "N-033B8A6D") ("xcase" "N-0072FF5E") ("yield" "N-02AE5C1E") ("yield-from" "N-01556613") diff --git a/tests/017/str-s.tl b/tests/017/str-s.tl new file mode 100644 index 00000000..bb9dc38a --- /dev/null +++ b/tests/017/str-s.tl @@ -0,0 +1,11 @@ +(load "../common") + +(with-dyn-lib nil + (deffi strtol "strtol" long (str (ptr-out (array 1 str-s)) int)) + (deffi bcstol "strtol" long (bstr (ptr-out (array 1 bstr-s)) int)) + (deffi wcstol "wcstol" long (wstr (ptr-out (array 1 wstr-s)) int))) + +(mtest + (let ((v (vec nil))) (list (strtol "-345x" v 0) v)) (-345 #("x")) + (let ((v (vec nil))) (list (bcstol "-345x" v 0) v)) (-345 #("x")) + (let ((v (vec nil))) (list (wcstol "-345x" v 0) v)) (-345 #("x"))) @@ -79078,26 +79078,21 @@ object can be passed as the argument of .code fclose to close the stream. -.coNP FFI types @, str @, bstr @ str-d and @ bstr-d +.coNP FFI types @, str @ str-d @ and @ str-s These FFI types correspond to the C pointer type .codn "char *" , providing automatic conversion between Lisp strings and null-terminated -C strings. The -.code str -and -.code str-d -types use UTF-8 encoding. The -.code bstr +C strings. 
+ +The related types +.codn bstr , +.codn bstr-d , +.codn bstr-s , +.codn wstr , +.code wstr-d and -.code bstr-d -types do not use UTF-8: only Lisp strings which contain strictly -code points in the range U+0000 to U+00FF may convert to these types; -out-of-range characters trigger an error exception. -The -.code -d -suffixed types differ from the unsuffixed variants -in that they denote the transfer of ownership of dynamically allocated memory, -and thus the responsibility for freeing that memory. +.code wstr-s +are also provided; these are described in the following sections. The .code str @@ -79131,17 +79126,19 @@ it deallocates that C string by invoking the C library function .code free on it. -The type -.code bstr-d -behaves like -.code str-d -with regard to memory management; it differs from -.code str-d -in the same way that -.code str -differs from -.codn bstr : -it doesn't perform UTF-8 encoding or decoding. +The type +.code str-s +is similar to +.codn str-d ; +it also has no in-operation, and doesn't deallocate the buffer +allocated in the put operation. +Under the get operation, the +.code str-s +type does not assume ownership of memory, and therefore does not +free the pointer received from the foreign function. The +.code str-s +type is intended for receiving strings via a pointer-to-pointer +argument, in situations when the string must not be freed. Like other types, the string types combine with the .code ptr @@ -79151,7 +79148,7 @@ family has memory management semantics, as does the string family, it is important to understand the memory management implications of the combination of the two. -The types +The derived pointer types .code "(ptr str-d)" and .code "(ptr str)" @@ -79172,20 +79169,19 @@ pointer, and then pass that pointer to the C .code free function. -To receive a string pointer by pointer from a foreign -function, one of the types -.code "(ptr-out str)" -or -.code "(ptr-out str-d)" -should be used, which have different semantics.
In either situation, FFI will -prepare a pointer-sized uninitialized buffer, which the called function fills -with a -.code "char *" -pointer. In the -.code str -case, FFI will duplicate that string to a Lisp string. In the -.code str-d -case, FFI will also free the string received from the foreign function. +Receiving a string by pointer from a foreign function is achieved +by treating the situation as a pointer to an array of one element. +So that is to say, an argument like +.code "char **pstr" +can be treated as either +.code "(ptr-out (array 1 str-d))" +if the foreign function passes ownership of the string, or else +.code "(ptr-out (array 1 str-s))" +if the foreign function retains ownership of the string. +In either case, the argument is a vector of one element, which +will be updated to the returned string, or else +.code nil +if the function passes back a null pointer. The type combination .code "(ptr-in str-d)" @@ -79197,18 +79193,74 @@ passes the string pointer in the same way, but the foreign module mustn't use the pointer after returning. FFI will free the pointer that had been passed. -.coNP FFI types @ wstr and @ wstr-d +.coNP FFI types @, bstr @ bstr-d @ and @ bstr-s +The +.code bstr +family corresponds to null-terminated +.code "char *" +C strings, like the +.code str +family, and the family members have memory management semantics +similar to their +.code str +counterparts. + +The +.code b +prefix in the naming denotes "byte". It indicates that unlike the +.code str +family, the +.code bstr +family does not use UTF-8 encoding; only Lisp strings which contain strictly +code points in the range U+0000 to U+00FF may convert to these types; +out-of-range characters trigger an error exception. + +Likewise, in the reverse direction, no UTF-8 decoding is performed: every byte value +turns into the corresponding character code. The byte 0 is interpreted as the +string terminator. 
+ +Note: the +.code bstr +type may be advantageous in situations when character handling is known +to be confined to the ASCII range, since UTF-8 conversion is then +unnecessary overhead. Because \*(TX strings use wide characters internally, +converting to and from the +.code bstr +type still requires memory management overhead, just like in the case of the +.code str +type. The +.code wstr +type described in the next section avoids memory management and conversion +overhead. Thus, even in situations in which characters are confined to the +ASCII range, if wide functions are available in the foreign API, it may +be more efficient to use them, particularly if the foreign component +uses that representation internally. + +.coNP FFI types @, wstr @ wstr-d and @ wstr-s The FFI type .code wstr corresponds to the C type .code "wchar_t *" pointing to the first character of a null terminated wide string. It converts between Lisp strings and symbols, and C strings. -The memory management is similar to the +The family members of +.code wstr +have memory management semantics similar to their .code str -and -.code str-d -types, except that no UTF-8 conversion takes place. +counterparts. + +Note: because wide characters do not require UTF-8 conversion, the +.code wstr +family is more efficient. A +.code wstr +string passes into foreign code directly: the Lisp object already contains +a null-terminated wide character string, and so the pointer to that is +given directly to the foreign code. Similarly, ownership transfer in +either direction is a pointer passage with no memory management or conversion +overheads. +Whenever some foreign API offers a choice between UTF-8 strings, and wide +strings, the wide version should be targeted by FFI, particularly if the +API is known to work with wide strings internally also. .coNP FFI types @ buf and @ buf-d The |