awk macro: proper fs semantics in paragraph mode.

* share/txr/stdlib/awk.tl (sys:awk-state): New slots: par-mode, par-mode-fs, par-mode-prev-fs. (sys:awk-state rec-to-f): In paragraph mode, detect whether fs has changed since the last call. If it has, take the user's fs and add to it a newline match. If it is a regex, take the source, add the syntax and recompile the regex. If it's a string, build a regex around it and compile. (sys:awk-state loop): Maintain the par-mode slot in the state structure as the rs value triggers transitions into or out of paragraph mode.
* txr.1: Updated documentation for rs.
author: Kaz Kylheku <kaz@kylheku.com> 2016-09-25 10:40:51 -0700
committer: Kaz Kylheku <kaz@kylheku.com> 2016-09-25 10:40:51 -0700
commit: b0bbe6e9dfd169f78b4908296d6edba52ed9a707 (patch)
tree: 9fe148cacf3e5252928b098fe33c41af6e9cf614
parent: 7656e99c9e1ffb509a6310cadca26c4c1c7008c9 (diff)
download: txr-b0bbe6e9dfd169f78b4908296d6edba52ed9a707.tar.gz
txr-b0bbe6e9dfd169f78b4908296d6edba52ed9a707.tar.bz2
txr-b0bbe6e9dfd169f78b4908296d6edba52ed9a707.zip
2 files changed, 39 insertions, 30 deletions
diff --git a/share/txr/stdlib/awk.tl b/share/txr/stdlib/awk.tl
index 3efd8cc6..e99f62d2 100644
--- a/share/txr/stdlib/awk.tl
+++ b/share/txr/stdlib/awk.tl
@@ -37,6 +37,7 @@
   (rec-num 0)
   rec orig-rec fields nf
   rng-vec (rng-n 0)
+  par-mode par-mode-fs par-mode-prev-fs
   (:postinit (self)
     (if (plusp self.rng-n)
       (set self.rng-vec (vector self.rng-n)))
@@ -61,8 +62,18 @@
       (if (and (not self.kfs) (equal self.rec ""))
         (set self.fields nil
              self.nf 0)
-        (set self.fields (split-str self.rec self.fs self.kfs)
-             self.nf (length self.fields))))
+        (let ((eff-fs (if self.par-mode
+                        (if (equal self.fs self.par-mode-prev-fs)
+                          self.par-mode-fs
+                          (set self.par-mode-prev-fs self.fs
+                               self.par-mode-fs
+                               (regex-compile ^(or ,(if (regexp self.fs)
+                                                      (regex-source self.fs)
+                                                      self.fs)
+                                                   "\n"))))
+                        self.fs)))
+          (set self.fields (split-str self.rec eff-fs self.kfs)
+               self.nf (length self.fields)))))
     (self.ft
       (set self.fields (tok-str self.rec self.ft self.kfs)
            self.nf (length self.fields)))
@@ -95,20 +106,23 @@
                      (set cached-rr
                           (cond
                             ((and (equal aws.rs "\n") (not aws.krs))
+                               (set aws.par-mode nil)
                                (lambda () (get-line stin)))
                             ((null aws.rs)
-                             (let ((rin (record-adapter #/\n[ \n\t]*\n/))
-                                   (flag t))
-                               (lambda ()
-                                 (let ((r (get-line rin)))
-                                   (cond
-                                     (flag
-                                       (set flag nil)
-                                       (if (equal r "")
-                                         (get-line rin)
-                                         r))
-                                     (t r))))))
+                               (set aws.par-mode t)
+                               (let ((rin (record-adapter #/\n[ \n\t]*\n/))
+                                     (flag t))
+                                 (lambda ()
+                                   (let ((r (get-line rin)))
+                                     (cond
+                                       (flag
+                                         (set flag nil)
+                                         (if (equal r "")
+                                           (get-line rin)
+                                           r))
+                                       (t r))))))
                             (t
+                              (set aws.par-mode nil)
                               (let ((rin (record-adapter (if (regexp aws.rs)
                                                            aws.rs
                                                            (regex-compile aws.rs))
diff --git a/txr.1 b/txr.1
index 508b8c72..bea89447 100644
--- a/txr.1
+++ b/txr.1
@@ -38597,27 +38597,22 @@ or more blank lines (empty lines or lines containing only a mixture of
 tabs and spaces). This means that, effectively, the record-separating
 sequences match the regular expression
 .codn "/\en[ \en\et]*\en/" .
-There is a difference between paragraph mode and simply using the above
+
+There are two differences between paragraph mode and simply using the above
 regular expression as
 .codn rs .
-The difference is that if the first record which is read upon entering
+The first difference is that if the first record which is read upon entering
 paragraph mode is empty (because the input begins with a match for the
-separator regex), then that record is thrown away, and the next record
-is read.
-
-Note that the POSIX Awk paragraph mode (which occurs when
-.code RS
-is blank) there is an additional difference: regardless of the value
-of the field separator
-.codn FS ,
-newline characters separate fields. This behavior is not implemented
-in the
-.code awk
-macro. Since newlines are included as separators in under the default field
-separation, the behaviors match in that case. Code using a custom
+separator regex), then that record is thrown away, and the next record is read.
+The second difference is that, if field separation based on the
+.code fs
+variable is in effect, then regardless of the value of
+.codn fs ,
+newline characters separate fields. Therefore, the programmer-defined
 .code fs
-must explicitly include a match for newline to obtain that as a field
-separator.
+doesn't have to include a match for newline. Moreover, if it is a simple
+fixed string, it need not be converted to a regular expression which also
+matches a newline.
 
 .coNP Variable @ krs
 .desc
author	Kaz Kylheku <kaz@kylheku.com>	2016-09-25 10:40:51 -0700
committer	Kaz Kylheku <kaz@kylheku.com>	2016-09-25 10:40:51 -0700
commit	b0bbe6e9dfd169f78b4908296d6edba52ed9a707 (patch)
tree	9fe148cacf3e5252928b098fe33c41af6e9cf614
parent	7656e99c9e1ffb509a6310cadca26c4c1c7008c9 (diff)
download	txr-b0bbe6e9dfd169f78b4908296d6edba52ed9a707.tar.gz txr-b0bbe6e9dfd169f78b4908296d6edba52ed9a707.tar.bz2 txr-b0bbe6e9dfd169f78b4908296d6edba52ed9a707.zip