summaryrefslogtreecommitdiffstats
path: root/txrtags.tl
diff options
context:
space:
mode:
authorKaz Kylheku <kaz@kylheku.com>2022-05-27 23:59:32 -0700
committerKaz Kylheku <kaz@kylheku.com>2022-05-27 23:59:32 -0700
commiteaec6a766396d40f017c6a3a00bb5b70d90babdc (patch)
treed0c352968114d794a40cd1a47e551da9824eb205 /txrtags.tl
parent6b3f6b976f10e675728830d2b91f773649cf2466 (diff)
downloadtxr-eaec6a766396d40f017c6a3a00bb5b70d90babdc.tar.gz
txr-eaec6a766396d40f017c6a3a00bb5b70d90babdc.tar.bz2
txr-eaec6a766396d40f017c6a3a00bb5b70d90babdc.zip
tags: rename to avoid name clash.
* tags.tl: File renamed to txrtags.tl. * libtags.txr: load "txrtags" rather than "tags". And here is where we can explain the problem being fixed: (load "tags") loads the actual tags file, if it already exists, and not tags.tl.
Diffstat (limited to 'txrtags.tl')
-rwxr-xr-xtxrtags.tl421
1 files changed, 421 insertions, 0 deletions
diff --git a/txrtags.tl b/txrtags.tl
new file mode 100755
index 00000000..042aa993
--- /dev/null
+++ b/txrtags.tl
@@ -0,0 +1,421 @@
+#!/usr/bin/env txr
+
+(defvar *tags-lib*)
+
+;; The etags format is described here:
+;; https://git.savannah.gnu.org/cgit/emacs.git/tree/etc/ETAGS.EBNF.
+;;
+;; Unmentioned in the document is that the line number is 1-based and
+;; the byte offset 0-based.
+(defparml etag-sec-start #\x0c)
+(defparml etag-pat-end #\x7f)
+(defparml etag-name-end #\x01)
+(defparml etag-nonname-chars " \f\t\n\r()=,;'")
+
+(define-option-struct tags-opts nil
+ (nil help :bool "List this help text and exit.")
+ (o output :text "Act on the tags file named TEXT.")
+ (a append :bool "Append to existing tags file, without sorting.")
+ (m merge :bool "Merge with existing tags file, sorting combined content.")
+ (nil exclude (cumul :text) "Skip paths matching glob pattern given \ \
+ in TEXT. Multiple patterns can be specified.")
+ (e emacs :bool "Write the tags file in Emacs's etags format.")
+ (q qual :bool "Also generate struct:slot tags for each slot."))
+
+(defun escape (str)
+ (mappend (do caseql @1
+ ((#\^ #\$ #\/ #\\) (list #\\ @1))
+ (t (list @1)))
+ str))
+
+(defstruct tag ()
+ ident
+ path
+ linum
+ byte
+ line
+ (type "?")
+
+ (:postinit (me)
+ (upd me.ident tostringp))
+
+ (:method text (me)
+ `@{me.ident}\t@{me.path}\t/^@(escape me.line)$/;"\t@{me.type}`)
+
+ (:method etext (me)
+ `@{me.line}@{etag-pat-end} \
+ @{me.ident}@{etag-name-end} \
+ @{me.linum},@{me.byte}`))
+
+(defstruct file-tag tag
+ (type "F")
+ (:postinit (me)
+ (set me.ident (base-name me.path)))
+ (:method text (me)
+ `@{me.ident}\t@{me.path}\t;"\t@{me.type}`))
+
+(defstruct fun-tag tag
+ (type "f"))
+
+(defstruct var-tag tag
+ (type "v"))
+
+(defstruct struct-tag tag
+ (type "s"))
+
+(defstruct type-tag tag
+ (type "t"))
+
+(defstruct slot-tag tag
+ (type "m")
+ parent
+ expattern
+ (:method text (me)
+ `@{me.ident}\t@{me.path}\t/^@(escape me.line)$/@(if me.expattern `\x3b/@(escape me.ident)/`)\x3b"\t@{me.type}\tstruct:@{me.parent}`)
+ (:method make-qual-tag (me)
+ (if me.parent
+ (let ((qt (copy me)))
+ (set qt.ident `@{me.parent}:@{me.ident}`)
+ qt))))
+
+(defstruct orig-tag tag
+ ;; We reuse the line slot as the already-escaped ctag pattern.
+ orig-fields
+ (:method text (me)
+ `@{me.ident}\t@{me.path}\t@{me.line} \
+ @(if me.orig-fields `\t@(cat-str me.orig-fields #\tab)`)`))
+
+(defvarl err-ret (gensym))
+
+(defvar *fake-load-path*)
+
+(defun get-pos-line (lines form)
+ (tree-case (source-loc form)
+ ((line . file)
+ ;; The file-get-string function keeps carriage returns, so the byte
+ ;; offset is correct even with \r\n line separators.
+ (let ((byte (+ line ; Count the newlines.
+ -1 ; Adjust the byte offset to be 0-based.
+ [sum (take line lines) coded-length])))
+ (cons (cons line byte) [lines line])))))
+
+(defmacro in-anon-package (. body)
+ (with-gensyms (pkg)
+ ^(let* ((*package-alist* *package-alist*)
+ (,pkg (sys:make-anon-package t))
+ (*package* ,pkg))
+ (set-package-fallback-list *package* '(:usr))
+ ,*body)))
+
+(defmacro with-tag-shorthand-macro ((name-sym path-var lines-var obj-var)
+ . body)
+ ^(macrolet ((,name-sym (type ident : parent pattern-obj)
+ (with-gensyms (linum byte line)
+ ^(tree-case ,(if pattern-obj
+ ^(get-pos-line ,',lines-var ,pattern-obj)
+ ^(get-pos-line ,',lines-var ,',obj-var))
+ (((,linum . ,byte) . ,line)
+ (new ,type ident ,ident
+ path ,',path-var
+ linum ,linum
+ byte ,byte
+ line ,line
+ ,*(if parent ^(parent ,parent))
+ ,*(if pattern-obj ^(expattern t))))))))
+ ,*body))
+
+(defun process-package-influencing-form (form)
+ (caseq (car form)
+ (load (fake-load (cadr form)))
+ (load-for (each ((clause (cdr form)))
+ (tree-bind (kind sym arg) clause
+ (when (and (eq kind 'pkg)
+ (not (find-package sym)))
+ (fake-load (caddr clause))))))
+ (defpackage (make-package (symbol-name (cadr form))))))
+
+(defun fake-load (path)
+ (unless (abs-path-p path)
+ (set path (path-cat (dir-name *fake-load-path*) path))
+ (let ((*fake-load-path* path)
+ (stream (if (ends-with ".tl" path)
+ (open-file path)
+ (or (ignerr (open-file `@path.tl`))
+ (open-file path)))))
+ (whilet ((obj (read stream *stderr* err-ret path))
+ ((neq obj err-ret)))
+ (when (consp obj)
+ (process-package-influencing-form obj))))))
+
+(defun process-form (path lines obj)
+ (build
+ (with-tag-shorthand-macro (ntag path lines obj)
+ (when (consp obj)
+ (process-package-influencing-form obj)
+ (caseq (car obj)
+ ((progn eval-only compile-only with-dyn-lib macro-time)
+ (pend [mappend (op process-form path lines) (cdr obj)]))
+ ((defun defmacro define-place-macro defmatch deffi deffi-cb)
+ (add (ntag fun-tag (cadr obj))))
+ ((defvar defvarl defparm defparml defsymacro)
+ (add (ntag var-tag (cadr obj))))
+ ((defmeth)
+ (add (ntag slot-tag (caddr obj) (cadr obj))))
+ ((defplace)
+ (tree-bind (op (name . args) . body) obj
+ (add (ntag fun-tag name))))
+ ((typedef)
+ (add (ntag type-tag (cadr obj))))
+ ((defpackage)
+ (add (ntag struct-tag (cadr obj))))
+ ((define-option-struct)
+ (let ((struct-name (cadr obj)))
+ (add (ntag struct-tag struct-name))
+ (each ((obj (cdddr obj)))
+ (tree-bind (short long . rest) obj
+ (cond
+ (long (add (ntag slot-tag long struct-name)))
+ (short (add (ntag slot-tag short struct-name))))))))
+ ((defstruct deffi-struct)
+ (let ((struct-obj obj)
+ (struct-name (tree-case (cadr obj)
+ ((atom . rest) atom)
+ (atom atom))))
+ (add (ntag struct-tag struct-name))
+ (each ((obj (cdddr obj)))
+ (tree-case obj
+ ((word name . rest)
+ (caseq word
+ ((:method :function :static :instance)
+ (add (ntag slot-tag name struct-name)))
+ (t :)))
+ ((word (arg) . body)
+ (caseq word
+ ((:init :postinit :fini))
+ (t :)))
+ ((name . rest)
+ (add (ntag slot-tag name struct-name)))
+ (name
+ (add (ntag slot-tag name struct-name struct-obj))))))))))))
+
+(defun unexpand (form)
+ (whilet ((anc (macro-ancestor form)))
+ (set form anc))
+ form)
+
+(defun process-clause (path lines clause)
+ (when (consp clause)
+ (let ((elem (car clause)))
+ (build
+ (with-tag-shorthand-macro (ntag path lines elem)
+ (when (consp elem)
+ (caseq (car elem)
+ (define (let ((args (if (eq t (cadr elem))
+ (cadddr elem)
+ (cadr elem))))
+ (add (ntag fun-tag (car args)))))
+ (bind (let ((syms (flatcar (cadr elem))))
+ (each ((sym syms))
+ (add (ntag var-tag sym)))))
+ (do (let ((forms [mapcar unexpand (cdr elem)]))
+ (each ((form forms))
+ (pend (process-form path lines form))))))))))))
+
+(defun collect-tags-tl (path)
+ (let* ((text (file-get-string path))
+ (text (if (starts-with "#!" text) `;@text` text))
+ ;; Make line numbers and byte offsets 1-based.
+ (lines (cons "" (spl #\newline text)))
+ (stream (make-string-byte-input-stream text))
+ (*rec-source-loc* t)
+ (*fake-load-path* path))
+ (build
+ (add (new file-tag
+ path path))
+ (in-anon-package
+ (whilet ((obj (read stream *stderr* err-ret path))
+ ((neq obj err-ret)))
+ (pend (process-form path lines obj)))))))
+
+(defun collect-tags-txr (path)
+ (let* ((text (file-get-string path))
+ (text (if (starts-with "#!" text) `\@;@text` text))
+ ;; Make line numbers and byte offsets 1-based.
+ (lines (cons "" (spl #\newline text)))
+ (stream (make-string-byte-input-stream text))
+ (*rec-source-loc* t)
+ (syntax (in-anon-package (txr-parse stream *stderr* nil path))))
+ (build
+ (each ((clause syntax))
+ (pend (process-clause path lines clause))))))
+
+(defun collect-tags-guess (path)
+ (with-stream (s (open-file path))
+ (iflet ((line (get-line s)))
+ (if (and (starts-with "#!" line)
+ (search-str line "txr"))
+ (if (search-str line "--lisp")
+ (collect-tags-tl path)
+ (collect-tags-txr path))
+ (progn
+ (put-line `@path: unable to determine file type` *stderr*)
+ nil)))))
+
+ (defun parse-etag-path (stream)
+ (let ((line (get-line stream)))
+ (unless line
+ (throw 'syntax-error "trailing etag section starter"))
+ (match-case line
+ (`@{path #/[^,]+/},@{size #/\d+/}` path)
+ (@otherwise
+ (throwf 'syntax-error "bad etag path line: ~s" line)))))
+
+ (defun get-etag-name (line)
+ (let ((etag-pat-end etag-pat-end)
+ (etag-name-end etag-name-end))
+ (match-case line
+ (`@pat@{etag-pat-end}@ident@{etag-name-end}@rest` ident)
+ (`@pat@{etag-pat-end}@rest`
+ (labels ((nonname-char-p (ch) (in etag-nonname-chars ch)))
+ (when (nonname-char-p [pat -1])
+ (set pat [pat 0..-1]))
+ (let ((pos [rpos-if nonname-char-p pat]))
+ (when pos (inc pos))
+ [pat pos..:])))
+ (@otherwise
+ (throwf 'syntax-error "bad etag line: ~s" line)))))
+
+ ;; Does not support include sections.
+ ;; Does not support file properties.
+ (defun read-etagfile (path)
+ (with-stream (stream (open-file path))
+ (let ((line (get-line stream)))
+ (unless line (return nil))
+ (unless (equal line (tostringp etag-sec-start))
+ (throwf 'syntax-error "bad etag section starter: ~s" line)))
+ (let ((all-tags ()))
+ (whilet ((path (parse-etag-path stream))
+ (tags ())
+ (t))
+ (whilet ((line (get-line stream))
+ (next (equal line (tostringp etag-sec-start)))
+ (t))
+ (when (or (not line) next)
+ (push (cons path tags) all-tags)
+ (if next
+ (return)
+ (return-from read-etagfile all-tags)))
+ (push (new orig-tag
+ ident (get-etag-name line)
+ orig-line line)
+ tags))))))
+
+(defun read-tagfile (path)
+ (catch (let ((lines (file-get-lines path)))
+ (collect-each ((line lines))
+ (tree-bind (ident path pat . fields) (split-str line #\tab)
+ (new orig-tag
+ ident ident
+ path path
+ line pat
+ orig-fields fields))))
+ (path-not-found (_))))
+
+(defun write-tagfile (tags o)
+ (when o.merge
+ (whenlet ((orig-tags (read-tagfile o.output)))
+ (set tags (merge tags orig-tags : .ident))))
+ (with-stream (stream (open-file o.output (if o.append "a" "w")))
+ (each ((tag tags))
+ (put-line tag.(text) stream))))
+
+(defun write-etagfile (grouped-etags o)
+ (with-stream (stream (open-file o.output (if o.append "a" "w")))
+ (each ((pair grouped-etags))
+ (tree-bind (path . etags) pair
+ (let ((str (with-out-string-stream (s)
+ (each ((etag etags))
+ (put-line etag.(etext) s)))))
+ (put-string `@{etag-sec-start}\n@{path},@(len str)\n@{str}`
+ stream))))))
+
+(defvarl ftw-actionretval 0)
+(defvarl ftw-continue 0)
+(defvarl ftw-skip-subtree 0)
+
+(defmacro static-when (expr . body)
+ (when (eval expr) ^(progn ,*body)))
+
+(compile-only
+ (unless *tags-lib*
+ (let ((o (new tags-opts)))
+ o.(getopts *args*)
+ (when o.help
+ (put-line "\nUsage:\n")
+ (put-line ` @{*load-path*} [options] {file|dir}*\n`)
+ (put-line "Directory arguments are recursively searched for .tl and .txr files.")
+ (put-line "If no arguments are given, the current directory is searched.")
+ o.(opthelp)
+ (exit t))
+
+ (unless o.out-args
+ (push "." o.out-args))
+
+ (unless (plusp (len o.output))
+ (set o.output (if o.emacs "TAGS" "tags")))
+
+ (when (and o.merge (or o.append o.emacs))
+ ;; The --merge option (without --emacs) currently results in
+ ;; duplicate tags if a file is retagged (e.g., "txr tags.tl foo.tl
+ ;; && txr tags.tl --merge foo.tl").
+ ;; We could have --merge replace all existing tags of a retagged
+ ;; file with the latest ones, with and without --emacs, but for
+ ;; now don't bother (and therefore forbid combining --emacs with
+ ;; --merge).
+ (put-line `@{*load-path*}: @(if o.append `--append` `--emacs`)\ \
+ and --merge are mutually exclusive`)
+ (exit nil))
+
+ (let* ((excf [apply orf (mapcar (do op fnmatch @@1) o.exclude)])
+ (skips ())
+ (*read-unknown-structs* t)
+ (tags (build
+ (ftw o.out-args
+ (lambda (path type stat . rest)
+ (caseql* type
+ (ftw-f (when (and (not [excf path])
+ (not [excf (base-name path)])
+ (not (some skips (op starts-with @1 path))))
+ (cond
+ ((ends-with ".tl" path)
+ (pend (ignerr (collect-tags-tl path))))
+ ((ends-with ".txr" path)
+ (pend (ignerr (collect-tags-txr path))))
+ ((member path o.out-args)
+ (pend (ignerr (collect-tags-guess path))))
+ (t ftw-continue))))
+ (ftw-d (while (and skips (starts-with path (car skips)))
+ (pop skips))
+ (cond
+ ((or [excf path] [excf (base-name path)])
+ (static-when (zerop ftw-actionretval)
+ (push `@path/` skips))
+ ftw-skip-subtree)))
+ (t ftw-continue)))
+ (logior ftw-phys ftw-actionretval)))))
+ (if o.qual
+ (set tags (build
+ (pend tags)
+ (each ((tg tags))
+ (if (typep tg 'slot-tag)
+ (iflet ((qt tg.(make-qual-tag)))
+ (add qt)))))))
+ (if o.emacs
+ (write-etagfile (flow tags
+ (remove-if (op equal @1.type "F"))
+ (nsort @1 : .linum)
+ (group-by .path)
+ (hash-alist)
+ (nsort @1 : car))
+ o)
+ (write-tagfile (nsort tags : .ident) o))))))