#!/usr/bin/env txr (defvar *tags-lib*) ;; The etags format is described here: ;; https://git.savannah.gnu.org/cgit/emacs.git/tree/etc/ETAGS.EBNF. ;; ;; Unmentioned in the document is that the line number is 1-based and ;; the byte offset 0-based. (defparml etag-sec-start #\x0c) (defparml etag-pat-end #\x7f) (defparml etag-name-end #\x01) (defparml etag-nonname-chars " \f\t\n\r()=,;'") (define-option-struct tags-opts nil (nil help :bool "List this help text and exit.") (o output :text "Act on the tags file named TEXT.") (a append :bool "Append to existing tags file, without sorting.") (m merge :bool "Merge with existing tags file, sorting combined content.") (nil exclude (cumul :text) "Skip paths matching glob pattern given \ \ in TEXT. Multiple patterns can be specified.") (e emacs :bool "Write the tags file in Emacs's etags format.") (q qual :bool "Also generate struct:slot tags for each slot.")) (defun escape (str) (mappend (do caseql @1 ((#\^ #\$ #\/ #\\) (list #\\ @1)) (t (list @1))) str)) (defstruct tag () ident path linum byte line (type "?") (:postinit (me) (upd me.ident tostringp)) (:method text (me) `@{me.ident}\t@{me.path}\t/^@(escape me.line)$/;"\t@{me.type}`) (:method etext (me) `@{me.line}@{etag-pat-end} \ @{me.ident}@{etag-name-end} \ @{me.linum},@{me.byte}`)) (defstruct file-tag tag (type "F") (:postinit (me) (set me.ident (base-name me.path))) (:method text (me) `@{me.ident}\t@{me.path}\t;"\t@{me.type}`)) (defstruct fun-tag tag (type "f")) (defstruct var-tag tag (type "v")) (defstruct struct-tag tag (type "s")) (defstruct type-tag tag (type "t")) (defstruct slot-tag tag (type "m") parent expattern (:method text (me) `@{me.ident}\t@{me.path}\t/^@(escape me.line)$/@(if me.expattern `\x3b/@(escape me.ident)/`)\x3b"\t@{me.type}\tstruct:@{me.parent}`) (:method make-qual-tag (me) (if me.parent (let ((qt (copy me))) (set qt.ident `@{me.parent}:@{me.ident}`) qt)))) (defstruct orig-tag tag ;; We reuse the line slot as the already-escaped ctag pattern. orig-fields (:method text (me) `@{me.ident}\t@{me.path}\t@{me.line} \ @(if me.orig-fields `\t@(cat-str me.orig-fields #\tab)`)`)) (defvarl err-ret (gensym)) (defvar *fake-load-path*) (defun get-pos-line (lines form) (tree-case (source-loc form) ((line . file) ;; The file-get-string function keeps carriage returns, so the byte ;; offset is correct even with \r\n line separators. (let ((byte (+ line ; Count the newlines. -1 ; Adjust the byte offset to be 0-based. [sum (take line lines) coded-length]))) (cons (cons line byte) [lines line]))))) (defmacro in-anon-package (. body) (with-gensyms (pkg) ^(let* ((*package-alist* *package-alist*) (,pkg (sys:make-anon-package t)) (*package* ,pkg)) (set-package-fallback-list *package* '(:usr)) ,*body))) (defmacro with-tag-shorthand-macro ((name-sym path-var lines-var obj-var) . body) ^(macrolet ((,name-sym (type ident : parent pattern-obj) (with-gensyms (linum byte line) ^(tree-case ,(if pattern-obj ^(get-pos-line ,',lines-var ,pattern-obj) ^(get-pos-line ,',lines-var ,',obj-var)) (((,linum . ,byte) . ,line) (new ,type ident ,ident path ,',path-var linum ,linum byte ,byte line ,line ,*(if parent ^(parent ,parent)) ,*(if pattern-obj ^(expattern t)))))))) ,*body)) (defun process-package-influencing-form (form) (caseq (car form) (load (fake-load (cadr form))) (load-for (each ((clause (cdr form))) (tree-bind (kind sym arg) clause (when (and (eq kind 'pkg) (not (find-package sym))) (fake-load (caddr clause)))))) (defpackage (make-package (symbol-name (cadr form)))))) (defun fake-load (path) (unless (abs-path-p path) (set path (path-cat (dir-name *fake-load-path*) path)) (let ((*fake-load-path* path) (stream (if (ends-with ".tl" path) (open-file path) (or (ignerr (open-file `@path.tl`)) (open-file path))))) (whilet ((obj (read stream *stderr* err-ret path)) ((neq obj err-ret))) (when (consp obj) (process-package-influencing-form obj)))))) (defun process-form (path lines obj) (build (with-tag-shorthand-macro (ntag path lines obj) (when (consp obj) (process-package-influencing-form obj) (caseq (car obj) ((progn eval-only compile-only with-dyn-lib macro-time) (pend [mappend (op process-form path lines) (cdr obj)])) ((defun defmacro define-place-macro defmatch deffi deffi-cb) (add (ntag fun-tag (cadr obj)))) ((defvar defvarl defparm defparml defsymacro) (add (ntag var-tag (cadr obj)))) ((defmeth) (add (ntag slot-tag (caddr obj) (cadr obj)))) ((defplace) (tree-bind (op (name . args) . body) obj (add (ntag fun-tag name)))) ((typedef) (add (ntag type-tag (cadr obj)))) ((defpackage) (add (ntag struct-tag (cadr obj)))) ((define-option-struct) (let ((struct-name (cadr obj))) (add (ntag struct-tag struct-name)) (each ((obj (cdddr obj))) (tree-bind (short long . rest) obj (cond (long (add (ntag slot-tag long struct-name))) (short (add (ntag slot-tag short struct-name)))))))) ((defstruct deffi-struct) (let ((struct-obj obj) (struct-name (tree-case (cadr obj) ((atom . rest) atom) (atom atom)))) (add (ntag struct-tag struct-name)) (each ((obj (cdddr obj))) (tree-case obj ((word name . rest) (caseq word ((:method :function :static :instance) (add (ntag slot-tag name struct-name))) (t :))) ((word (arg) . body) (caseq word ((:init :postinit :fini)) (t :))) ((name . rest) (add (ntag slot-tag name struct-name))) (name (add (ntag slot-tag name struct-name struct-obj)))))))))))) (defun unexpand (form) (whilet ((anc (macro-ancestor form))) (set form anc)) form) (defun process-clause (path lines clause) (when (consp clause) (let ((elem (car clause))) (build (with-tag-shorthand-macro (ntag path lines elem) (when (consp elem) (caseq (car elem) (define (let ((args (if (eq t (cadr elem)) (cadddr elem) (cadr elem)))) (add (ntag fun-tag (car args))))) (bind (let ((syms (flatcar (cadr elem)))) (each ((sym syms)) (add (ntag var-tag sym))))) (do (let ((forms [mapcar unexpand (cdr elem)])) (each ((form forms)) (pend (process-form path lines form)))))))))))) (defun collect-tags-tl (path) (let* ((text (file-get-string path)) (text (if (starts-with "#!" text) `;@text` text)) ;; Make line numbers and byte offsets 1-based. (lines (cons "" (spl #\newline text))) (stream (make-string-byte-input-stream text)) (*rec-source-loc* t) (*fake-load-path* path)) (build (add (new file-tag path path)) (in-anon-package (whilet ((obj (read stream *stderr* err-ret path)) ((neq obj err-ret))) (pend (process-form path lines obj))))))) (defun collect-tags-txr (path) (let* ((text (file-get-string path)) (text (if (starts-with "#!" text) `\@;@text` text)) ;; Make line numbers and byte offsets 1-based. (lines (cons "" (spl #\newline text))) (stream (make-string-byte-input-stream text)) (*rec-source-loc* t) (syntax (in-anon-package (txr-parse stream *stderr* nil path)))) (build (each ((clause syntax)) (pend (process-clause path lines clause)))))) (defun collect-tags-guess (path) (with-stream (s (open-file path)) (iflet ((line (get-line s))) (if (and (starts-with "#!" line) (search-str line "txr")) (if (search-str line "--lisp") (collect-tags-tl path) (collect-tags-txr path)) (progn (put-line `@path: unable to determine file type` *stderr*) nil))))) (defun parse-etag-path (stream) (let ((line (get-line stream))) (unless line (throw 'syntax-error "trailing etag section starter")) (match-case line (`@{path #/[^,]+/},@{size #/\d+/}` path) (@otherwise (throwf 'syntax-error "bad etag path line: ~s" line))))) (defun get-etag-name (line) (let ((etag-pat-end etag-pat-end) (etag-name-end etag-name-end)) (match-case line (`@pat@{etag-pat-end}@ident@{etag-name-end}@rest` ident) (`@pat@{etag-pat-end}@rest` (labels ((nonname-char-p (ch) (in etag-nonname-chars ch))) (when (nonname-char-p [pat -1]) (set pat [pat 0..-1])) (let ((pos [rpos-if nonname-char-p pat])) (when pos (inc pos)) [pat pos..:]))) (@otherwise (throwf 'syntax-error "bad etag line: ~s" line))))) ;; Does not support include sections. ;; Does not support file properties. (defun read-etagfile (path) (with-stream (stream (open-file path)) (let ((line (get-line stream))) (unless line (return nil)) (unless (equal line (tostringp etag-sec-start)) (throwf 'syntax-error "bad etag section starter: ~s" line))) (let ((all-tags ())) (whilet ((path (parse-etag-path stream)) (tags ()) (t)) (whilet ((line (get-line stream)) (next (equal line (tostringp etag-sec-start))) (t)) (when (or (not line) next) (push (cons path tags) all-tags) (if next (return) (return-from read-etagfile all-tags))) (push (new orig-tag ident (get-etag-name line) orig-line line) tags)))))) (defun read-tagfile (path) (catch (let ((lines (file-get-lines path))) (collect-each ((line lines)) (tree-bind (ident path pat . fields) (split-str line #\tab) (new orig-tag ident ident path path line pat orig-fields fields)))) (path-not-found (_)))) (defun write-tagfile (tags o) (when o.merge (whenlet ((orig-tags (read-tagfile o.output))) (set tags (merge tags orig-tags : .ident)))) (with-stream (stream (open-file o.output (if o.append "a" "w"))) (each ((tag tags)) (put-line tag.(text) stream)))) (defun write-etagfile (grouped-etags o) (with-stream (stream (open-file o.output (if o.append "a" "w"))) (each ((pair grouped-etags)) (tree-bind (path . etags) pair (let ((str (with-out-string-stream (s) (each ((etag etags)) (put-line etag.(etext) s))))) (put-string `@{etag-sec-start}\n@{path},@(len str)\n@{str}` stream)))))) (defvarl ftw-actionretval 0) (defvarl ftw-continue 0) (defvarl ftw-skip-subtree 0) (defmacro static-when (expr . body) (when (eval expr) ^(progn ,*body))) (compile-only (unless *tags-lib* (let ((o (new tags-opts))) o.(getopts *args*) (when o.help (put-line "\nUsage:\n") (put-line ` @{*load-path*} [options] {file|dir}*\n`) (put-line "Directory arguments are recursively searched for .tl and .txr files.") (put-line "If no arguments are given, the current directory is searched.") o.(opthelp) (exit t)) (unless o.out-args (push "." o.out-args)) (unless (plusp (len o.output)) (set o.output (if o.emacs "TAGS" "tags"))) (when (and o.merge (or o.append o.emacs)) ;; The --merge option (without --emacs) currently results in ;; duplicate tags if a file is retagged (e.g., "txr tags.tl foo.tl ;; && txr tags.tl --merge foo.tl"). ;; We could have --merge replace all existing tags of a retagged ;; file with the latest ones, with and without --emacs, but for ;; now don't bother (and therefore forbid combining --emacs with ;; --merge). (put-line `@{*load-path*}: @(if o.append `--append` `--emacs`)\ \ and --merge are mutually exclusive`) (exit nil)) (let* ((excf [apply orf (mapcar (do op fnmatch @@1) o.exclude)]) (skips ()) (*read-unknown-structs* t) (tags (build (ftw o.out-args (lambda (path type stat . rest) (caseql* type (ftw-f (when (and (not [excf path]) (not [excf (base-name path)]) (not (some skips (op starts-with @1 path)))) (cond ((ends-with ".tl" path) (pend (ignerr (collect-tags-tl path)))) ((ends-with ".txr" path) (pend (ignerr (collect-tags-txr path)))) ((member path o.out-args) (pend (ignerr (collect-tags-guess path)))) (t ftw-continue)))) (ftw-d (while (and skips (starts-with path (car skips))) (pop skips)) (cond ((or [excf path] [excf (base-name path)]) (static-when (zerop ftw-actionretval) (push `@path/` skips)) ftw-skip-subtree))) (t ftw-continue))) (logior ftw-phys ftw-actionretval))))) (if o.qual (set tags (build (pend tags) (each ((tg tags)) (if (typep tg 'slot-tag) (iflet ((qt tg.(make-qual-tag))) (add qt))))))) (if o.emacs (write-etagfile (flow tags (remove-if (op equal @1.type "F")) (nsort @1 : .linum) (group-by .path) (hash-alist) (nsort @1 : car)) o) (write-tagfile (nsort tags : .ident) o))))))