#!/usr/bin/env txr @(mdo ;; TODO ;; #. etags support (update to new hash table format, etc.). ;; #. Remove obsolete symbols (flip, etc.). ;; #. Update to the new compat handling. There may be fewer if3s in the reg calls. ;; #. merge into tags.tl the writing functions. ;; #. handle reg_vars, etc. need to handle the reg_varl(sym,val) which is in the definition of reg_var. ;; reg_var(sym, obj) too, with the -b option. ;; (there are many cases where the reg_vars are used to initialize some variables. ;; need special handling, probably.) ;; #. others of my todos. ;; ;; #. *args-eff* not stored in var, so getting doubly stored. ;; ;; #. make sure some hard-to-determine things are printed to stderr, ;; like the multiple occurrences of idents in the existing tags file. ;; makes it easier to handle changes in the C source. ;; ;; #. Check if it still allows us to navigate to the right tags (e.g., acons-new vs acons_new). ;; FIXME Why is .l getting added. ;; and .y too. ;; with only *.c pattern. ;; Should I add .shipped ones? ;; probably better to assume maintainer, since libtags.txr is for ;; developers of txr. ;; so check the .l, .y, yy.tab.c, etc. ;; not the .shipped ones. ;; libtags.txr can tag the source even without an existing tags file. ;; It will just be less accurate. ;; ;; libtags.txr automatically ignores lines with any comments, so ;; adding "/* OBS */" to for example num-chr does remove it. ;; However, there may be no other comments, otherwise it will miss ;; them. ;; ;; Even if some vars like path_sep_chars are declared and initialized ;; at the same time, better to just tag the reg_var call directly. ;; The user can navigate manually to the var in question; there would ;; be too many useless false positives. ;; also when they get assigned, like stderr_s, etc. ;; *maybe* we can have a list of exceptions, of which path_sep_chars ;; can be a part of. ;; but even then, it's being assigned in a static_str, so that may ;; be interesting to know. ;; ;; actually, completely unnecessary to specify that type is intrinsic, ;; because they are in c files. ;; any c files with such a tag is an intrinsic. ;; Note that libtags.txr should be run from the TXR source tree. ;; Because it globs for *.c. (defvar *tags-lib*) (let ((*tags-lib* t)) (load "txrtags")) (define-option-struct libtags-opts tags-opts (v verbose :bool "Print diagnostic messages during processing.")) (defvarl output) (defvarl emacs) (defvarl verbose) (defvarl ix-tags (hash :equal-based)) ;; FIXME Rename to sym-vars? (defvarl var-syms (hash :equal-based)) (defvarl fun-vars (hash :equal-based)) (defun update-ix-tags (tag newkey : oldkey) ;; Remove the var_s tag because we need not tag it if we have the ;; actual function. (when oldkey (del [ix-tags oldkey])) (upd [ix-tags newkey] (append (sys:var 1) (list tag)))) (defun qualify-sym (sym pkg) (join (casequal pkg ("user_package" "") ("system_package" "sys:") ("keyword_package" ":") (t (when verbose (put-line `@sym: in unknown package @pkg` *stderr*)) pkg)) sym)) (defun op-error-fun-p (fun) (mequal fun "op_error" "op_meta_error" "op_qquote_error" "op_unquote_error"))) @(do (let ((o (new libtags-opts))) o.(getopts *args*) (set output (cond (o.output o.output) (o.emacs "TAGS") (t "tags"))) (set emacs o.emacs) (set verbose o.verbose))) @(bind var_s #/\w[\w\d]*_s/) @(bind cident #/\w[\w\d]*/) @(bind regfun #/reg_(op|mac|fun)/) @(bind regvar #/reg_(varl?|symacro)|ffi_typedef/) @(bind lpar "(") @(bind rpar ")") @;; @(define get-interned-sym (sym))@\ @ (local lit pkg)intern(lit("@lit"), @{pkg cident})@\ @ (bind sym @(qualify-sym lit pkg))@\ @(end) @;; @(define get-fun (fun))@\ @ func_@/[\w\d]+/(@{fun cident}@(maybe), @/\d+/@(end))@\ @(end) @;; @(define get-sym-fun (fun))@\ @ (local fun-var)@\ @ (cases)func_@/[\w\d]+/(if3(opt_compat && opt_compat <= @/\d+/, @\ @ cident, @{fun cident})@(maybe), @/\d+/@(end))@\ @ ;; TODO There may be missing cases here of func_ with compat opt handling. @ (or)@(get-fun fun)@\ @ (or)@{fun-var cident}@\ @ (do (when verbose (unless (or (starts-with "op_" fun-var) [fun-vars fun-var]) (put-line `@{fun-var}: undefined function variable` *stderr*))))@\ @ (bind fun @(or [fun-vars fun-var] fun-var))@\ @ (end)@\ @(end) @;; @(define get-sym-fun (fun)) @ (cases) @ / +/@(get-sym-fun fun)@rpar; @ (or) @ / +/func_@/[\w\d]+/(if3(opt_compat && opt_compat <= @/\d+/, @ (cases) @ ;; For abs-path-p. @ / +/@cident, @{fun cident})@(maybe), @/\d+/@(end))@rpar; @ (or) @ ;; For lexical-var-p. @ / +/@cident, @ / +/@{fun cident}@rpar@(maybe), @/\d+/@(end)@rpar@rpar; @ (end) @ (or) @ ;; For match-regex, match-regex-right and match-regst-right. @ / +/func_@/[\w\d]+/((opt_compat && opt_compat <= @/\d+/) ? @ / +/@cident : @{fun cident}@(maybe), @/\d+/@(end))@rpar; @ (end) @(end) @(define get-file-ix-tags (file)) @ (next file) @ (collect) @ (local sym var fun pkg) @ (all) @ line @ (and) @ (cases) @ / +/@(maybe)val @(end)@{var cident} = @(get-fun fun); @ (do (set [fun-vars var] fun)) @ (or) @ / +/@(maybe)val @(end)@{var var_s} = @(get-interned-sym sym); @ (do (if [var-syms var] (when verbose (put-line `@var: reassigned variable` *stderr*)) (progn (iflet ((tags [ix-tags var])) ;; The variable is declared later in our search. (progn (each ((tag tags)) (let ((old-ident tag.ident)) (set tag.ident sym) (typecase tag (fun-tag (update-ix-tags tag old-ident)) (var-tag (update-ix-tags tag sym)) (t ;; This would be a bug in libtags.txr, ;; so print it regardless of --verbose. (put-line `@(struct-type-name tag): unexpected struct type` *stderr*))))) (del [ix-tags var])) ;; We may not find a corresponding C function or ;; variable (either because of missing patterns in ;; libtags.txr, or accidental omissions in the C ;; source), in which case we will just tag the line of ;; the var_s assignment. (set [ix-tags var] (list (new tag ident sym path file line line)))) ;; Keep track of the symbols, because when we find a ;; symbol corresponding to the above tag to insert into ;; ix-tags, we remove the above tag, but some symbols ;; are multiply bound, for example ‘and’ which is both ;; an operator and a function. (set [var-syms var] sym)))) @ (or) @ (cases) @ / +/@regfun(@{var var_s}, @(get-sym-fun fun)); @ (or) @ / +/@regfun(@{var var_s}, @ / +/@(get-sym-fun fun)); @ (end) @ (do ;; op_error and company appear only in the var_s cases ;; (because otherwise the interned symbol would be used ;; only for throwing an error). (unless (op-error-fun-p fun) (iflet ((sym [var-syms var]) ;; We store the path and line in case there is no such ;; tagged function or variable in the tags file, so ;; that we can still jump to the line where the symbol ;; was interned. (tag (new fun-tag ident (or sym fun) path file line line)) ((have sym))) (update-ix-tags tag fun var) (update-ix-tags tag var)))) @ (or) @ (cases) @ / +/@regfun(@(get-interned-sym sym), @(get-sym-fun fun)); @ (bind var nil) @ (or) @ / +/@regfun@lpar@(get-interned-sym sym), @ (get-sym-fun fun) @ (bind var nil) @ (or) @ ;; The assignment form always spans two or more lines. @ / +/@regfun(@{var var_s} = @(get-interned-sym sym), @ / +/@(get-sym-fun fun)); @ (or) @ / +/@regfun(@{var var_s} = intern(lit("@lit"), @ / +/@{pkg cident}), @(get-sym-fun fun)); @ (bind sym @(qualify-sym lit pkg)) @ (end) @ (do (when var (if [var-syms var] (when verbose (put-line `@var: reassigned variable` *stderr*)) (set [var-syms var] sym))) (update-ix-tags (new fun-tag ident sym path file line line) fun var)) @ (or) @ (cases) @ / +/@regvar(@{var var_s}, @(skip)); @ (or) @ ;; Cannot add a comma after the skip because the line @ ;; contains many comma. @ / +/ffi_typedef@lpar@{var var_s}, @(skip) @ (end) @ (do (iflet ((sym [var-syms var]) (tag (new var-tag ;; The var value is not used. ;; (except in debugging, to print the undefined ;; variables.) ident (or sym var) path file line line)) ((have sym))) ;; FIXME Makes sense to have a list here? ;; Or just following along with the style for funs? (update-ix-tags tag ;; Doesn't matter if we set the hash table ;; key to sym, because in the output we ;; separate the tags based on var-tag or ;; tag/fun-tag. ;; Only fun-tags need to have a key that ;; corresponds to the existing tags. sym var) (update-ix-tags tag var))) @ (or) @ (cases) @ / +/@regvar(@(get-interned-sym sym), @(skip)); @ (bind var nil) @ (or) @ / +/@regvar@lpar@(get-interned-sym sym), @ (bind var nil) @ (or) @ / +/@regvar(@{var var_s} = @(get-interned-sym sym), @(skip)); @ (or) @ / +/@regvar@lpar@{var var_s} = @(get-interned-sym sym), @ (or) @ / +/@regvar(@{var var_s} = intern(lit("@lit"), @ / +/@{pkg cident}), @(skip)); @ (bind sym @(qualify-sym lit pkg)) @ (or) @ / +/@regvar@lpar@{var var_s} = intern(lit("@lit"), @ / +/@{pkg cident}), @ (bind sym @(qualify-sym lit pkg)) @ (end) @ (do (when var (if [var-syms var] (when verbose (put-line `@var: reassigned variable` *stderr*)) (set [var-syms var] sym))) (update-ix-tags (new var-tag ident sym path file line line) sym var)) @ (end) @ (end) @ (end) @(end) @;; Move lib.c to the front, because many _f variables are @;; defined there before being used elsewhere, for example in eval.c. @;; (And with such an order, none are ever defined after being used.) @;; However, if we ever need to do it, we could add the tags in question @;; to another hash table and replace the _f variables in question as soon @;; as we found suitable candidates. @(next :list (cons "lib.c" (remqual "lib.c" (command-get-lines "git ls-files '*.c'")))) @(repeat) @ file.c @ (get-file-ix-tags `@file.c`) @(end) @(do ;; Is nshuffle even getting detected as possible duplicate? ;; FIXME The eval.c compat handling has changed. ;; Need to update that. (when nil (dohash (ident tag ix-tags) (when (mequal tag.ident ;; Obsolete symbols. "flip" "slot-p") (del [ix-tags ident])))) ;; TODO what if sym has _s? shouldn't happen, but could be more ;; robust by using another kind of key. like (list ident "foo"). (when verbose (let ((alist (keep-if (op ends-with "_s" (first @1)) (hash-pairs ix-tags))) (undefined nil) (missing nil)) (each ((pair alist)) (tree-bind (key tags) pair (each ((tag tags)) (typecase tag (fun-tag (push (list key (join "(" tag.ident ")")) undefined)) (var-tag (push (list key) undefined)) (tag (push (list key (join "(" tag.ident ")")) missing)) (t (put-line `@(struct-type-name tag): unexpected struct type` *stderr*)))))) (upd undefined (nsort @1 : car)) (upd missing (nsort @1 : car)) (mapdo (op put-line `@(cat-str @1 " "): undefined variable` *stderr*) undefined) (mapdo (op put-line `@(cat-str @1 " "): no corresponding function or value` *stderr*) missing) (put-line `@(len undefined) undefined variables` *stderr*) (put-line `@(len missing) missing corresponding functions` *stderr*) (let ((vals (flow ix-tags hash-values flatten (nsort @1 : .ident)))) (put-line `@(len vals) ix-tags` *stderr*)))) (defun merge-ix-tags (orig-tags) (let ((tags orig-tags) ;; Empty if no original tags file. (orig-tags (group-by (op identity @1.ident) orig-tags)) (ix-tags (hash-pairs ix-tags))) (each ((pair ix-tags)) ;; These idents are unique, because we store them into the hash ;; table and have lists of ix-tags for each ident. (tree-bind (ident ix-tags) pair (condlet ((((ends-with "_s" ident))) ;; We don't want the tag to point to the declaration of the ;; variable (which is what would happen if we duplicate the ;; tag in the existing tags file), because that is of ;; limited usefulness, so fall back to our info. (upd tags (revappend ix-tags))) (((orig-tags-orig orig-tags) (orig-tags (keep-if (andf (op mequal (short-suffix @1.path) ;; We know that the identifiers are ;; in C source (in particular, not ;; in TXR Lisp source). ".c" ".l" ".y") ;; Assume that an open parenthesis means the tagged ;; identifier is a function. ;; This way we can skipped tagged struct members, which ;; can share the same name as an existing function, but ;; which we want to ignore (because we are tagging Lisp ;; functions here). (op find #\( @1.line)) [orig-tags ident]))) ;; We may tag Lisp identifiers several times (possibly with ;; same-named static functions in different compilation ;; units, but more likely from #ifdef blocks), but it's the ;; best we can do without fully parsing the C. (when (and verbose (> (len orig-tags) 1)) (put-line `@ident: multiple occurrences in tags file` *stderr*)) (tree-bind (: var-tags other-tags) (separate (op equal @1.type "v") ix-tags) (upd tags (revappend var-tags)) (each-prod ((orig-tag orig-tags) (ix-tag other-tags)) (unless (equal orig-tag.ident ix-tag.ident) (let ((tag (copy-struct orig-tag))) (set tag.ident ix-tag.ident) (push tag tags)))))) (t (when verbose (whenlet ((orig-tags (keep-if (op mequal (short-suffix @1.path) ".c" ".l" ".y") [orig-tags-orig ident]))) (each-prod ((orig-tag orig-tags) (ix-tag ix-tags)) ;; FIXME args (for args_s) is duplicated four times. (put-line `@ident | orig @{orig-tag.ident} @{orig-tag.path} | ix @{ix-tag.ident} @{ix-tag.path}`)))) ;; The tags file doesn't contain a tag for our ident. ;; Fall back to our info. (upd tags (revappend ix-tags)))))) ;; Not nsort so as not to modify orig-tags in caller. (sort tags : .ident))) ;; We may as well merge the ctags file (i.e., sort the lines), since ;; we have to read all the existing lines. ;; ;; Greatly adjusted from tags.tl. ;; ;; For some reason, acons-new is found only if it is placed before ;; acons_new, and asin is found only if it is placed before asine. ;; Is there some alphabetical ordering particularity? ;; Will need to resort the ctags since me_op could be associated to op ;; or do. (defun write-tagfile (ix-tags) (let* ((orig-tags (read-tagfile output)) (ix-tags (merge-ix-tags orig-tags))) (with-stream (stream (open-file output "w")) (each ((tag ix-tags)) (put-line tag.(text) stream))))) ;; Greatly adjusted from tags.tl. ;; ;; FIXME incomplete at the moment. (defun write-etagfile (ix-tags) (let ((orig-tags (read-etagfile output))) (upd orig-tags (nsort @1 : car)) (file-put "tags.out" orig-tags) (with-stream (stream (open-file output "w")) (each ((pair orig-tags)) (tree-bind (path . etags) pair (let ((str (with-out-string-stream (s) (each ((etag etags)) (unless (ends-with "_s" etag.ident) (each ((ix-tag [keep-if (op find etag.ident @1 : .ident) ix-tags .ctags])) (put-line `duping @ix-tag`) ;; (put-line `@{lisptag.ident}@[tag.(text) (len tag.ident)..:]` ;; stream) )) (put-line etag.(etext) s))))) (put-string `@{etag-sec-start}\n@{path},@(len str)\n@{str}` stream))))))) (if emacs (write-etagfile ix-tags) (write-tagfile ix-tags)))