Klimi's new dotfiles with stow.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

238 lines
11 KiB

5 years ago
  1. ;;; make-regexp.el --- generate efficient regexps to match strings.
  2. ;; Copyright (C) 1994, 1995 Simon Marshall.
  3. ;; Author: Simon Marshall <simon@gnu.ai.mit.edu>
  4. ;; Keywords: lisp, matching
  5. ;; Version: 1.02
  6. ;; LCD Archive Entry:
  7. ;; make-regexp|Simon Marshall|simon@gnu.ai.mit.edu|
  8. ;; Generate efficient regexps to match strings.|
  9. ;; 11-Jul-1995|1.02|~/functions/make-regexp.el.gz|
  10. ;; The archive is archive.cis.ohio-state.edu in /pub/gnu/emacs/elisp-archive.
  11. ;;; This file is not part of GNU Emacs.
  12. ;;; This program is free software; you can redistribute it and/or modify
  13. ;;; it under the terms of the GNU General Public License as published by
  14. ;;; the Free Software Foundation; either version 2, or (at your option)
  15. ;;; any later version.
  16. ;;; This program is distributed in the hope that it will be useful,
  17. ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. ;;; GNU General Public License for more details.
  20. ;; A copy of the GNU General Public License is available at
  21. ;; https://www.r-project.org/Licenses/
  22. ;;; Commentary:
  23. ;; Purpose:
  24. ;;
  25. ;; To make efficient regexps from lists of strings.
  26. ;; For example:
  27. ;;
  28. ;; (let ((strings '("cond" "if" "while" "let\\*?" "prog1" "prog2" "progn"
  29. ;; "catch" "throw" "save-restriction" "save-excursion"
  30. ;; "save-window-excursion" "save-match-data"
  31. ;; "unwind-protect" "condition-case" "track-mouse")))
  32. ;; (concat "(" (make-regexp strings t)))
  33. ;;
  34. ;; => "(\\(c\\(atch\\|ond\\(\\|ition-case\\)\\)\\|if\\|let\\*?\\|prog[12n]\\|save-\\(excursion\\|match-data\\|restriction\\|window-excursion\\)\\|t\\(hrow\\|rack-mouse\\)\\|unwind-protect\\|while\\)"
  35. ;;
  36. ;; To search for the above regexp takes about 70% of the time as for the simple
  37. ;; (concat "(\\(" (mapconcat 'identity strings "\\|") "\\)") regexp.
  38. ;;
  39. ;; Obviously, the more the similarity between strings, the faster the regexp:
  40. ;;
  41. ;; (make-regexp '("abort" "abs" "accept" "access" "array" "begin" "body" "case"
  42. ;; "constant" "declare" "delay" "delta" "digits" "else" "elsif"
  43. ;; "entry" "exception" "exit" "function" "generic" "goto" "if"
  44. ;; "others" "limited" "loop" "mod" "new" "null" "out" "subtype"
  45. ;; "package" "pragma" "private" "procedure" "raise" "range"
  46. ;; "record" "rem" "renames" "return" "reverse" "select"
  47. ;; "separate" "task" "terminate" "then" "type" "when" "while"
  48. ;; "with" "xor"))
  49. ;;
  50. ;; => "a\\(b\\(ort\\|s\\)\\|cce\\(pt\\|ss\\)\\|rray\\)\\|b\\(egin\\|ody\\)\\|c\\(ase\\|onstant\\)\\|d\\(e\\(clare\\|l\\(ay\\|ta\\)\\)\\|igits\\)\\|e\\(ls\\(e\\|if\\)\\|ntry\\|x\\(ception\\|it\\)\\)\\|function\\|g\\(eneric\\|oto\\)\\|if\\|l\\(imited\\|oop\\)\\|mod\\|n\\(ew\\|ull\\)\\|o\\(thers\\|ut\\)\\|p\\(ackage\\|r\\(agma\\|ivate\\|ocedure\\)\\)\\|r\\(a\\(ise\\|nge\\)\\|e\\(cord\\|m\\|names\\|turn\\|verse\\)\\)\\|s\\(e\\(lect\\|parate\\)\\|ubtype\\)\\|t\\(ask\\|erminate\\|hen\\|ype\\)\\|w\\(h\\(en\\|ile\\)\\|ith\\)\\|xor"
  51. ;;
  52. ;; To search for the above regexp takes less than 60% of the time of the simple
  53. ;; mapconcat equivalent.
  54. ;;
  55. ;; But even small regexps may be worth it:
  56. ;;
  57. ;; (make-regexp '("and" "at" "do" "end" "for" "in" "is" "not" "of" "or" "use"))
  58. ;; => "a\\(nd\\|t\\)\\|do\\|end\\|for\\|i[ns]\\|not\\|o[fr]\\|use"
  59. ;;
  60. ;; as this is 10% faster than the mapconcat equivalent.
  61. ;; Installation:
  62. ;;
  63. ;; (autoload 'make-regexp "make-regexp"
  64. ;; "Return a regexp to match a string item in STRINGS.")
  65. ;;
  66. ;; (autoload 'make-regexps "make-regexp"
  67. ;; "Return a regexp to REGEXPS.")
  68. ;;
  69. ;; Since these functions were written to produce efficient regexps, not regexps
  70. ;; efficiently, it is probably not a good idea to in-line too many calls in
  71. ;; your code, unless you use the following neat trick with `eval-when-compile':
  72. ;;
  73. ;; (defvar definition-regexp
  74. ;; (let ((regexp (eval-when-compile
  75. ;; (make-regexp '("defun" "defsubst" "defmacro" "defalias"
  76. ;; "defvar" "defconst" "defadvice") t))))
  77. ;; (concat "^(" regexp)))
  78. ;;
  79. ;; The `byte-compile' code will be as if you had defined the variable thus:
  80. ;;
  81. ;; (defvar definition-regexp
  82. ;; "^(\\(def\\(a\\(dvice\\|lias\\)\\|const\\|macro\\|subst\\|un\\|var\\)\\)")
  83. ;; Feedback:
  84. ;;
  85. ;; Originally written for font-lock, from an idea from Stig's hl319.
  86. ;; Please don't tell me that it doesn't produce optimal regexps; I know that
  87. ;; already. But (ideas or) code to improve things (are) is welcome. Please
  88. ;; test your code and tell me the speed up in searching an appropriate buffer.
  89. ;;
  90. ;; Please send me bug reports, bug fixes, and extensions, etc.
  91. ;; Simon Marshall <simon@gnu.ai.mit.edu>
  92. ;; History:
  93. ;;
  94. ;; 1.00--1.01:
  95. ;; - Made `make-regexp' take `lax' to force top-level parentheses.
  96. ;; - Fixed `make-regexps' for MATCH bug and new `font-lock-keywords'.
  97. ;; - Added `unfontify' to user timing functions.
  98. ;; 1.01--1.02:
  99. ;; - Made `make-regexp' `let' a big `max-lisp-eval-depth'.
  100. ;; The basic idea is to find the shortest common non-"" prefix each time, and
  101. ;; squirrel it out. If there is no such prefix, we divide the list into two so
  102. ;; that (at least) one half will have at least a one-character common prefix.
  103. ;; In addition, we (a) delay the addition of () parenthesis as long as possible
  104. ;; (until we're sure we need them), and (b) try to squirrel out one-character
  105. ;; sequences (so we can use [] rather than ()).
  106. ;;; Code:
  107. (defun make-regexp (strings &optional paren lax)
  108. "Return a regexp to match a string item in STRINGS.
  109. If optional PAREN non-nil, output regexp parentheses around returned regexp.
  110. If optional LAX non-nil, don't output parentheses if it doesn't require them.
  111. Merges keywords to avoid backtracking in Emacs' regexp matcher."
  112. (let* ((max-lisp-eval-depth (* 1024 1024))
  113. (strings (let ((l strings)) ; Paranoia---make strings unique!
  114. (while l (setq l (setcdr l (delete (car l) (cdr l)))))
  115. (sort strings 'string-lessp)))
  116. (open-paren (if paren "\\(" "")) (close-paren (if paren "\\)" ""))
  117. (open-lax (if lax "" open-paren)) (close-lax (if lax "" close-paren))
  118. (completion-ignore-case nil))
  119. (cond
  120. ;; If there's only one string, just return it.
  121. ((= (length strings) 1)
  122. (concat open-lax (car strings) close-lax))
  123. ;; If there's an empty string, pull it out.
  124. ((string= (car strings) "")
  125. (if (and (= (length strings) 2) (= (length (nth 1 strings)) 1))
  126. (concat open-lax (nth 1 strings) "?" close-lax)
  127. (concat open-paren "\\|" (make-regexp (cdr strings)) close-paren)))
  128. ;; If there are only one-character strings, make a [] list instead.
  129. ((= (length strings) (apply '+ (mapcar 'length strings)))
  130. (concat open-lax "[" (mapconcat 'identity strings "") "]" close-lax))
  131. (t
  132. ;; We have a list of strings. Is there a common prefix?
  133. (let ((prefix (try-completion "" (mapcar 'list strings))))
  134. (if (> (length prefix) 0)
  135. ;; Common prefix! Squirrel it out and recurse with the suffixes.
  136. (let* ((len (length prefix))
  137. (sufs (mapcar (lambda (str) (substring str len)) strings)))
  138. (concat open-paren prefix (make-regexp sufs t t) close-paren))
  139. ;; No common prefix. Is there a one-character sequence?
  140. (let ((letters (let ((completion-regexp-list '("^.$")))
  141. (all-completions "" (mapcar 'list strings)))))
  142. (if (> (length letters) 1)
  143. ;; Do the one-character sequences, then recurse on the rest.
  144. (let ((rest (let ((completion-regexp-list '("^..+$")))
  145. (all-completions "" (mapcar 'list strings)))))
  146. (concat open-paren
  147. (make-regexp letters) "\\|" (make-regexp rest)
  148. close-paren))
  149. ;; No one-character sequence, so divide the list into two by
  150. ;; dividing into those that start with a particular letter, and
  151. ;; those that do not.
  152. (let* ((char (substring (car strings) 0 1))
  153. (half1 (all-completions char (mapcar 'list strings)))
  154. (half2 (nthcdr (length half1) strings)))
  155. (concat open-paren
  156. (make-regexp half1) "\\|" (make-regexp half2)
  157. close-paren))))))))))
  158. ;; This stuff is realy for font-lock...
  159. ;; Ahhh, the wonders of lisp...
  160. (defun regexp-span (regexp &optional start)
  161. "Return the span or depth of REGEXP.
  162. This means the number of \"\\\\(...\\\\)\" pairs in REGEXP, optionally from START."
  163. (let ((match (string-match (regexp-quote "\\(") regexp (or start 0))))
  164. (if (not match) 0 (1+ (regexp-span regexp (match-end 0))))))
  165. ;; The basic idea is to concat the regexps together, keeping count of the span
  166. ;; of the regexps so that we can get the correct match for hilighting.
  167. (defun make-regexps (&rest regexps)
  168. "Return a regexp to match REGEXPS
  169. Each item of REGEXPS should be of the form:
  170. STRING ; A STRING to be used literally.
  171. (STRING MATCH FACE DATA) ; Match STRING at depth MATCH with FACE
  172. ; and highlight according to DATA.
  173. (STRINGS FACE DATA) ; STRINGS is a list of strings FACE is
  174. ; to highlight according to DATA.
  175. Returns a list of the form:
  176. (REGEXP (MATCH FACE DATA) ...)
  177. For example:
  178. (make-regexps \"^(\"
  179. '((\"defun\" \"defalias\" \"defsubst\" \"defadvice\") keyword)
  180. \"[ \t]*\"
  181. '(\"\\\\([a-zA-Z-]+\\\\)?\" 1 function-name nil t))
  182. =>
  183. (\"^(\\\\(def\\\\(a\\\\(dvice\\\\|lias\\\\)\\\\|subst\\\\|un\\\\)\\\\)[ ]*\\\\([a-zA-Z-]+\\\\)?\"
  184. (1 keyword) (4 function-name nil t))
  185. Uses `make-regexp' to make efficient regexps."
  186. (let ((regexp "") (data ()))
  187. (while regexps
  188. (cond ((stringp (car regexps))
  189. (setq regexp (concat regexp (car regexps))))
  190. ((stringp (nth 0 (car regexps)))
  191. (setq data (cons (cons (+ (regexp-span regexp)
  192. (nth 1 (car regexps)))
  193. (nthcdr 2 (car regexps)))
  194. data)
  195. regexp (concat regexp (nth 0 (car regexps)))))
  196. (t
  197. (setq data (cons (cons (1+ (regexp-span regexp))
  198. (cdr (car regexps)))
  199. data)
  200. regexp (concat regexp (make-regexp (nth 0 (car regexps))
  201. t)))))
  202. (setq regexps (cdr regexps)))
  203. (cons regexp (nreverse data))))
  204. ;; timing functions removed due to name collisions with Gnus
  205. (provide 'make-regexp)
  206. ;;; make-regexp.el ends here