Klimi's new dotfiles with stow.
Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

513 řádky
19 KiB

před 4 roky
  1. ;;; haskell-lexeme.el --- haskell lexical tokens -*- coding: utf-8; lexical-binding: t -*-
  2. ;; Copyright (C) 2015 Gracjan Polak
  3. ;; This file is free software; you can redistribute it and/or modify
  4. ;; it under the terms of the GNU General Public License as published by
  5. ;; the Free Software Foundation; either version 3, or (at your option)
  6. ;; any later version.
  7. ;; This file is distributed in the hope that it will be useful,
  8. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. ;; GNU General Public License for more details.
  11. ;; You should have received a copy of the GNU General Public License
  12. ;; along with this program. If not, see <http://www.gnu.org/licenses/>.
  13. ;;; Commentary:
  14. ;;; Code:
  15. (require 'rx)
  16. (unless (category-docstring ?P)
  17. (define-category ?P "Haskell symbol constituent characters")
  18. (map-char-table
  19. #'(lambda (key val)
  20. (if (or
  21. (and (consp key) (> (car key) 128))
  22. (and (numberp key) (> key 128)))
  23. (if (member val '(Pc Pd Po Sm Sc Sk So))
  24. (modify-category-entry key ?P))))
  25. unicode-category-table)
  26. (dolist (key (string-to-list "!#$%&*+./<=>?@^|~\\-:"))
  27. (modify-category-entry key ?P)))
  28. (defconst haskell-lexeme-modid
  29. "[[:upper:]][[:alnum:]'_]*"
  30. "Regexp matching a valid Haskell module identifier.
  31. Note that GHC accepts Unicode category UppercaseLetter as a first
  32. character. Following letters are from Unicode categories
  33. UppercaseLetter, LowercaseLetter, OtherLetter, TitlecaseLetter,
  34. ModifierLetter, DecimalNumber, OtherNumber, backslash or
  35. underscore.")
  36. (defconst haskell-lexeme-id
  37. "[[:alpha:]_][[:alnum:]'_]*"
  38. "Regexp matching a valid Haskell identifier.
  39. GHC accepts a string starting with any alphabetic character or
  40. underscore followed by any alphanumeric character or underscore
  41. or apostrophe.")
  42. (defconst haskell-lexeme-sym
  43. "\\cP+"
  44. "Regexp matching a valid Haskell variable or constructor symbol.
  45. GHC accepts a string of chars from the set
  46. [:!#$%&*+./<=>?@^|~\\-] or Unicode category Symbol for chars with
  47. codes larger than 128 only.")
  48. (defconst haskell-lexeme-idsym-first-char
  49. "\\(?:[[:alpha:]_]\\|\\cP\\)"
  50. "Regexp matching first character of a qualified or unqualified
  51. identifier or symbol.
  52. Useful for `re-search-forward'.")
  53. (defconst haskell-lexeme-modid-opt-prefix
  54. (concat "\\(?:" haskell-lexeme-modid "\\.\\)*")
  55. "Regexp matching a valid Haskell module prefix, potentially empty.
  56. Module path prefix is separated by dots and finishes with a
  57. dot. For path component syntax see `haskell-lexeme-modid'.")
  58. (defconst haskell-lexeme-qid-or-qsym
  59. (rx-to-string `(: (regexp ,haskell-lexeme-modid-opt-prefix)
  60. (group (| (regexp ,haskell-lexeme-id) (regexp ,haskell-lexeme-sym)
  61. ))))
  62. "Regexp matching a valid qualified identifier or symbol.
  63. Note that (match-string 1) returns the unqualified part.")
  64. (defun haskell-lexeme-looking-at-qidsym ()
  65. "Non-nil when point is just in front of an optionally qualified
  66. identifier or symbol.
  67. Using this function is more efficient than matching against the
  68. regexp `haskell-lexeme-qid-or-qsym'.
  69. Returns:
  70. 'qid - if matched a qualified id: 'Data.Map' or 'Map'
  71. 'qsym - if matched a qualified id: 'Monad.>>=' or '>>='
  72. 'qprefix - if matched only modid prefix: 'Data.'
  73. After successful 'qid or 'qsym match (match-string 1) will return
  74. the unqualified part (if any)."
  75. (let ((begin (point))
  76. (match-data-old (match-data)))
  77. (save-excursion
  78. (while (looking-at (concat haskell-lexeme-modid "\\."))
  79. (goto-char (match-end 0)))
  80. (cond
  81. ((looking-at haskell-lexeme-id)
  82. (let ((beg (match-beginning 0))
  83. (end (match-end 0)))
  84. ;; check is MagicHash is present at the end of the token
  85. (goto-char end)
  86. (when (looking-at "#+")
  87. (setq end (match-end 0)))
  88. (set-match-data
  89. (list begin end
  90. beg end)))
  91. 'qid)
  92. ((looking-at haskell-lexeme-sym)
  93. (set-match-data
  94. (list begin (match-end 0)
  95. (match-beginning 0) (match-end 0)))
  96. 'qsym)
  97. ((equal begin (point))
  98. (set-match-data match-data-old)
  99. nil)
  100. (t
  101. (set-match-data
  102. (list begin (point)
  103. nil nil))
  104. 'qprefix)))))
  105. (defun haskell-lexeme-looking-at-backtick ()
  106. "Non-nil when point is just in front of an identifier quoted with backticks.
  107. When match is successful, match-data will contain:
  108. (match-text 1) - opening backtick
  109. (match-text 2) - whole qualified identifier
  110. (match-text 3) - unqualified part of identifier
  111. (match-text 4) - closing backtick"
  112. (let ((match-data-old (match-data))
  113. first-backtick-start
  114. last-backtick-start
  115. qid-start
  116. id-start
  117. id-end
  118. result)
  119. (save-excursion
  120. (when (looking-at "`")
  121. (setq first-backtick-start (match-beginning 0))
  122. (goto-char (match-end 0))
  123. (forward-comment (buffer-size))
  124. (when (haskell-lexeme-looking-at-qidsym)
  125. (setq qid-start (match-beginning 0))
  126. (setq id-start (match-beginning 1))
  127. (setq id-end (match-end 1))
  128. (goto-char (match-end 0))
  129. (forward-comment (buffer-size))
  130. (when (looking-at "`")
  131. (setq last-backtick-start (match-beginning 0))
  132. (set-match-data
  133. (mapcar
  134. (lambda (p)
  135. (set-marker (make-marker) p))
  136. (list
  137. first-backtick-start (1+ last-backtick-start)
  138. first-backtick-start (1+ first-backtick-start)
  139. qid-start id-end
  140. id-start id-end
  141. last-backtick-start (1+ last-backtick-start))))
  142. (setq result t)))))
  143. (unless result
  144. (set-match-data match-data-old))
  145. result))
  146. (defconst haskell-lexeme-qid
  147. (rx-to-string `(: (regexp "'*")
  148. (regexp ,haskell-lexeme-modid-opt-prefix)
  149. (group (regexp ,haskell-lexeme-id))))
  150. "Regexp matching a valid qualified identifier.
  151. Note that (match-string 1) returns the unqualified part.")
  152. (defconst haskell-lexeme-qsym
  153. (rx-to-string `(: (regexp "'*")
  154. (regexp ,haskell-lexeme-modid-opt-prefix)
  155. (group (regexp ,haskell-lexeme-id))))
  156. "Regexp matching a valid qualified symbol.
  157. Note that (match-string 1) returns the unqualified part.")
  158. (defconst haskell-lexeme-number
  159. (rx (| (: (regexp "[0-9]+\\.[0-9]+") (opt (regexp "[eE][-+]?[0-9]+")))
  160. (regexp "[0-9]+[eE][-+]?[0-9]+")
  161. (regexp "0[xX][0-9a-fA-F]+")
  162. (regexp "0[oO][0-7]+")
  163. (regexp "[0-9]+")))
  164. "Regexp matching a floating point, decimal, octal or hexadecimal number.
  165. Note that negative sign char is not part of a number.")
  166. (defconst haskell-lexeme-char-literal-inside
  167. (rx (| (not (any "\n'\\"))
  168. (: "\\"
  169. (| "a" "b" "f" "n" "r" "t" "v" "\\" "\"" "'"
  170. "NUL" "SOH" "STX" "ETX" "EOT" "ENQ" "ACK"
  171. "BEL" "BS" "HT" "LF" "VT" "FF" "CR" "SO" "SI" "DLE"
  172. "DC1" "DC2" "DC3" "DC4" "NAK" "SYN" "ETB" "CAN"
  173. "EM" "SUB" "ESC" "FS" "GS" "RS" "US" "SP" "DEL"
  174. (regexp "[0-9]+")
  175. (: "x" (regexp "[0-9a-fA-F]+"))
  176. (: "o" (regexp "[0-7]+"))
  177. (: "^" (regexp "[]A-Z@^_\\[]"))))))
  178. "Regexp matching an inside of a character literal.
  179. Note that `haskell-lexeme-char-literal-inside' matches strictly
  180. only escape sequences defined in Haskell Report.")
  181. (defconst haskell-lexeme--char-literal-rx
  182. (rx-to-string `(: (group "'")
  183. (| (: (group (regexp "[[:alpha:]_:([]")) (group "'")) ; exactly one char
  184. (: (group (| (regexp "\\\\[^\n][^'\n]*") ; allow quote just after first backslash
  185. (regexp "[^[:alpha:]_:(['\n][^'\n]*")))
  186. (| (group "'") "\n" (regexp "\\'"))))))
  187. "Regexp matching a character literal lookalike.
  188. Note that `haskell-lexeme--char-literal-rx' matches more than
  189. Haskell Report specifies because we want to support also code
  190. under edit.
  191. Character literals end with a quote or a newline or end of
  192. buffer.
  193. Regexp has subgroup expressions:
  194. (match-text 1) matches the opening quote.
  195. (match-text 2) matches the inside of the character literal.
  196. (match-text 3) matches the closing quote or an empty string
  197. at the end of line or the end buffer.")
  198. (defun haskell-lexeme-looking-at-char-literal ()
  199. "Non-nil when point is at a char literal lookalike.
  200. Note that this function matches more than Haskell Report
  201. specifies because we want to support also code under edit.
  202. Char literals end with a quote or an unescaped newline or end
  203. of buffer.
  204. After successful match:
  205. (match-text 1) matches the opening quote.
  206. (match-text 2) matches the inside of the char literla.
  207. (match-text 3) matches the closing quote, or a closing
  208. newline or is nil when at the end of the buffer."
  209. (when (looking-at haskell-lexeme--char-literal-rx)
  210. (set-match-data
  211. (list (match-beginning 0) (match-end 0)
  212. (match-beginning 1) (match-end 1)
  213. (or (match-beginning 2) (match-beginning 4)) (or (match-end 2) (match-end 4))
  214. (or (match-beginning 3) (match-beginning 5)) (or (match-end 3) (match-end 5))))
  215. t))
  216. (defconst haskell-lexeme-string-literal-inside-item
  217. (rx (| (not (any "\n\"\\"))
  218. (: "\\"
  219. (| "a" "b" "f" "n" "r" "t" "v" "\\" "\"" "'" "&"
  220. "NUL" "SOH" "STX" "ETX" "EOT" "ENQ" "ACK"
  221. "BEL" "BS" "HT" "LF" "VT" "FF" "CR" "SO" "SI" "DLE"
  222. "DC1" "DC2" "DC3" "DC4" "NAK" "SYN" "ETB" "CAN"
  223. "EM" "SUB" "ESC" "FS" "GS" "RS" "US" "SP" "DEL"
  224. (regexp "[0-9]+")
  225. (: "x" (regexp "[0-9a-fA-F]+"))
  226. (: "o" (regexp "[0-7]+"))
  227. (: "^" (regexp "[]A-Z@^_\\[]"))
  228. (regexp "[ \t\n\r\v\f]*\\\\")))))
  229. "Regexp matching an item that is a single character or a single
  230. escape sequence inside of a string literal.
  231. Note that `haskell-lexeme-string-literal-inside-item' matches
  232. strictly only escape sequences defined in Haskell Report.")
  233. (defconst haskell-lexeme-string-literal
  234. (rx (: (group "\"")
  235. (group (* (| (regexp "\\\\[ \t\n\r\v\f]*\\\\")
  236. (regexp "\\\\[ \t\n\r\v\f]+")
  237. (regexp "\\\\[^ \t\n\r\v\f]")
  238. (* (regexp "[^\"\n\\]")))))
  239. (group (| "\"" (regexp "$") (regexp "\\\\?\\'")
  240. ))))
  241. "Regexp matching a string literal lookalike.
  242. Note that `haskell-lexeme-string-literal' matches more than
  243. Haskell Report specifies because we want to support also code
  244. under edit.
  245. String literals end with double quote or unescaped newline or end
  246. of buffer.
  247. Regexp has subgroup expressions:
  248. (match-text 1) matches the opening double quote.
  249. (match-text 2) matches the inside of the string.
  250. (match-text 3) matches the closing double quote or an empty string
  251. at the end of line or the end buffer.")
  252. (defun haskell-lexeme-looking-at-string-literal ()
  253. "Non-nil when point is at a string literal lookalike.
  254. Note that this function matches more than Haskell Report
  255. specifies because we want to support also code under edit.
  256. String literals end with double quote or unescaped newline or end
  257. of buffer.
  258. After successful match:
  259. (match-text 1) matches the opening doublequote.
  260. (match-text 2) matches the inside of the string.
  261. (match-text 3) matches the closing quote, or a closing
  262. newline or is nil when at the end of the buffer."
  263. (when (looking-at "\"")
  264. (save-excursion
  265. (let ((begin (point)))
  266. (goto-char (match-end 0))
  267. (let (finish)
  268. (while (and (not finish)
  269. (re-search-forward "[\"\n\\]" nil 'goto-eob))
  270. (cond
  271. ((equal (match-string 0) "\\")
  272. (if (looking-at "[ \t\n\r\v\f]+\\\\?")
  273. (goto-char (match-end 0))
  274. (goto-char (1+ (point)))))
  275. ((equal (match-string 0) "\"")
  276. (set-match-data
  277. (list begin (match-end 0)
  278. begin (1+ begin)
  279. (1+ begin) (match-beginning 0)
  280. (match-beginning 0) (match-end 0)))
  281. (setq finish t))
  282. ((equal (match-string 0) "\n")
  283. (set-match-data
  284. (list begin (match-beginning 0)
  285. begin (1+ begin)
  286. (1+ begin) (match-beginning 0)
  287. nil nil))
  288. (setq finish t))))
  289. (unless finish
  290. ;; string closed by end of buffer
  291. (set-match-data
  292. (list begin (point)
  293. begin (1+ begin)
  294. (1+ begin) (point)
  295. nil nil))))))
  296. ;; there was a match
  297. t))
  298. (defun haskell-lexeme-looking-at-quasi-quote-literal ()
  299. "Non-nil when point is just in front of Template Haskell
  300. quaisquote literal.
  301. Quasi quotes start with '[xxx|' or '[$xxx|' sequence and end with
  302. '|]'. The 'xxx' is a quoter name. There is no escaping mechanism
  303. provided for the ending sequence.
  304. Regexp has subgroup expressions:
  305. (match-text 1) matches the quoter name (without $ sign if present).
  306. (match-text 2) matches the opening vertical bar.
  307. (match-text 3) matches the inside of the quoted string.
  308. (match-text 4) matches the closing vertical bar
  309. or nil if at the end of the buffer.
  310. Note that this function excludes 'e', 't', 'd', 'p' as quoter
  311. names according to Template Haskell specification."
  312. (let ((match-data-old (match-data)))
  313. (if (and
  314. (looking-at (rx-to-string `(: "[" (optional "$")
  315. (group (regexp ,haskell-lexeme-id))
  316. (group "|"))))
  317. (equal (haskell-lexeme-classify-by-first-char (char-after (match-beginning 1)))
  318. 'varid)
  319. (not (member (match-string 1) '("e" "t" "d" "p"))))
  320. (save-excursion
  321. ;; note that quasi quote syntax does not have any escaping
  322. ;; mechanism and if not closed it will span til lthe end of buffer
  323. (goto-char (match-end 0))
  324. (let ((match-data (match-data))
  325. (match-data-2 (and (re-search-forward "|]" nil t)
  326. (match-data))))
  327. (if match-data-2
  328. (set-match-data
  329. (list
  330. (nth 0 match-data) (nth 1 match-data-2) ;; whole match
  331. (nth 2 match-data) (nth 3 match-data) ;; quoter name
  332. (nth 4 match-data) (nth 5 match-data) ;; opening bar
  333. (nth 5 match-data) (nth 0 match-data-2) ;; inner string
  334. (nth 0 match-data-2) (1+ (nth 0 match-data-2)))) ;; closing bar
  335. (set-match-data
  336. (list
  337. (nth 0 match-data) (point-max) ;; whole match
  338. (nth 2 match-data) (nth 3 match-data) ;; quoter name
  339. (nth 4 match-data) (nth 5 match-data) ;; opening bar
  340. (nth 5 match-data) (point-max) ;; inner string
  341. nil nil)) ;; closing bar
  342. ))
  343. t)
  344. ;; restore old match data if not matched
  345. (set-match-data match-data-old)
  346. nil)))
  347. (defun haskell-lexeme-classify-by-first-char (char)
  348. "Classify token by CHAR.
  349. CHAR is a chararacter that is assumed to be the first character
  350. of a token."
  351. (let ((category (get-char-code-property (or char ?\ ) 'general-category)))
  352. (cond
  353. ((or (member char '(?! ?# ?$ ?% ?& ?* ?+ ?. ?/ ?< ?= ?> ?? ?@ ?^ ?| ?~ ?\\ ?-))
  354. (and (> char 127)
  355. (member category '(Pc Pd Po Sm Sc Sk So))))
  356. 'varsym)
  357. ((equal char ?:)
  358. 'consym)
  359. ((equal char ?\')
  360. 'char)
  361. ((equal char ?\")
  362. 'string)
  363. ((member category '(Lu Lt))
  364. 'conid)
  365. ((or (equal char ?_)
  366. (member category '(Ll Lo)))
  367. 'varid)
  368. ((and (>= char ?0) (<= char ?9))
  369. 'number)
  370. ((member char '(?\] ?\[ ?\( ?\) ?\{ ?\} ?\` ?\, ?\;))
  371. 'special))))
  372. (defun haskell-lexeme-looking-at-token (&rest flags)
  373. "Like `looking-at' but understands Haskell lexemes.
  374. Moves point forward over whitespace. Returns a symbol describing
  375. type of Haskell token recognized. Use `match-string',
  376. `match-beginning' and `match-end' with argument 0 to query match
  377. result.
  378. Possible results are:
  379. - 'special: for chars [](){}`,;
  380. - 'comment: for single line comments
  381. - 'nested-comment: for multiline comments
  382. - 'qsymid: for qualified identifiers or symbols
  383. - 'string: for strings literals
  384. - 'char: for char literals
  385. - 'number: for decimal, float, hexadecimal and octal number literals
  386. - 'template-haskell-quote: for a string of apostrophes for template haskell
  387. - 'template-haskell-quasi-quote: for a string of apostrophes for template haskell
  388. Note that for qualified symbols (match-string 1) returns the
  389. unqualified identifier or symbol. Further qualification for
  390. symbol or identifier can be done with:
  391. (haskell-lexeme-classify-by-first-char (char-after (match-beginning 1)))
  392. See `haskell-lexeme-classify-by-first-char' for details."
  393. (while
  394. ;; Due to how unterminated strings terminate at newline, some
  395. ;; newlines have syntax set to generic string delimeter. We want
  396. ;; those to be treated as whitespace anyway
  397. (or
  398. (> (skip-syntax-forward "-") 0)
  399. (and (not (member 'newline flags))
  400. (> (skip-chars-forward "\n") 0))))
  401. (let
  402. ((case-fold-search nil)
  403. (point (point-marker)))
  404. (or
  405. (and
  406. (equal (string-to-syntax "<")
  407. (get-char-property (point) 'syntax-table))
  408. (progn
  409. (set-match-data (list point (set-marker (make-marker) (line-end-position))))
  410. 'literate-comment))
  411. (and (looking-at "\n")
  412. 'newline)
  413. (and (looking-at "{-")
  414. (save-excursion
  415. (forward-comment 1)
  416. (set-match-data (list point (point-marker)))
  417. 'nested-comment))
  418. (and (haskell-lexeme-looking-at-char-literal)
  419. 'char)
  420. (and (haskell-lexeme-looking-at-string-literal)
  421. 'string)
  422. (and (looking-at "[][(){}`,;]")
  423. (if (haskell-lexeme-looking-at-quasi-quote-literal)
  424. 'template-haskell-quasi-quote
  425. 'special))
  426. (and (haskell-lexeme-looking-at-qidsym)
  427. (if (save-match-data
  428. (string-match "\\`---*\\'" (match-string-no-properties 0)))
  429. (progn
  430. (set-match-data (list point (set-marker (make-marker) (line-end-position))))
  431. 'comment)
  432. 'qsymid))
  433. (and (looking-at haskell-lexeme-number)
  434. 'number)
  435. (and (looking-at "'+")
  436. 'template-haskell-quote)
  437. (and (looking-at ".")
  438. 'illegal))))
  439. (provide 'haskell-lexeme)
  440. ;;; haskell-lexeme.el ends here