In this repo I store all my websites, each in a different branch.
選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

1896 行
50 KiB

  1. <?php
  2. /**
  3. * Markdown - A text-to-HTML conversion tool for web writers
  4. *
  5. * @package php-markdown
  6. * @author Michel Fortin <michel.fortin@michelf.com>
  7. * @copyright 2004-2016 Michel Fortin <https://michelf.com/projects/php-markdown/>
  8. * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
  9. */
  10. namespace Michelf;
  11. /**
  12. * Markdown Parser Class
  13. */
  14. class Markdown implements MarkdownInterface {
  15. /**
  16. * Define the package version
  17. * @var string
  18. */
  19. const MARKDOWNLIB_VERSION = "1.7.0";
  /**
   * Static convenience wrapper: obtain a parser for the class this method
   * was called on and return the result of its transform() method.
   * Because the class is resolved at call time, derived classes get a
   * parser of their own type.
   *
   * @api
   *
   * @param string $text Markdown source
   * @return string whatever transform() returns for $text
   */
  public static function defaultTransform($text) {
      // One parser instance is cached per called class name.
      static $parser_list;

      $class_name = \get_called_class();
      if (empty($parser_list[$class_name])) {
          // Create the parser if not already set.
          $parser_list[$class_name] = new $class_name;
      }

      return $parser_list[$class_name]->transform($text);
  }
  42. /**
  43. * Configuration variables
  44. */
  45. /**
  46. * Change to ">" for HTML output.
  47. * @var string
  48. */
  49. public $empty_element_suffix = " />";
  50. /**
  51. * The width of indentation of the output markup
  52. * @var int
  53. */
  54. public $tab_width = 4;
  55. /**
  56. * Change to `true` to disallow markup or entities.
  57. * @var boolean
  58. */
  59. public $no_markup = false;
  60. public $no_entities = false;
  61. /**
  62. * Change to `true` to enable line breaks on \n without two trailing spaces
  63. * @var boolean
  64. */
  65. public $hard_wrap = false;
  66. /**
  67. * Predefined URLs and titles for reference links and images.
  68. * @var array
  69. */
  70. public $predef_urls = array();
  71. public $predef_titles = array();
  72. /**
  73. * Optional filter function for URLs
  74. * @var callable
  75. */
  76. public $url_filter_func = null;
  77. /**
  78. * Optional header id="" generation callback function.
  79. * @var callable
  80. */
  81. public $header_id_func = null;
  82. /**
  83. * Optional function for converting code block content to HTML
  84. * @var callable
  85. */
  86. public $code_block_content_func = null;
  87. /**
  88. * Optional function for converting code span content to HTML.
  89. * @var callable
  90. */
  91. public $code_span_content_func = null;
  92. /**
  93. * Class attribute to toggle "enhanced ordered list" behaviour
  94. * setting this to true will allow ordered lists to start from the index
  95. * number that is defined first.
  96. *
  97. * For example:
  98. * 2. List item two
  99. * 3. List item three
  100. *
  101. * Becomes:
  102. * <ol start="2">
  103. * <li>List item two</li>
  104. * <li>List item three</li>
  105. * </ol>
  106. *
  107. * @var bool
  108. */
  109. public $enhanced_ordered_list = false;
  110. /**
  111. * Parser implementation
  112. */
  113. /**
  114. * Regex to match balanced [brackets].
  115. * Needed to insert a maximum bracket depth while converting to PHP.
  116. * @var int
  117. */
  118. protected $nested_brackets_depth = 6;
  119. protected $nested_brackets_re;
  120. protected $nested_url_parenthesis_depth = 4;
  121. protected $nested_url_parenthesis_re;
  122. /**
  123. * Table of hash values for escaped characters:
  124. * @var string
  125. */
  126. protected $escape_chars = '\`*_{}[]()>#+-.!';
  127. protected $escape_chars_re;
  /**
   * Constructor. Runs the detab and emphasis initialisers, builds the
   * depth-limited regular expressions for balanced brackets and URL
   * parentheses, builds the escape-character class, and sorts the three
   * gamut tables by priority.
   *
   * @return void
   */
  public function __construct() {
      $this->_initDetab();
      $this->prepareItalicsAndBold();

      // Nesting is emulated by repeating an "open" fragment N times and
      // then closing it N times, N being the configured maximum depth.
      $bracket_depth = $this->nested_brackets_depth;
      $this->nested_brackets_re =
          str_repeat('(?>[^\[\]]+|\[', $bracket_depth) .
          str_repeat('\])*', $bracket_depth);

      $paren_depth = $this->nested_url_parenthesis_depth;
      $this->nested_url_parenthesis_re =
          str_repeat('(?>[^()\s]+|\(', $paren_depth) .
          str_repeat('(?>\)))*', $paren_depth);

      $this->escape_chars_re = '[' . preg_quote($this->escape_chars) . ']';

      // Sort document, block, and span gamut in ascending priority order
      // (asort keeps the method names, which are the array keys).
      foreach (array('document_gamut', 'block_gamut', 'span_gamut') as $gamut) {
          asort($this->$gamut);
      }
  }
  147. /**
  148. * Internal hashes used during transformation.
  149. * @var array
  150. */
  151. protected $urls = array();
  152. protected $titles = array();
  153. protected $html_hashes = array();
  154. /**
  155. * Status flag to avoid invalid nesting.
  156. * @var boolean
  157. */
  158. protected $in_anchor = false;
  /**
   * Reset the per-transformation parser state. Called by transform()
   * before any processing happens.
   *
   * @return void
   */
  protected function setup() {
      // Start from the predefined reference links rather than from
      // whatever a previous run collected.
      $this->titles = $this->predef_titles;
      $this->urls   = $this->predef_urls;

      $this->in_anchor   = false;
      $this->html_hashes = array();
  }
  /**
   * Empty the URL, title and HTML hash tables once a transformation is
   * finished so they do not keep memory allocated unnecessarily.
   *
   * @return void
   */
  protected function teardown() {
      $this->urls = $this->titles = $this->html_hashes = array();
  }
  /**
   * Main entry point. Normalises the input text, hashes block-level HTML
   * and then runs every method of the document gamut over it.
   *
   * @api
   *
   * @param string $text Markdown source
   * @return string the transformed text followed by one newline
   */
  public function transform($text) {
      $this->setup();

      // Remove a leading UTF-8 BOM and any \x1A character; \x1A is the
      // marker used inside the hash tokens built by hashPart().
      $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

      // Standardize line endings: DOS and Mac to Unix.
      $text = preg_replace('{\r\n?}', "\n", $text);

      // Make sure the text ends with a couple of newlines.
      $text = $text . "\n\n";

      // Convert all tabs to spaces.
      $text = $this->detab($text);

      // Turn block-level HTML blocks into hash entries.
      $text = $this->hashHTMLBlocks($text);

      // Blank out lines consisting only of spaces, so that later patterns
      // can match consecutive blank lines with a plain /\n+/.
      $text = preg_replace('/^[ ]+$/m', '', $text);

      // Run document gamut methods (keys are the method names).
      foreach (array_keys($this->document_gamut) as $method) {
          $text = $this->$method($text);
      }

      $this->teardown();

      return $text . "\n";
  }
  214. /**
  215. * Define the document gamut
  216. * @var array
  217. */
  218. protected $document_gamut = array(
  219. // Strip link definitions, store in hashes.
  220. "stripLinkDefinitions" => 20,
  221. "runBasicBlockGamut" => 30,
  222. );
  /**
   * Remove link definitions from the text. Each match is handed to
   * _stripLinkDefinitions_callback(), which records the URL and title and
   * replaces the definition with an empty string.
   *
   * @param string $text
   * @return string the text without link definitions
   */
  protected function stripLinkDefinitions($text) {
      $max_indent = $this->tab_width - 1;

      // Link defs are in the form: ^[id]: url "optional title"
      $definition_re = '{
          ^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?:   # id = $1
            [ ]*
            \n?               # maybe *one* newline
            [ ]*
          (?:
            <(.+?)>           # url = $2
          |
            (\S+?)            # url = $3
          )
            [ ]*
            \n?               # maybe one newline
            [ ]*
          (?:
              (?<=\s)         # lookbehind for whitespace
              ["(]
              (.*?)           # title = $4
              [")]
              [ ]*
          )?  # title is optional
          (?:\n+|\Z)
          }xm';

      return preg_replace_callback(
          $definition_re,
          array($this, '_stripLinkDefinitions_callback'),
          $text
      );
  }
  /**
   * Callback for stripLinkDefinitions(): store the URL and title of one
   * link definition under its lower-cased id.
   *
   * @param array $matches 1 = id, 2 = <url>, 3 = bare url, 4 = title
   * @return string always an empty string, which replaces the definition
   */
  protected function _stripLinkDefinitions_callback($matches) {
      $id = strtolower($matches[1]);

      // The URL is in group 2 when written as <url>, otherwise in group 3.
      if ($matches[2] == '') {
          $this->urls[$id] = $matches[3];
      } else {
          $this->urls[$id] = $matches[2];
      }

      // A definition without a title stores null, so isset() on the title
      // entry is false later on.
      $this->titles[$id] = isset($matches[4]) ? $matches[4] : null;

      return '';
  }
  /**
   * Replace block-level HTML found in the text with hash tokens so that
   * later passes leave it untouched. Returns the text unchanged when
   * $no_markup is enabled.
   *
   * Five kinds of blocks are recognised by a single pattern: always-block
   * tags, tags that are block-level only when alone on their line
   * (ins/del), <hr>, standalone HTML comments, and processing
   * instructions (<? ... ?> and <% ... %>).
   *
   * @param string $text
   * @return string the text with each matched block replaced by a token
   */
  protected function hashHTMLBlocks($text) {
      if ($this->no_markup) {
          return $text;
      }

      $less_than_tab = $this->tab_width - 1;

      /**
       * We only want to do this for block-level HTML tags, such as headers,
       * lists, and tables. That's because we still want to wrap <p>s around
       * "paragraphs" that are wrapped in non-block-level tags, such as
       * anchors, phrase emphasis, and spans. The list of tags we're looking
       * for is hard-coded:
       *
       * *  List "a" is made of tags which can be both inline or block-level.
       *    These will be treated block-level when the start tag is alone on
       *    its line, otherwise they're not matched here and will be taken as
       *    inline later.
       * *  List "b" is made of tags which are always block-level;
       */
      $block_tags_a_re = 'ins|del';
      $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
                         'script|noscript|style|form|fieldset|iframe|math|svg|'.
                         'article|section|nav|aside|hgroup|header|footer|'.
                         'figure';

      // Regular expression for the content of a block tag.
      $nested_tags_level = 4;
      $attr = '
          (?>             # optional tag attributes
            \s            # starts with whitespace
            (?>
              [^>"/]+     # text outside quotes
            |
              /+(?!>)     # slash not followed by ">"
            |
              "[^"]*"     # text inside double quotes (tolerate ">")
            |
              \'[^\']*\'  # text inside single quotes (tolerate ">")
            )*
          )?
          ';
      // Nesting is emulated by repeating the "opening" fragment
      // $nested_tags_level times and closing it the same number of times.
      $content =
          str_repeat('
              (?>
                [^<]+         # content without tag
              |
                <\2           # nested opening tag
                  '.$attr.'   # attributes
                  (?>
                    />
                  |
                    >', $nested_tags_level).  // end of opening tag
                    '.*?'.                    // last level nested tag content
          str_repeat('
                    </\2\s*>  # closing nested tag
                  )
                |
                <(?!/\2\s*>   # other tags with a different name
                )
              )*',
              $nested_tags_level);
      // Same content pattern for the "a" list, whose tag name is group 3.
      $content2 = str_replace('\2', '\3', $content);

      /**
       * First, look for nested blocks, e.g.:
       *     <div>
       *         <div>
       *         tags for inner block must be indented.
       *         </div>
       *     </div>
       *
       * The outermost tags must start at the left margin for this to match,
       * and the inner nested divs must be indented.
       * We need to do this before the next, more liberal match, because the
       * next match will start at the first `<div>` and stop at the
       * first `</div>`.
       */
      $text = preg_replace_callback('{(?>
          (?>
              (?<=\n)         # Starting on its own line
              |               # or
              \A\n?           # the at beginning of the doc
          )
          (                   # save in $1

            # Match from `\n<tag>` to `</tag>\n`, handling nested tags
            # in between.

                  [ ]{0,'.$less_than_tab.'}
                  <('.$block_tags_b_re.')# start tag = $2
                  '.$attr.'>  # attributes followed by > and \n
                  '.$content.'    # content, support nesting
                  </\2>       # the matching end tag
                  [ ]*        # trailing spaces/tabs
                  (?=\n+|\Z)  # followed by a newline or end of document

          | # Special version for tags of group a.

                  [ ]{0,'.$less_than_tab.'}
                  <('.$block_tags_a_re.')# start tag = $3
                  '.$attr.'>[ ]*\n    # attributes followed by >
                  '.$content2.'   # content, support nesting
                  </\3>       # the matching end tag
                  [ ]*        # trailing spaces/tabs
                  (?=\n+|\Z)  # followed by a newline or end of document

          | # Special case just for <hr />. It was easier to make a special
            # case than to make the other regex more complicated.

                  [ ]{0,'.$less_than_tab.'}
                  <(hr)       # start tag = $4
                  '.$attr.'   # attributes
                  /?>         # the matching end tag
                  [ ]*
                  (?=\n{2,}|\Z)   # followed by a blank line or end of document

          | # Special case for standalone HTML comments:

              [ ]{0,'.$less_than_tab.'}
              (?s:
                  <!-- .*? -->
              )
              [ ]*
              (?=\n{2,}|\Z)   # followed by a blank line or end of document

          | # PHP and ASP-style processor instructions (<? and <%)

              [ ]{0,'.$less_than_tab.'}
              (?s:
                  <([?%])     # $5
                  .*?
                  \5>
              )
              [ ]*
              (?=\n{2,}|\Z)   # followed by a blank line or end of document

          )
          )}Sxmi',
          array($this, '_hashHTMLBlocks_callback'),
          $text
      );
      // Note on the last alternative: its opening character is capture
      // group 5 of the combined pattern, so the closing delimiter must be
      // the back reference \5. A reference to group 2 (the tag name of the
      // first alternative) is never set in that branch and cannot match.

      return $text;
  }
  /**
   * Callback for hashHTMLBlocks(): swap one matched HTML block for a
   * block-level hash token surrounded by blank lines.
   *
   * @param array $matches regex match; index 1 holds the HTML block
   * @return string
   */
  protected function _hashHTMLBlocks_callback($matches) {
      $token = $this->hashBlock($matches[1]);
      return "\n\n" . $token . "\n\n";
  }
  /**
   * Store a piece of finished output and return a unique text token that
   * stands in for it in the text stream. The token is turned back into
   * the stored text by unhash().
   *
   * The $boundary argument specifies what character surrounds the token.
   * By convention, "B" is used for block elements that need not be
   * wrapped into paragraph tags at the end, ":" is used for elements that
   * are word separators and "X" is used in the general case.
   *
   * @param string $text
   * @param string $boundary
   * @return string the token that replaces $text
   */
  protected function hashPart($text, $boundary = 'X') {
      // Counter kept across calls so every token is distinct.
      static $i = 0;

      // Expand any token already present in $text first, so a single
      // unhash pass is enough at the end.
      $expanded = $this->unhash($text);

      $i++;
      $key = $boundary . "\x1A" . $i . $boundary;
      $this->html_hashes[$key] = $expanded;

      return $key;
  }
  /**
   * Hash a block-level element: hashPart() with the "B" boundary.
   *
   * @param string $text
   * @return string the token that replaces $text
   */
  protected function hashBlock($text) {
      $block_boundary = 'B';
      return $this->hashPart($text, $block_boundary);
  }
  449. /**
  450. * Define the block gamut - these are all the transformations that form
  451. * block-level tags like paragraphs, headers, and list items.
  452. * @var array
  453. */
  454. protected $block_gamut = array(
  455. "doHeaders" => 10,
  456. "doHorizontalRules" => 20,
  457. "doLists" => 40,
  458. "doCodeBlocks" => 50,
  459. "doBlockQuotes" => 60,
  460. );
  /**
   * Run block gamut transformations, hashing raw HTML blocks first.
   *
   * Raw HTML has to be hashed for each block, and not only once at the
   * beginning of the document, since HTML blocks can be part of list
   * items and could have been indented. Indented blocks would have been
   * seen as a code block in a previous pass of hashHTMLBlocks.
   *
   * @param string $text
   * @return string
   */
  protected function runBlockGamut($text) {
      $hashed = $this->hashHTMLBlocks($text);
      return $this->runBasicBlockGamut($hashed);
  }
  /**
   * Run block gamut transformations without hashing HTML blocks. Useful
   * when HTML blocks are known to be hashed already, as in the first
   * whole-document pass.
   *
   * @param string $text
   * @return string
   */
  protected function runBasicBlockGamut($text) {
      // Keys of the gamut table are the method names to call.
      foreach (array_keys($this->block_gamut) as $method) {
          $text = $this->$method($text);
      }

      // Finally, hand the text to formParagraphs().
      return $this->formParagraphs($text);
  }
  /**
   * Convert horizontal rules: a line holding three or more of the same
   * marker (-, * or _), optionally separated by up to two spaces.
   *
   * @param string $text
   * @return string
   */
  protected function doHorizontalRules($text) {
      $rule_re = '{
              ^[ ]{0,3}   # Leading space
              ([-*_])     # $1: First marker
              (?>         # Repeated marker group
                  [ ]{0,2}    # Zero, one, or two spaces.
                  \1          # Marker character
              ){2,}       # Group repeated at least twice
              [ ]*        # Tailing spaces
              $           # End of line.
          }mx';

      // The <hr> tag is hashed once; every rule in the text is replaced
      // by that same token.
      $hr_token = $this->hashBlock("<hr$this->empty_element_suffix");

      return preg_replace($rule_re, "\n" . $hr_token . "\n", $text);
  }
  514. /**
  515. * These are all the transformations that occur *within* block-level
  516. * tags like paragraphs, headers, and list items.
  517. * @var array
  518. */
  519. protected $span_gamut = array(
  520. // Process character escapes, code spans, and inline HTML
  521. // in one shot.
  522. "parseSpan" => -30,
  523. // Process anchor and image tags. Images must come first,
  524. // because ![foo][f] looks like an anchor.
  525. "doImages" => 10,
  526. "doAnchors" => 20,
  527. // Make links out of things like `<https://example.com/>`
  528. // Must come after doAnchors, because you can use < and >
  529. // delimiters in inline links like [this](<url>).
  530. "doAutoLinks" => 30,
  531. "encodeAmpsAndAngles" => 40,
  532. "doItalicsAndBold" => 50,
  533. "doHardBreaks" => 60,
  534. );
  /**
   * Run every span gamut method over the text, in the order of the
   * span gamut table.
   *
   * @param string $text
   * @return string
   */
  protected function runSpanGamut($text) {
      $result = $text;
      foreach (array_keys($this->span_gamut) as $method) {
          $result = $this->$method($result);
      }
      return $result;
  }
  /**
   * Convert line breaks into hard breaks. With $hard_wrap enabled every
   * newline qualifies; otherwise only newlines preceded by at least two
   * spaces do.
   *
   * @param string $text
   * @return string
   */
  protected function doHardBreaks($text) {
      $break_re = $this->hard_wrap ? '/ *\n/' : '/ {2,}\n/';

      return preg_replace_callback(
          $break_re,
          array($this, '_doHardBreaks_callback'),
          $text
      );
  }
  /**
   * Callback for doHardBreaks(): produce the hashed <br> element that
   * replaces one matched line break.
   *
   * @param array $matches unused
   * @return string hash token for the <br> tag and its newline
   */
  protected function _doHardBreaks_callback($matches) {
      $br_tag = "<br$this->empty_element_suffix\n";
      return $this->hashPart($br_tag);
  }
  /**
   * Turn Markdown link shortcuts into XHTML <a> tags. Three syntaxes are
   * handled in order: reference links, inline links, and reference
   * shortcuts.
   *
   * @param string $text
   * @return string
   */
  protected function doAnchors($text) {
      // The callbacks run the span gamut on the link text, which calls
      // this method again; the flag prevents anchors nested in anchors.
      if ($this->in_anchor) {
          return $text;
      }
      $this->in_anchor = true;

      $reference_callback = array($this, '_doAnchors_reference_callback');
      $inline_callback    = array($this, '_doAnchors_inline_callback');

      // First, handle reference-style links: [link text] [id]
      $text = preg_replace_callback('{
          (                   # wrap whole match in $1
            \[
              ('.$this->nested_brackets_re.') # link text = $2
            \]

            [ ]?              # one optional space
            (?:\n[ ]*)?       # one optional newline followed by spaces

            \[
              (.*?)           # id = $3
            \]
          )
          }xs',
          $reference_callback, $text);

      // Next, inline-style links: [link text](url "optional title")
      $text = preg_replace_callback('{
          (                   # wrap whole match in $1
            \[
              ('.$this->nested_brackets_re.') # link text = $2
            \]
            \(                # literal paren
              [ \n]*
              (?:
                  <(.+?)>     # href = $3
              |
                  ('.$this->nested_url_parenthesis_re.')  # href = $4
              )
              [ \n]*
              (               # $5
                ([\'"])       # quote char = $6
                (.*?)         # Title = $7
                \6            # matching quote
                [ \n]*        # ignore any spaces/tabs between closing quote and )
              )?              # title is optional
            \)
          )
          }xs',
          $inline_callback, $text);

      // Last, handle reference-style shortcuts: [link text]
      // These must come last in case you've also got [link text][1]
      // or [link text](/foo)
      $text = preg_replace_callback('{
          (                   # wrap whole match in $1
            \[
              ([^\[\]]+)      # link text = $2; can\'t contain [ or ]
            \]
          )
          }xs',
          $reference_callback, $text);

      $this->in_anchor = false;

      return $text;
  }
  /**
   * Callback for reference-style anchors. Builds a hashed <a> element
   * when the link id is known; otherwise returns the matched text as is.
   *
   * @param array $matches 1 = whole match, 2 = link text, 3 = id (may be
   *                       missing for shortcut links)
   * @return string
   */
  protected function _doAnchors_reference_callback($matches) {
      $id = isset($matches[3]) ? $matches[3] : '';
      if ($id == "") {
          // For shortcut links like [this][] or [this]: the text is the id.
          $id = $matches[2];
      }

      // Lower-case and turn embedded newlines into spaces.
      $id = preg_replace('{[ ]?\n}', ' ', strtolower($id));

      if (!isset($this->urls[$id])) {
          // Unknown id: leave the source text intact.
          return $matches[1];
      }

      $href = $this->encodeURLAttribute($this->urls[$id]);
      $tag  = "<a href=\"$href\"";

      if (isset($this->titles[$id])) {
          $title_attr = $this->encodeAttribute($this->titles[$id]);
          $tag .= " title=\"$title_attr\"";
      }

      $inner = $this->runSpanGamut($matches[2]);
      $tag  .= ">$inner</a>";

      return $this->hashPart($tag);
  }
  /**
   * Callback for inline anchors: build a hashed <a> element from the
   * link text, URL and optional title.
   *
   * @param array $matches 2 = link text, 3 = <url>, 4 = bare url,
   *                       7 = title (may be missing)
   * @return string hash token for the anchor
   */
  protected function _doAnchors_inline_callback($matches) {
      $inner = $this->runSpanGamut($matches[2]);
      $href  = $matches[3] == '' ? $matches[4] : $matches[3];

      // If the URL was of the form <s p a c e s> it got caught by the HTML
      // tag parser and hashed. Reverse that and drop the angle brackets
      // before using the URL.
      $restored = $this->unhash($href);
      if ($restored != $href) {
          $href = preg_replace('/^<(.*)>$/', '\1', $restored);
      }

      $href = $this->encodeURLAttribute($href);
      $tag  = "<a href=\"$href\"";

      if (isset($matches[7])) {
          $title_attr = $this->encodeAttribute($matches[7]);
          $tag .= " title=\"$title_attr\"";
      }

      // The link text goes through the span gamut a second time here,
      // exactly as in the original implementation.
      $inner = $this->runSpanGamut($inner);
      $tag  .= ">$inner</a>";

      return $this->hashPart($tag);
  }
  /**
   * Turn Markdown image shortcuts into <img> tags. Reference-style
   * images are handled first, inline images second.
   *
   * @param string $text
   * @return string
   */
  protected function doImages($text) {
      // Reference-style labeled images: ![alt text][id]
      $reference_re = '{
          (               # wrap whole match in $1
            !\[
              ('.$this->nested_brackets_re.')     # alt text = $2
            \]

            [ ]?              # one optional space
            (?:\n[ ]*)?       # one optional newline followed by spaces

            \[
              (.*?)       # id = $3
            \]

          )
          }xs';

      // Inline images: ![alt text](url "optional title")
      $inline_re = '{
          (               # wrap whole match in $1
            !\[
              ('.$this->nested_brackets_re.')     # alt text = $2
            \]
            \s?           # One optional whitespace character
            \(            # literal paren
              [ \n]*
              (?:
                  <(\S*)> # src url = $3
              |
                  ('.$this->nested_url_parenthesis_re.')  # src url = $4
              )
              [ \n]*
              (           # $5
                ([\'"])   # quote char = $6
                (.*?)     # title = $7
                \6        # matching quote
                [ \n]*
              )?          # title is optional
            \)
          )
          }xs';

      $text = preg_replace_callback(
          $reference_re,
          array($this, '_doImages_reference_callback'),
          $text
      );

      return preg_replace_callback(
          $inline_re,
          array($this, '_doImages_inline_callback'),
          $text
      );
  }
  /**
   * Callback for reference-style images. Builds a hashed <img> element
   * when the link id is known; otherwise returns the matched text as is.
   *
   * @param array $matches 1 = whole match, 2 = alt text, 3 = id
   * @return string
   */
  protected function _doImages_reference_callback($matches) {
      $id = strtolower($matches[3]);
      if ($id == "") {
          // For shortcut links like ![this][]: the alt text is the id.
          $id = strtolower($matches[2]);
      }

      $alt = $this->encodeAttribute($matches[2]);

      if (!isset($this->urls[$id])) {
          // If there's no such link ID, leave intact.
          return $matches[1];
      }

      $src = $this->encodeURLAttribute($this->urls[$id]);
      $tag = "<img src=\"$src\" alt=\"$alt\"";

      if (isset($this->titles[$id])) {
          $title_attr = $this->encodeAttribute($this->titles[$id]);
          $tag .= " title=\"$title_attr\"";
      }

      $tag .= $this->empty_element_suffix;

      return $this->hashPart($tag);
  }
  /**
   * Callback for inline images: build a hashed <img> element from the
   * alt text, URL and optional title.
   *
   * @param array $matches 2 = alt text, 3 = <url>, 4 = bare url,
   *                       7 = title (may be missing)
   * @return string hash token for the image
   */
  protected function _doImages_inline_callback($matches) {
      $src = $matches[3] == '' ? $matches[4] : $matches[3];

      $alt = $this->encodeAttribute($matches[2]);
      $src = $this->encodeURLAttribute($src);

      $tag = "<img src=\"$src\" alt=\"$alt\"";

      if (isset($matches[7])) {
          $title_attr = $this->encodeAttribute($matches[7]);
          $tag .= " title=\"$title_attr\"";
      }

      $tag .= $this->empty_element_suffix;

      return $this->hashPart($tag);
  }
  /**
   * Parse Markdown heading elements. Setext-style headers (underlined
   * with = or -) are processed first, atx-style headers (prefixed with
   * one to six # characters) second.
   *
   * @param string $text
   * @return string
   */
  protected function doHeaders($text) {
      /**
       * Setext-style headers:
       *    Header 1
       *    ========
       *
       *    Header 2
       *    --------
       */
      $setext_re = '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx';

      /**
       * atx-style headers:
       *   # Header 1
       *   ## Header 2
       *   ## Header 2 with closing hashes ##
       *   ...
       *   ###### Header 6
       */
      $atx_re = '{
              ^(\#{1,6})  # $1 = string of #\'s
              [ ]*
              (.+?)       # $2 = Header text
              [ ]*
              \#*         # optional closing #\'s (not counted)
              \n+
          }xm';

      $text = preg_replace_callback(
          $setext_re,
          array($this, '_doHeaders_callback_setext'),
          $text
      );

      return preg_replace_callback(
          $atx_re,
          array($this, '_doHeaders_callback_atx'),
          $text
      );
  }
  /**
   * Setext header parsing callback: build a hashed <h1> (underlined with
   * "=") or <h2> (underlined with "-") element.
   *
   * @param array $matches 0 = whole match, 1 = header text,
   *                       2 = underline characters
   * @return string the hashed header, or the untouched match when it is
   *                actually an empty list item
   */
  protected function _doHeaders_callback_setext($matches) {
      // Terrible hack to check we haven't found an empty list item.
      if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) {
          return $matches[0];
      }

      // Square-bracket offset: the curly-brace form ($str{0}) is
      // deprecated since PHP 7.4 and a parse error from PHP 8.0 on.
      $level = $matches[2][0] == '=' ? 1 : 2;

      // ID attribute generation
      $idAtt = $this->_generateIdFromHeaderValue($matches[1]);

      $block = "<h$level$idAtt>".$this->runSpanGamut($matches[1])."</h$level>";
      return "\n" . $this->hashBlock($block) . "\n\n";
  }
  /**
   * ATX header parsing callback: the number of leading # characters
   * gives the header level.
   *
   * @param array $matches 1 = leading # characters, 2 = header text
   * @return string the hashed header surrounded by newlines
   */
  protected function _doHeaders_callback_atx($matches) {
      $header_text = $matches[2];
      $depth       = strlen($matches[1]);

      // ID attribute generation
      $id_attr = $this->_generateIdFromHeaderValue($header_text);

      $html = "<h$depth$id_attr>"
          . $this->runSpanGamut($header_text)
          . "</h$depth>";

      return "\n" . $this->hashBlock($html) . "\n\n";
  }
  /**
   * Build the id attribute of a header using the optional
   * $header_id_func callback.
   *
   * @param string $headerValue raw header text passed to the callback
   * @return string ` id="..."` (with a leading space), or an empty string
   *                when no callable is set or it returns a falsy value
   */
  protected function _generateIdFromHeaderValue($headerValue) {
      if (is_callable($this->header_id_func)) {
          $id = call_user_func($this->header_id_func, $headerValue);
          if ($id) {
              return ' id="' . $this->encodeAttribute($id) . '"';
          }
      }

      return "";
  }
  /**
   * Form HTML ordered (numbered) and unordered (bulleted) lists. The
   * text is scanned once for bulleted lists and once for numbered lists;
   * each whole list found is handed to _doLists_callback().
   *
   * @param string $text
   * @return string
   */
  protected function doLists($text) {
      $less_than_tab = $this->tab_width - 1;

      // Re-usable patterns to match list item bullets and number markers.
      $bullet_re = '[*+-]';
      $number_re = '\d+[\.]';

      // Each pass looks for one kind of list; the other kind's marker is
      // needed to detect where a list of a different type begins.
      $passes = array(
          $bullet_re => $number_re,
          $number_re => $bullet_re,
      );

      foreach ($passes as $marker_re => $other_marker_re) {
          // Re-usable pattern to match any entire ul or ol list:
          $whole_list_re = '
              (                               # $1 = whole list
                (                             # $2
                  ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces
                  ('.$marker_re.')            # $4 = first list item marker
                  [ ]+
                )
                (?s:.+?)
                (                             # $5
                    \z
                  |
                    \n{2,}
                    (?=\S)
                    (?!                       # Negative lookahead for another list item marker
                      [ ]*
                      '.$marker_re.'[ ]+
                    )
                  |
                    (?=                       # Lookahead for another kind of list
                      \n
                      \3                      # Must have the same indentation
                      '.$other_marker_re.'[ ]+
                    )
                )
              )
          '; // mx

          // Inside a list (list_level non-zero) a list may start on any
          // line; at top level it must follow a blank line or open the
          // text, and that newline is consumed by the match.
          $start_re = $this->list_level
              ? '^'
              : '(?:(?<=\n)\n|\A\n?) # Must eat the newline';

          $text = preg_replace_callback(
              '{' . "\n" . $start_re . "\n" . $whole_list_re . "\n" . '}mx',
              array($this, '_doLists_callback'),
              $text
          );
      }

      return $text;
  }
  /**
   * List parsing callback: wrap the processed items of one whole list in
   * a hashed <ul> or <ol> element.
   *
   * @param array $matches 1 = whole list, 4 = first list item marker
   * @return string the hashed list surrounded by newlines
   */
  protected function _doLists_callback($matches) {
      // Re-usable patterns to match list item bullets and number markers.
      $bullet_re = '[*+-]';
      $number_re = '\d+[\.]';

      $first_marker = $matches[4];
      $is_bulleted  = preg_match("/$bullet_re/", $first_marker);
      $tag          = $is_bulleted ? "ul" : "ol";
      $item_re      = $is_bulleted ? $bullet_re : $number_re;

      $items = $this->processListItems($matches[1] . "\n", $item_re);

      // With enhanced ordered lists, a numbered list whose first marker
      // is greater than 1 gets a start attribute carrying that number.
      $start = 1;
      if ($this->enhanced_ordered_list && $tag == 'ol') {
          $digits = array();
          if (preg_match('/[0-9]+/', $first_marker, $digits)) {
              $start = $digits[0];
          }
      }

      $open_tag = ($start > 1 && $tag == 'ol')
          ? "<$tag start=\"$start\">"
          : "<$tag>";

      $hashed = $this->hashBlock($open_tag . "\n" . $items . "</$tag>");

      return "\n" . $hashed . "\n\n";
  }
  962. /**
  963. * Nesting tracker for list levels
  964. * @var integer
  965. */
  966. protected $list_level = 0;
  /**
   * Process the contents of a single ordered or unordered list, splitting
   * it into individual list items. Each item is handed to
   * _processListItems_callback().
   *
   * @param string $list_str      text of the whole list
   * @param string $marker_any_re pattern matching this list's item markers
   * @return string the list text after the callback has replaced each item
   */
  protected function processListItems($list_str, $marker_any_re) {
      /**
       * $this->list_level keeps track of when we're inside a list. It is
       * incremented on entry and decremented on exit; zero means we're
       * not in a list anymore.
       *
       * We do this because when we're not inside a list, we want to treat
       * something like this:
       *
       *    I recommend upgrading to version
       *    8. Oops, now this line is treated
       *    as a sub-list.
       *
       * as a single paragraph, despite the fact that the second line
       * starts with a digit-period-space sequence.
       *
       * Whereas when we're inside a list (or sub-list), that line will be
       * treated as the start of a sub-list. This is an aspect of
       * Markdown's syntax that's hard to parse perfectly without
       * resorting to mind-reading.
       */
      $this->list_level++;

      // Trim trailing blank lines.
      $trimmed = preg_replace("/\n{2,}\\z/", "\n", $list_str);

      $item_re = '{
          (\n)?                           # leading line = $1
          (^[ ]*)                         # leading whitespace = $2
          ('.$marker_any_re.'             # list marker and space = $3
              (?:[ ]+|(?=\n))             # space only required if item is not empty
          )
          ((?s:.*?))                      # list item text   = $4
          (?:(\n+(?=\n))|\n)              # tailing blank line = $5
          (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
          }xm';

      $items = preg_replace_callback(
          $item_re,
          array($this, '_processListItems_callback'),
          $trimmed
      );

      $this->list_level--;

      return $items;
  }
  /**
   * List item parsing callback.
   *
   * An item that is preceded or followed by a blank line, or that contains
   * one, is run through the full block gamut; any other item only gets
   * sub-list processing and span-level formatting without <p> wrapping.
   *
   * @param array $matches Groups from the item regex in processListItems()
   * @return string The item wrapped in <li>...</li> plus a newline
   */
  protected function _processListItems_callback($matches) {
      $body          = $matches[4];
      $leading_space = isset($matches[2]) ? $matches[2] : null;
      $marker_width  = strlen($matches[3]);

      $has_blank_lines = !empty($matches[1])
          || !empty($matches[5])
          || preg_match('/\n{2,}/', $body);

      if ($has_blank_lines) {
          // Replace marker with the appropriate whitespace indentation
          // so that outdent() treats the first line like the others.
          $indented = $leading_space . str_repeat(' ', $marker_width) . $body;
          $html = $this->runBlockGamut($this->outdent($indented) . "\n");
      } else {
          // Recursion for sub-lists, then span-level formatting only.
          $nested = $this->doLists($this->outdent($body));
          $html = $this->formParagraphs($nested, false);
      }

      return "<li>" . $html . "</li>\n";
  }
  /**
   * Process Markdown `<pre><code>` blocks: runs of lines indented by at
   * least `tab_width` spaces are handed to _doCodeBlocks_callback().
   *
   * @param string $text
   * @return string
   */
  protected function doCodeBlocks($text) {
      $width = $this->tab_width;

      $code_block_re = '{
          (?:\n\n|\A\n?)
          (    # $1 = the code block -- one or more lines, starting with a space/tab
            (?>
              [ ]{'.$width.'}  # Lines must start with a tab or a tab-width of spaces
              .*\n+
            )+
          )
          ((?=^[ ]{0,'.$width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
          }xm';

      return preg_replace_callback(
          $code_block_re,
          array($this, '_doCodeBlocks_callback'),
          $text
      );
  }
  /**
   * Code block parsing callback.
   *
   * Outdents the matched block, escapes it (through
   * `code_block_content_func` when one is set, otherwise with
   * htmlspecialchars()), and wraps it in <pre><code> markup that is passed
   * to hashBlock().
   *
   * @param array $matches $matches[1] holds the indented code block
   * @return string
   */
  protected function _doCodeBlocks_callback($matches) {
      $code = $this->outdent($matches[1]);

      $code = $this->code_block_content_func
          ? call_user_func($this->code_block_content_func, $code, "")
          : htmlspecialchars($code, ENT_NOQUOTES);

      // Trim leading and trailing newlines.
      $code = preg_replace('/\A\n+|\n+\z/', '', $code);

      $html = "<pre><code>$code\n</code></pre>";
      return "\n\n" . $this->hashBlock($html) . "\n\n";
  }
  /**
   * Create a code span markup for $code. Called from handleSpanToken.
   *
   * The content is escaped by `code_span_content_func` when one is set;
   * otherwise it is trimmed and passed through htmlspecialchars().
   *
   * @param string $code
   * @return string Result of hashPart() on the <code> element
   */
  protected function makeCodeSpan($code) {
      $escaped = $this->code_span_content_func
          ? call_user_func($this->code_span_content_func, $code)
          : htmlspecialchars(trim($code), ENT_NOQUOTES);

      return $this->hashPart("<code>$escaped</code>");
  }
  /**
   * Define the emphasis operators with their regex matches.
   *
   * Keys are the emphasis marker state used by prepareItalicsAndBold() and
   * doItalicsAndBold(): '' when no emphasis is open, otherwise the marker
   * ('*' or '_') that opened it. Values are regex fragments for the token
   * to look for in that state.
   * @var array
   */
  protected $em_relist = array(
      ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
      '*' => '(?<![\s*])\*(?!\*)',
      '_' => '(?<![\s_])_(?!_)',
  );
  /**
   * Define the strong operators with their regex matches.
   *
   * Keys are the strong marker state used by prepareItalicsAndBold() and
   * doItalicsAndBold(): '' when no strong span is open, otherwise the marker
   * ('**' or '__') that opened it. Values are regex fragments for the token
   * to look for in that state.
   * @var array
   */
  protected $strong_relist = array(
      ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
      '**' => '(?<![\s*])\*\*(?!\*)',
      '__' => '(?<![\s_])__(?!_)',
  );
  /**
   * Define the emphasis + strong operators with their regex matches.
   *
   * prepareItalicsAndBold() looks entries up by the concatenated
   * emphasis + strong marker state, so only the '' , '***' and '___'
   * states have a three-character token expression.
   * @var array
   */
  protected $em_strong_relist = array(
      ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
      '***' => '(?<![\s*])\*\*\*(?!\*)',
      '___' => '(?<![\s_])___(?!_)',
  );
  /**
   * Container for prepared regular expressions.
   *
   * Filled by prepareItalicsAndBold(), keyed by the concatenated
   * emphasis + strong marker state, and read by doItalicsAndBold().
   * @var array
   */
  protected $em_strong_prepared_relist;
  /**
   * Prepare regular expressions for searching emphasis tokens in any
   * context.
   *
   * For every combination of emphasis state and strong state, builds one
   * master expression that alternates the three-character token (when that
   * combined state has one), the emphasis token and the strong token, and
   * stores it in `em_strong_prepared_relist` under the combined state key.
   *
   * @return void
   */
  protected function prepareItalicsAndBold() {
      foreach ($this->em_relist as $em => $em_re) {
          foreach ($this->strong_relist as $strong => $strong_re) {
              $state = "$em$strong";

              // The three-character token, when allowed, must be tried first.
              $alternatives = isset($this->em_strong_relist[$state])
                  ? array($this->em_strong_relist[$state], $em_re, $strong_re)
                  : array($em_re, $strong_re);

              $this->em_strong_prepared_relist[$state] =
                  '{(' . implode('|', $alternatives) . ')}';
          }
      }
  }
  /**
   * Convert Markdown italics (emphasis) and bold (strong) to HTML.
   *
   * The text is scanned token by token with the expression prepared for the
   * current emphasis/strong state. Two parallel stacks are kept: the open
   * markers ($token_stack) and the text collected since each marker
   * ($text_stack); index 0 is always the innermost open span. Closed spans
   * are run through runSpanGamut(), wrapped in their tag and replaced by
   * the result of hashPart().
   *
   * @param string $text
   * @return string
   */
  protected function doItalicsAndBold($text) {
      $token_stack = array('');
      $text_stack = array('');
      $em = '';
      $strong = '';
      $tree_char_em = false;

      while (1) {
          // Get prepared regular expression for searching emphasis tokens
          // in current context.
          $token_re = $this->em_strong_prepared_relist["$em$strong"];

          // Each loop iteration searches for the next emphasis token and
          // handles it according to the current state.
          $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
          $text_stack[0] .= $parts[0];
          // References so that a missing token/remainder reads as null
          // instead of raising an undefined-index notice.
          $token =& $parts[1];
          $text =& $parts[2];

          if (empty($token)) {
              // Reached end of text span: empty stack without emitting
              // any more emphasis. Unclosed markers go back in as text.
              while ($token_stack[0]) {
                  $text_stack[1] .= array_shift($token_stack);
                  $text_stack[0] .= array_shift($text_stack);
              }
              break;
          }

          $token_len = strlen($token);
          if ($tree_char_em) {
              // Reached closing marker while inside a three-char emphasis.
              if ($token_len == 3) {
                  // Three-char closing marker, close em and strong.
                  array_shift($token_stack);
                  $span = array_shift($text_stack);
                  $span = $this->runSpanGamut($span);
                  $span = "<strong><em>$span</em></strong>";
                  $text_stack[0] .= $this->hashPart($span);
                  $em = '';
                  $strong = '';
              } else {
                  // Other closing marker: close one em or strong and
                  // change current token state to match the other.
                  // (Bracket offset: the `{0}` form is a parse error in PHP 8.)
                  $token_stack[0] = str_repeat($token[0], 3-$token_len);
                  $tag = $token_len == 2 ? "strong" : "em";
                  $span = $text_stack[0];
                  $span = $this->runSpanGamut($span);
                  $span = "<$tag>$span</$tag>";
                  $text_stack[0] = $this->hashPart($span);
                  $$tag = ''; // $$tag stands for $em or $strong
              }
              $tree_char_em = false;
          } else if ($token_len == 3) {
              if ($em) {
                  // Reached closing marker for both em and strong.
                  // Close the two innermost spans, whichever order they
                  // were opened in.
                  for ($i = 0; $i < 2; ++$i) {
                      $shifted_token = array_shift($token_stack);
                      $tag = strlen($shifted_token) == 2 ? "strong" : "em";
                      $span = array_shift($text_stack);
                      $span = $this->runSpanGamut($span);
                      $span = "<$tag>$span</$tag>";
                      $text_stack[0] .= $this->hashPart($span);
                      $$tag = ''; // $$tag stands for $em or $strong
                  }
              } else {
                  // Reached opening three-char emphasis marker. Push on token
                  // stack; will be handled by the special condition above.
                  $em = $token[0];
                  $strong = "$em$em";
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $tree_char_em = true;
              }
          } else if ($token_len == 2) {
              if ($strong) {
                  // Unwind any dangling emphasis marker:
                  if (strlen($token_stack[0]) == 1) {
                      $text_stack[1] .= array_shift($token_stack);
                      $text_stack[0] .= array_shift($text_stack);
                  }
                  // Closing strong marker:
                  array_shift($token_stack);
                  $span = array_shift($text_stack);
                  $span = $this->runSpanGamut($span);
                  $span = "<strong>$span</strong>";
                  $text_stack[0] .= $this->hashPart($span);
                  $strong = '';
              } else {
                  // Opening strong marker.
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $strong = $token;
              }
          } else {
              // Here $token_len == 1
              if ($em) {
                  if (strlen($token_stack[0]) == 1) {
                      // Closing emphasis marker:
                      array_shift($token_stack);
                      $span = array_shift($text_stack);
                      $span = $this->runSpanGamut($span);
                      $span = "<em>$span</em>";
                      $text_stack[0] .= $this->hashPart($span);
                      $em = '';
                  } else {
                      // The innermost open span is not this emphasis:
                      // keep the marker as literal text.
                      $text_stack[0] .= $token;
                  }
              } else {
                  // Opening emphasis marker.
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $em = $token;
              }
          }
      }
      return $text_stack[0];
  }
  /**
   * Parse Markdown blockquotes to HTML.
   *
   * Matches runs of paragraphs whose first line starts with ">" and hands
   * each run to _doBlockQuotes_callback().
   *
   * @param string $text
   * @return string
   */
  protected function doBlockQuotes($text) {
      $blockquote_re = '/
          (                 # Wrap whole match in $1
            (?>
              ^[ ]*>[ ]?    # ">" at the start of a line
              .+\n          # rest of the first line
              (.+\n)*       # subsequent consecutive lines
              \n*           # blanks
            )+
          )
          /xm';

      return preg_replace_callback(
          $blockquote_re,
          array($this, '_doBlockQuotes_callback'),
          $text
      );
  }
  /**
   * Blockquote parsing callback.
   *
   * Strips one level of ">" quoting, runs the block gamut on the content
   * (which handles nested quotes), indents the result and wraps it in a
   * <blockquote> element that is passed to hashBlock().
   *
   * @param array $matches $matches[1] holds the quoted block
   * @return string
   */
  protected function _doBlockQuotes_callback($matches) {
      // Trim one level of quoting and empty out whitespace-only lines.
      $inner = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);

      // Recurse into the quoted content.
      $inner = $this->runBlockGamut($inner);

      // Indent every line of the rendered content...
      $inner = preg_replace('/^/m', " ", $inner);

      // ...except inside <pre>, where leading spaces would change the
      // content, so the indentation is taken out again there.
      $inner = preg_replace_callback(
          '{(\s*<pre>.+?</pre>)}sx',
          array($this, '_doBlockQuotes_callback2'),
          $inner
      );

      $html = "<blockquote>\n$inner\n</blockquote>";
      return "\n" . $this->hashBlock($html) . "\n\n";
  }
  /**
   * Blockquote <pre> fix-up callback: removes the leading indentation from
   * each line of a matched <pre> section.
   *
   * @param array $matches $matches[1] holds the <pre>...</pre> section
   * @return string
   */
  protected function _doBlockQuotes_callback2($matches) {
      return preg_replace('/^ /m', '', $matches[1]);
  }
  /**
   * Parse paragraphs.
   *
   * Splits the text on blank lines. A chunk that consists solely of a block
   * hash token is replaced by the stored entry from `html_hashes`; any
   * other chunk is treated as a paragraph: it goes through the span gamut,
   * is optionally wrapped in <p> tags, and is unhashed.
   *
   * @param string $text String to process in paragraphs
   * @param boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
   * @return string
   */
  protected function formParagraphs($text, $wrap_in_p = true) {
      // Strip leading and trailing lines, then split on blank lines.
      $text = preg_replace('/\A\n+|\n+\z/', '', $text);
      $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

      foreach ($grafs as $key => $graf) {
          if (preg_match('/^B\x1A[0-9]+B$/', $graf)) {
              // Is a hashed block: swap the stored markup back in.
              $grafs[$key] = $this->html_hashes[$graf];
              continue;
          }

          // Is a paragraph.
          $html = $this->runSpanGamut($graf);
          if ($wrap_in_p) {
              // The opening tag replaces any leading spaces.
              $html = preg_replace('/^([ ]*)/', "<p>", $html) . "</p>";
          }
          $grafs[$key] = $this->unhash($html);
      }

      return implode("\n\n", $grafs);
  }
  /**
   * Encode text for a double-quoted HTML attribute. This function
   * is *not* suitable for attributes enclosed in single quotes.
   *
   * Ampersands and "<" are handled by encodeAmpsAndAngles(); double quotes
   * are then turned into &quot;.
   *
   * @param string $text
   * @return string
   */
  protected function encodeAttribute($text) {
      return str_replace('"', '&quot;', $this->encodeAmpsAndAngles($text));
  }
  /**
   * Encode text for a double-quoted HTML attribute containing a URL,
   * applying the URL filter if set. Also generates the textual
   * representation for the URL (removing mailto: or tel:) storing it in $text.
   * This function is *not* suitable for attributes enclosed in single quotes.
   *
   * mailto: URLs are entity-obfuscated; everything else goes through
   * encodeAttribute().
   *
   * @param string $url
   * @param string &$text Passed by reference; receives the link text
   * @return string URL
   */
  protected function encodeURLAttribute($url, &$text = null) {
      if ($this->url_filter_func) {
          $url = call_user_func($this->url_filter_func, $url);
      }

      if (preg_match('{^mailto:}i', $url)) {
          // 7 = strlen("mailto:"), dropped from the visible text.
          return $this->encodeEntityObfuscatedAttribute($url, $text, 7);
      }

      $is_tel = preg_match('{^tel:}i', $url);
      $url = $this->encodeAttribute($url);
      // 4 = strlen("tel:"), dropped from the visible text.
      $text = $is_tel ? substr($url, 4) : $url;
      return $url;
  }
  /**
   * Smart processing for ampersands and angle brackets that need to
   * be encoded. Valid character entities are left alone unless the
   * no-entities mode is set.
   *
   * @param string $text
   * @return string
   */
  protected function encodeAmpsAndAngles($text) {
      if ($this->no_entities) {
          // Every ampersand is encoded, including those starting an entity.
          $amp_encoded = str_replace('&', '&amp;', $text);
      } else {
          // Ampersand-encoding based entirely on Nat Irons's Amputator
          // MT plugin: <http://bumppo.net/projects/amputator/>
          $amp_encoded = preg_replace(
              '/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
              '&amp;',
              $text
          );
      }

      // Encode remaining <'s
      return str_replace('<', '&lt;', $amp_encoded);
  }
  /**
   * Parse Markdown automatic links to anchor HTML tags.
   *
   * Handles <scheme:...> URLs for the http, https, ftp, dict and tel
   * schemes first, then <address@domain> e-mail addresses (with an optional
   * mailto: prefix).
   *
   * @param string $text
   * @return string
   */
  protected function doAutoLinks($text) {
      $url_re = '{<((https?|ftp|dict|tel):[^\'">\s]+)>}i';

      // Email addresses: <address@domain.foo>
      $email_re = '{
          <
          (?:mailto:)?
          (
              (?:
                  [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
              |
                  ".*?"
              )
              \@
              (?:
                  [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
              |
                  \[[\d.a-fA-F:]+\]  # IPv4 & IPv6
              )
          )
          >
          }xi';

      $text = preg_replace_callback(
          $url_re, array($this, '_doAutoLinks_url_callback'), $text);

      return preg_replace_callback(
          $email_re, array($this, '_doAutoLinks_email_callback'), $text);
  }
  /**
   * Parse URL callback: builds the anchor for an automatic URL link.
   *
   * @param array $matches $matches[1] holds the URL without angle brackets
   * @return string Result of hashPart() on the anchor markup
   */
  protected function _doAutoLinks_url_callback($matches) {
      $label = null;
      $href = $this->encodeURLAttribute($matches[1], $label);
      return $this->hashPart("<a href=\"$href\">$label</a>");
  }
  /**
   * Parse email address callback: builds the mailto: anchor for an
   * automatic e-mail link.
   *
   * @param array $matches $matches[1] holds the bare address
   * @return string Result of hashPart() on the anchor markup
   */
  protected function _doAutoLinks_email_callback($matches) {
      $label = null;
      $href = $this->encodeURLAttribute("mailto:" . $matches[1], $label);
      return $this->hashPart("<a href=\"$href\">$label</a>");
  }
  /**
   * Input: some text to obfuscate, e.g. "mailto:foo@example.com"
   *
   * Output: the same text but with most characters encoded as either a
   * decimal or hex entity, in the hopes of foiling most address
   * harvesting spam bots. E.g.:
   *
   *     &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
   *     &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
   *     &#x6d;
   *
   * Note: the additional output $tail is assigned the same value as the
   * output, minus the number of characters specified by $head_length.
   *
   * Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
   * With some optimizations by Milian Wolff. Forced encoding of HTML
   * attribute special characters by Allan Odgaard.
   *
   * @param string $text
   * @param string &$tail
   * @param integer $head_length
   * @return string
   */
  protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
      if ($text == "") {
          $tail = "";
          return $tail;
      }

      $pieces = preg_split('/(?<!^)(?!$)/', $text);
      // Deterministic seed, so the same input always encodes the same way.
      $seed = (int)abs(crc32($text) / strlen($text));

      foreach ($pieces as $index => $piece) {
          $code = ord($piece);
          // Ignore non-ascii chars.
          if ($code >= 128) {
              continue;
          }

          // Pseudo-random function: roughly 10% raw, 45% hex, 45% dec.
          $roll = ($seed * (1 + $index)) % 100;

          // '@' *must* be encoded, and '"', '&' and '>' have to be encoded
          // inside the attribute, so those never stay raw.
          if ($roll > 90 && strpos('@"&>', $piece) === false) {
              continue;
          }

          $pieces[$index] = $roll < 45
              ? '&#x' . dechex($code) . ';'
              : '&#' . $code . ';';
      }

      $encoded = implode('', $pieces);
      $tail = $head_length
          ? implode('', array_slice($pieces, $head_length))
          : $encoded;
      return $encoded;
  }
  /**
   * Take the string $str and parse it into tokens, hashing embedded HTML,
   * escaped characters and handling code spans.
   *
   * Text between tokens is copied to the output unchanged; each token is
   * replaced by whatever handleSpanToken() returns for it. HTML tokens are
   * only recognized when `no_markup` is off.
   *
   * @param string $str
   * @return string
   */
  protected function parseSpan($str) {
      // Alternatives for inline HTML, left out entirely in no-markup mode.
      $markup_re = '';
      if (!$this->no_markup) {
          $markup_re = '
              |
                  <!-- .*? -->              # comment
              |
                  <\?.*?\?> | <%.*?%>       # processing instruction
              |
                  <[!$]?[-a-zA-Z0-9:_]+     # regular tags
                  (?>
                      \s
                      (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                  )?
                  >
              |
                  <[-a-zA-Z0-9:_]+\s*/>     # xml-style empty tag
              |
                  </[-a-zA-Z0-9:_]+\s*>     # closing tag
          ';
      }

      $span_re = '{
          (
              \\\\'.$this->escape_chars_re.'
          |
              (?<![`\\\\])
              `+                            # code span marker
          '.$markup_re.'
          )
          }xs';

      $output = '';
      do {
          // Each iteration searches for either the next tag, the next
          // opening code span marker, or the next escaped character.
          $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

          // Text preceding the token goes out as is.
          $output .= $parts[0];

          // No token means the end of the string was reached.
          $found = isset($parts[1]);
          if ($found) {
              // handleSpanToken() may consume part of the remaining text
              // through its by-reference second argument.
              $output .= $this->handleSpanToken($parts[1], $parts[2]);
              $str = $parts[2];
          }
      } while ($found);

      return $output;
  }
  /**
   * Handle $token provided by parseSpan by determining its nature and
   * returning the corresponding value that should replace it.
   *
   * - A backslash escape becomes the numeric entity of the escaped
   *   character.
   * - A run of backticks opens a code span: if a matching closing run is
   *   found in $str, the span is consumed from $str and rendered by
   *   makeCodeSpan(); otherwise the backticks are returned as plain text.
   * - Anything else is passed to hashPart() unchanged.
   *
   * @param string $token
   * @param string &$str Text following the token; shortened when a code
   *                     span is consumed
   * @return string
   */
  protected function handleSpanToken($token, &$str) {
      // Bracket offsets: the `$token{0}` form is a parse error in PHP 8.
      switch ($token[0]) {
          case "\\":
              return $this->hashPart("&#". ord($token[1]). ";");
          case "`":
              // Search for end marker in remaining text.
              if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
                  $str, $matches))
              {
                  $str = $matches[2];
                  $codespan = $this->makeCodeSpan($matches[1]);
                  return $this->hashPart($codespan);
              }
              return $token; // Return as text since no ending marker found.
          default:
              return $this->hashPart($token);
      }
  }
  /**
   * Remove one level of line-leading tabs or spaces: a single tab, or up to
   * `tab_width` spaces, at the start of every line.
   *
   * @param string $text
   * @return string
   */
  protected function outdent($text) {
      $indent_re = '/^(\t|[ ]{1,' . $this->tab_width . '})/m';
      return preg_replace($indent_re, '', $text);
  }
  /**
   * String length function for detab. `_initDetab` will create a function to
   * handle UTF-8 if the default function does not exist. _detab_callback()
   * invokes it as $strlen($line, 'UTF-8').
   * @var callable
   */
  protected $utf8_strlen = 'mb_strlen';
  /**
   * Replace tabs with the appropriate amount of spaces.
   *
   * Every line containing a tab is split into blocks delimited by tab
   * characters, then rebuilt by _detab_callback() with the appropriate
   * number of spaces between blocks.
   *
   * @param string $text
   * @return string
   */
  protected function detab($text) {
      return preg_replace_callback(
          '/^.*\t.*$/m',
          array($this, '_detab_callback'),
          $text
      );
  }
  /**
   * Replace tabs callback: expands the tabs of one line so that each block
   * following a tab starts at the next multiple of `tab_width`.
   *
   * @param array $matches $matches[0] holds the line
   * @return string
   */
  protected function _detab_callback($matches) {
      $strlen = $this->utf8_strlen; // strlen function for UTF-8.

      // Split in blocks; the first one starts the rebuilt line.
      $segments = explode("\t", $matches[0]);
      $line = array_shift($segments);

      foreach ($segments as $segment) {
          // Pad up to the next tab stop, then append the block.
          $padding = $this->tab_width -
              $strlen($line, 'UTF-8') % $this->tab_width;
          $line .= str_repeat(" ", $padding) . $segment;
      }
      return $line;
  }
  /**
   * Check for the availability of the function in the `utf8_strlen` property
   * (initially `mb_strlen`). If the function is not available, install a
   * closure that will loosely count the number of UTF-8 characters with a
   * regular expression.
   *
   * A closure is used instead of create_function(), which was deprecated in
   * PHP 7.2 and removed in PHP 8.0. Callers may pass extra arguments (such
   * as an encoding name); the closure ignores them.
   *
   * @return void
   */
  protected function _initDetab() {
      if (function_exists($this->utf8_strlen)) {
          return;
      }

      $this->utf8_strlen = function ($text) {
          // One match per single byte below 0xC0, or per lead byte together
          // with its continuation bytes.
          return preg_match_all(
              '/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
              $text, $m);
      };
  }
  /**
   * Swap back in all the tags hashed by _HashHTMLBlocks.
   *
   * A hash token is a marker character, the \x1A byte, a run of digits and
   * the same marker character again; each one found is replaced through
   * _unhash_callback().
   *
   * @param string $text
   * @return string
   */
  protected function unhash($text) {
      $hash_re = '/(.)\x1A[0-9]+\1/';
      return preg_replace_callback(
          $hash_re, array($this, '_unhash_callback'), $text);
  }
  /**
   * Unhashing callback: looks the matched token up in `html_hashes`.
   *
   * @param array $matches $matches[0] holds the complete hash token
   * @return string
   */
  protected function _unhash_callback($matches) {
      $token = $matches[0];
      return $this->html_hashes[$token];
  }
  1688. }