In this repo I store all my websites, each in a different branch.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1785 regels
50 KiB

  1. <?php
  2. /**
  3. * Markdown Extra - A text-to-HTML conversion tool for web writers
  4. *
  5. * @package php-markdown
  6. * @author Michel Fortin <michel.fortin@michelf.com>
  7. * @copyright 2004-2016 Michel Fortin <https://michelf.com/projects/php-markdown/>
  8. * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
  9. */
  10. namespace Michelf;
  11. /**
  12. * Markdown Extra Parser Class
  13. */
  14. class MarkdownExtra extends \Michelf\Markdown {
  15. /**
  16. * Configuration variables
  17. */
  18. /**
  19. * Prefix for footnote ids.
  20. * @var string
  21. */
  22. public $fn_id_prefix = "";
  23. /**
  24. * Optional title attribute for footnote links and backlinks.
  25. * @var string
  26. */
  27. public $fn_link_title = "";
  28. public $fn_backlink_title = "";
  29. /**
  30. * Optional class attribute for footnote links and backlinks.
  31. * @var string
  32. */
  33. public $fn_link_class = "footnote-ref";
  34. public $fn_backlink_class = "footnote-backref";
  35. /**
  36. * Content to be displayed within footnote backlinks. The default is '↩';
  37. * the U+FE0E on the end is a Unicode variant selector used to prevent iOS
  38. * from displaying the arrow character as an emoji.
  39. * @var string
  40. */
  41. public $fn_backlink_html = '&#8617;&#xFE0E;';
  42. /**
  43. * Class name for table cell alignment (%% replaced left/center/right)
  44. * For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
  45. * If empty, the align attribute is used instead of a class name.
  46. * @var string
  47. */
  48. public $table_align_class_tmpl = '';
  49. /**
  50. * Optional class prefix for fenced code block.
  51. * @var string
  52. */
  53. public $code_class_prefix = "";
  54. /**
  55. * Class attribute for code blocks goes on the `code` tag;
  56. * setting this to true will put attributes on the `pre` tag instead.
  57. * @var boolean
  58. */
  59. public $code_attr_on_pre = false;
  60. /**
  61. * Predefined abbreviations.
  62. * @var array
  63. */
  64. public $predef_abbr = array();
  65. /**
  66. * Parser implementation
  67. */
  /**
   * Constructor. Registers the Extra-specific escapable characters and
   * transformation steps, then delegates to the parent constructor.
   *
   * Everything here must happen before parent::__construct() runs: the
   * parent constructor is what initializes the escape table and sorts
   * the gamuts.
   * @return void
   */
  public function __construct() {
    // Extra escapable characters, added before the parent constructor
    // initializes the table.
    $this->escape_chars .= ':|';

    // Extra document-, block- and span-level transformations, keyed by
    // method name with their sort priority as value.
    $extra_document_steps = array(
      "doFencedCodeBlocks" => 5,
      "stripFootnotes" => 15,
      "stripAbbreviations" => 25,
      "appendFootnotes" => 50,
    );
    $extra_block_steps = array(
      "doFencedCodeBlocks" => 5,
      "doTables" => 15,
      "doDefLists" => 45,
    );
    $extra_span_steps = array(
      "doFootnotes" => 5,
      "doAbbreviations" => 70,
    );

    // The `+` union keeps any entry already present under the same key.
    $this->document_gamut += $extra_document_steps;
    $this->block_gamut += $extra_block_steps;
    $this->span_gamut += $extra_span_steps;

    $this->enhanced_ordered_list = true;

    parent::__construct();
  }
  96. /**
  97. * Extra variables used during extra transformations.
  98. * @var array
  99. */
  100. protected $footnotes = array();
  101. protected $footnotes_ordered = array();
  102. protected $footnotes_ref_count = array();
  103. protected $footnotes_numbers = array();
  104. protected $abbr_desciptions = array();
  105. /** @var string */
  106. protected $abbr_word_re = '';
  107. /**
  108. * Give the current footnote number.
  109. * @var integer
  110. */
  111. protected $footnote_counter = 1;
  /**
   * Reset the Extra-specific state, then seed the abbreviation tables
   * from the predefined abbreviations in $this->predef_abbr.
   * @return void
   */
  protected function setup() {
    parent::setup();

    $this->footnotes = array();
    $this->footnotes_ordered = array();
    $this->footnotes_ref_count = array();
    $this->footnotes_numbers = array();
    $this->footnote_counter = 1;

    // Build the alternation of abbreviation words and the matching
    // description table locally, then publish both at once.
    $word_re = '';
    $descriptions = array();
    foreach ($this->predef_abbr as $word => $description) {
      // A separator is only inserted once the pattern is truthy.
      if ($word_re) {
        $word_re .= '|';
      }
      $word_re .= preg_quote($word);
      $descriptions[$word] = trim($description);
    }

    $this->abbr_desciptions = $descriptions;
    $this->abbr_word_re = $word_re;
  }
  /**
   * Release the Extra-specific state, then let the parent clean up its
   * own.
   * @return void
   */
  protected function teardown() {
    // Arrays are assigned by value, so chaining the assignment leaves
    // each property with its own independent empty array.
    $this->footnotes
      = $this->footnotes_ordered
      = $this->footnotes_ref_count
      = $this->footnotes_numbers
      = $this->abbr_desciptions
      = array();
    $this->abbr_word_re = '';

    parent::teardown();
  }
  143. /**
  144. * Extra attribute parser
  145. */
  146. /**
  147. * Expression to use to catch attributes (includes the braces)
  148. * @var string
  149. */
  150. protected $id_class_attr_catch_re = '\{((?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
  151. /**
  152. * Expression to use when parsing in a context when no capture is desired
  153. * @var string
  154. */
  155. protected $id_class_attr_nocatch_re = '\{(?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
  /**
   * Parse attributes caught by the $this->id_class_attr_catch_re expression
   * and return the HTML-formatted list of attributes.
   *
   * Supported components are `.class`, `#id` and `key=value`. Only the
   * first `#id` is taken into account. `key=value` pairs are emitted only
   * when $this->no_markup is false.
   *
   * A default id can be supplied; it is used to populate the id attribute
   * when $attr does not provide one. Classes passed in $classes are
   * emitted before the ones found in $attr.
   *
   * @param string $tag_name Name of the tag the attributes are for (not
   *                         used by this implementation).
   * @param string|null $attr Raw attribute text, e.g. `#intro .lead`.
   * @param mixed $defaultIdValue Fallback id, or a falsy value for none.
   * @param array $classes Classes to emit in addition to those in $attr.
   * @return string Attribute string with a leading space, or "" when
   *                there is nothing to emit.
   */
  protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null, $classes = array()) {
    if (empty($attr) && !$defaultIdValue && empty($classes)) return "";

    // Split on components. The cast keeps a null $attr (no attribute
    // block was matched) away from preg_match_all().
    preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', (string) $attr, $matches);
    $elements = $matches[0];

    // Handle classes and IDs (only first ID taken into account)
    $attributes = array();
    $id = false;
    foreach ($elements as $element) {
      // Bracket offsets: the `$str{0}` form is a parse error in PHP 8.
      if ($element[0] == '.') {
        $classes[] = substr($element, 1);
      } else if ($element[0] == '#') {
        if ($id === false) $id = substr($element, 1);
      } else if (strpos($element, '=') > 0) {
        // `> 0` requires a non-empty key before the equals sign.
        $parts = explode('=', $element, 2);
        $attributes[] = $parts[0] . '="' . $parts[1] . '"';
      }
    }

    if (!$id) $id = $defaultIdValue;

    // Compose attributes as string
    $attr_str = "";
    if (!empty($id)) {
      $attr_str .= ' id="'.$this->encodeAttribute($id) .'"';
    }
    if (!empty($classes)) {
      $attr_str .= ' class="'. implode(" ", $classes) . '"';
    }
    if (!$this->no_markup && !empty($attributes)) {
      $attr_str .= ' '.implode(" ", $attributes);
    }
    return $attr_str;
  }
  /**
   * Remove link definitions from the text, recording each one through
   * _stripLinkDefinitions_callback().
   *
   * Link defs are in the form: ^[id]: url "optional title"
   * optionally followed by an extra attribute block such as {#id .class}.
   * @param string $text
   * @return string Text with the link definitions removed.
   */
  protected function stripLinkDefinitions($text) {
    $max_indent = $this->tab_width - 1;

    $definition_re = '{
              ^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?: # id = $1
                [ ]*
                \n? # maybe *one* newline
                [ ]*
              (?:
                <(.+?)> # url = $2
              |
                (\S+?) # url = $3
              )
                [ ]*
                \n? # maybe one newline
                [ ]*
              (?:
                (?<=\s) # lookbehind for whitespace
                ["(]
                (.*?) # title = $4
                [")]
                [ ]*
              )? # title is optional
          (?:[ ]* '.$this->id_class_attr_catch_re.' )? # $5 = extra id & class attr
              (?:\n+|\Z)
      }xm';

    return preg_replace_callback(
      $definition_re,
      array($this, '_stripLinkDefinitions_callback'),
      $text
    );
  }
  /**
   * Record one link definition matched by stripLinkDefinitions().
   *
   * Stores the URL, title and pre-rendered extra attributes under the
   * lower-cased link id.
   * @param array $matches
   * @return string Always '', so the definition is removed from the text.
   */
  protected function _stripLinkDefinitions_callback($matches) {
    $key = strtolower($matches[1]);

    // Trailing optional groups are missing from $matches when they did
    // not take part in the match; treat them as null.
    $title = isset($matches[4]) ? $matches[4] : null;
    $extra_attr = isset($matches[5]) ? $matches[5] : null;

    // The URL is either the <bracketed> form ($2) or the bare form ($3).
    if ($matches[2] == '') {
      $this->urls[$key] = $matches[3];
    } else {
      $this->urls[$key] = $matches[2];
    }
    $this->titles[$key] = $title;
    $this->ref_attr[$key] = $this->doExtraAttributes("", $extra_attr);

    return ''; // String that will replace the block
  }
  252. /**
  253. * HTML block parser
  254. */
  255. /**
  256. * Tags that are always treated as block tags
  257. * @var string
  258. */
  259. protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';
  260. /**
  261. * Tags treated as block tags only if the opening tag is alone on its line
  262. * @var string
  263. */
  264. protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
  265. /**
  266. * Tags where markdown="1" default to span mode:
  267. * @var string
  268. */
  269. protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
  270. /**
  271. * Tags which must not have their contents modified, no matter where
  272. * they appear
  273. * @var string
  274. */
  275. protected $clean_tags_re = 'script|style|math|svg';
  276. /**
  277. * Tags that do not need to be closed.
  278. * @var string
  279. */
  280. protected $auto_close_tags_re = 'hr|img|param|source|track';
  /**
   * Hashify HTML blocks and "clean tags".
   *
   * Only block-level HTML tags (headers, lists, tables, ...) are hashed,
   * because paragraphs wrapped in non-block-level tags such as anchors,
   * phrase emphasis and spans must still get their <p> wrapper. The list
   * of tags is hard-coded.
   *
   * The work is done by _hashHTMLBlocks_inMarkdown(), which calls
   * _hashHTMLBlocks_inHTML() when it encounters a block tag. When a tag
   * carries the markdown="1" attribute, _hashHTMLBlocks_inHTML() calls
   * back into _hashHTMLBlocks_inMarkdown() for the tag's content, so the
   * two methods are mutually recursive.
   *
   * When $this->no_markup is set the text is returned untouched.
   * @param string $text
   * @return string
   */
  protected function hashHTMLBlocks($text) {
    if (!$this->no_markup) {
      // Only the processed text (first element) is of interest here.
      $result = $this->_hashHTMLBlocks_inMarkdown($text);
      $text = $result[0];
    }
    return $text;
  }
  /**
   * Parse Markdown text, calling _hashHTMLBlocks_inHTML() for block tags.
   *
   * *   $indent is the number of spaces to be ignored when checking for
   *     code blocks. This is important because if we don't take the indent
   *     into account, something like this (which looks right) won't work
   *     as expected:
   *
   *     <div>
   *         <div markdown="1">
   *         Hello World.  <-- Is this a Markdown code block or text?
   *         </div>  <-- Is this a Markdown code block or a real tag?
   *     <div>
   *
   *     If you don't like this, just don't indent the tag on which
   *     you apply the markdown="1" attribute.
   *
   * *   If $enclosing_tag_re is not empty, stops at the first unmatched
   *     closing tag with that name. Nested tags supported.
   *
   * *   If $span is true, text inside must be treated as span. An empty
   *     span-level hash is inserted after each newline so that no block
   *     element is triggered.
   *
   * Returns an array of that form: ( processed text , remaining text )
   *
   * @param string $text
   * @param integer $indent
   * @param string $enclosing_tag_re
   * @param boolean $span
   * @return array
   */
  protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
                                                $enclosing_tag_re = '', $span = false)
  {
    if ($text === '') return array('', '');

    // Regex to check for the presence of newlines around a block tag.
    $newline_before_re = '/(?:^\n?|\n\n)*$/';
    $newline_after_re =
      '{
        ^ # Start of text following the tag.
        (?>[ ]*<!--.*?-->)? # Optional comment.
        [ ]*\n # Must be followed by newline.
      }xs';

    // Regex to match any tag.
    $block_tag_re =
      '{
        ( # $2: Capture whole tag.
          </? # Any opening or closing tag.
          (?> # Tag name.
            ' . $this->block_tags_re . ' |
            ' . $this->context_block_tags_re . ' |
            ' . $this->clean_tags_re . ' |
            (?!\s)'.$enclosing_tag_re . '
          )
          (?:
            (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
            (?>
              ".*?" | # Double quotes (can contain `>`)
              \'.*?\' | # Single quotes (can contain `>`)
              .+? # Anything but quotes and `>`.
            )*?
          )?
          > # End of tag.
        |
          <!-- .*? --> # HTML Comment
        |
          <\?.*?\?> | <%.*?%> # Processing instruction
        |
          <!\[CDATA\[.*?\]\]> # CData Block
        ' . ( !$span ? ' # If not in span.
        |
          # Indented code block
          (?: ^[ ]*\n | ^ | \n[ ]*\n )
          [ ]{' . ($indent + 4) . '}[^\n]* \n
          (?>
            (?: [ ]{' . ($indent + 4) . '}[^\n]* | [ ]* ) \n
          )*
        |
          # Fenced code block marker
          (?<= ^ | \n )
          [ ]{0,' . ($indent + 3) . '}(?:~{3,}|`{3,})
          [ ]*
          (?: \.?[-_:a-zA-Z0-9]+ )? # standalone class name
          [ ]*
          (?: ' . $this->id_class_attr_nocatch_re . ' )? # extra attributes
          [ ]*
          (?= \n )
        ' : '' ) . ' # End (if not is span).
        |
          # Code span marker
          # Note, this regex needs to go after backtick fenced
          # code blocks but it should also be kept outside of the
          # "if not in span" condition adding backticks to the parser
          `+
        )
      }xs';

    $depth = 0; // Current depth inside the tag tree.
    $parsed = ""; // Parsed text that will be returned.

    // Loop through every tag until we find the closing tag of the parent
    // or loop until reaching the end of text if no parent tag specified.
    do {
      // Split the text using the first $block_tag_re pattern found.
      // Text before pattern will be first in the array, text after
      // pattern will be at the end, and between will be any catches made
      // by the pattern.
      $parts = preg_split($block_tag_re, $text, 2,
        PREG_SPLIT_DELIM_CAPTURE);

      // If in Markdown span mode, add an empty-string span-level hash
      // after each newline to prevent triggering any block element.
      if ($span) {
        $void = $this->hashPart("", ':');
        $newline = "\n$void";
        $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
      }

      $parsed .= $parts[0]; // Text before current tag.

      // If end of $text has been reached. Stop loop.
      if (count($parts) < 3) {
        $text = "";
        break;
      }

      $tag = $parts[1]; // Tag to handle.
      $text = $parts[2]; // Remaining text after current tag.

      // Check for: Fenced code block marker.
      // Note: need to recheck the whole tag to disambiguate backtick
      // fences from code spans
      if (preg_match('{^\n?([ ]{0,' . ($indent + 3) . '})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+)?[ ]*(?:' . $this->id_class_attr_nocatch_re . ')?[ ]*\n?$}', $tag, $capture)) {
        // Fenced code block marker: find matching end marker.
        $fence_indent = strlen($capture[1]); // use captured indent in re
        $fence_re = $capture[2]; // use captured fence in re
        if (preg_match('{^(?>.*\n)*?[ ]{' . ($fence_indent) . '}' . $fence_re . '[ ]*(?:\n|$)}', $text,
          $matches))
        {
          // End marker found: pass text unchanged until marker.
          $parsed .= $tag . $matches[0];
          $text = substr($text, strlen($matches[0]));
        }
        else {
          // No end marker: just skip it.
          $parsed .= $tag;
        }
      }
      // Check for: Indented code block.
      // (Bracket offsets below: `$tag{0}` is a parse error in PHP 8.)
      else if ($tag[0] === "\n" || $tag[0] === " ") {
        // Indented code block: pass it unchanged, will be handled
        // later.
        $parsed .= $tag;
      }
      // Check for: Code span marker
      // Note: need to check this after backtick fenced code blocks
      else if ($tag[0] === "`") {
        // Find corresponding end marker.
        $tag_re = preg_quote($tag);
        if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)' . $tag_re . '(?!`)}',
          $text, $matches))
        {
          // End marker found: pass text unchanged until marker.
          $parsed .= $tag . $matches[0];
          $text = substr($text, strlen($matches[0]));
        }
        else {
          // Unmatched marker: just skip it.
          $parsed .= $tag;
        }
      }
      // Check for: Opening Block level tag or
      //            Opening Context Block tag (like ins and del)
      //               used as a block tag (tag is alone on its line).
      else if (preg_match('{^<(?:' . $this->block_tags_re . ')\b}', $tag) ||
        ( preg_match('{^<(?:' . $this->context_block_tags_re . ')\b}', $tag) &&
          preg_match($newline_before_re, $parsed) &&
          preg_match($newline_after_re, $text) )
        )
      {
        // Need to parse tag and following text using the HTML parser.
        list($block_text, $text) =
          $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

        // Make sure it stays outside of any paragraph by adding newlines.
        $parsed .= "\n\n$block_text\n\n";
      }
      // Check for: Clean tag (like script, math)
      //            HTML Comments, processing instructions.
      else if (preg_match('{^<(?:' . $this->clean_tags_re . ')\b}', $tag) ||
        $tag[1] === '!' || $tag[1] === '?')
      {
        // Need to parse tag and following text using the HTML parser.
        // (don't check for markdown attribute)
        list($block_text, $text) =
          $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

        $parsed .= $block_text;
      }
      // Check for: Tag with same name as enclosing tag.
      else if ($enclosing_tag_re !== '' &&
        // Same name as enclosing tag.
        preg_match('{^</?(?:' . $enclosing_tag_re . ')\b}', $tag))
      {
        // Increase/decrease nested tag count. A self-closing tag
        // (second-to-last character is `/`) leaves the depth alone.
        if ($tag[1] === '/') $depth--;
        else if ($tag[strlen($tag) - 2] !== '/') $depth++;

        if ($depth < 0) {
          // Going out of parent element. Clean up and break so we
          // return to the calling function.
          $text = $tag . $text;
          break;
        }

        $parsed .= $tag;
      }
      else {
        $parsed .= $tag;
      }
    } while ($depth >= 0);

    return array($parsed, $text);
  }
  /**
   * Parse HTML, calling _hashHTMLBlocks_inMarkdown() for block tags.
   *
   * *   Calls $hash_method to convert any blocks.
   * *   Stops when the first opening tag closes.
   * *   $md_attr indicates whether the `markdown="1"` attribute is allowed
   *     (it is not inside clean tags). Accepted values for the attribute
   *     are exactly `1`, `block` and `span`.
   *
   * Returns an array of that form: ( processed text , remaining text )
   *
   * If the end of the text is reached with unbalanced tags, nothing is
   * hashed: the first character of the input is returned as the processed
   * text and the rest as remaining text.
   *
   * @param string $text Text starting with the tag to process.
   * @param string $hash_method Name of the method of $this used to hash
   *                            each block ("hashBlock" or "hashClean" in
   *                            the calls made from
   *                            _hashHTMLBlocks_inMarkdown()).
   * @param boolean $md_attr
   * @return array
   */
  protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
    if ($text === '') return array('', '');

    // Regex to match `markdown` attribute inside of a tag.
    $markdown_attr_re = '
      {
        \s* # Eat whitespace before the `markdown` attribute
        markdown
        \s*=\s*
        (?>
          (["\']) # $1: quote delimiter
          (.*?) # $2: attribute value
          \1 # matching delimiter
        |
          ([^\s>]*) # $3: unquoted attribute value
        )
        () # $4: make $3 always defined (avoid warnings)
      }xs';

    // Regex to match any tag.
    $tag_re = '{
        ( # $2: Capture whole tag.
          </? # Any opening or closing tag.
          [\w:$]+ # Tag name.
          (?:
            (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
            (?>
              ".*?" | # Double quotes (can contain `>`)
              \'.*?\' | # Single quotes (can contain `>`)
              .+? # Anything but quotes and `>`.
            )*?
          )?
          > # End of tag.
        |
          <!-- .*? --> # HTML Comment
        |
          <\?.*?\?> | <%.*?%> # Processing instruction
        |
          <!\[CDATA\[.*?\]\]> # CData Block
        )
      }xs';

    $original_text = $text; // Save original text in case of failure.

    $depth = 0; // Current depth inside the tag tree.
    $block_text = ""; // Temporary text holder for current text.
    $parsed = ""; // Parsed text that will be returned.

    // Get the name of the starting tag.
    // (This pattern makes $base_tag_name_re safe without quoting.)
    // Defaulting to '' avoids an undefined-variable notice further down
    // when the text does not start with a tag.
    $base_tag_name_re = '';
    if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
      $base_tag_name_re = $matches[1];

    // Loop through every tag until we find the corresponding closing tag.
    do {
      // Split the text using the first $tag_re pattern found.
      // Text before pattern will be first in the array, text after
      // pattern will be at the end, and between will be any catches made
      // by the pattern.
      $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

      if (count($parts) < 3) {
        // End of $text reached with unbalanced tag(s).
        // In that case, we return original text unchanged and pass the
        // first character as filtered to prevent an infinite loop in the
        // parent function.
        return array($original_text[0], substr($original_text, 1));
      }

      $block_text .= $parts[0]; // Text before current tag.
      $tag = $parts[1]; // Tag to handle.
      $text = $parts[2]; // Remaining text after current tag.

      // Check for: Auto-close tag (like <hr/>)
      //            Comments and Processing Instructions.
      // (Bracket offsets: `$tag{1}` is a parse error in PHP 8.)
      if (preg_match('{^</?(?:' . $this->auto_close_tags_re . ')\b}', $tag) ||
        $tag[1] === '!' || $tag[1] === '?')
      {
        // Just add the tag to the block as if it was text.
        $block_text .= $tag;
      }
      else {
        // Increase/decrease nested tag count. Only do so if
        // the tag's name match base tag's.
        if (preg_match('{^</?' . $base_tag_name_re . '\b}', $tag)) {
          if ($tag[1] === '/') $depth--;
          else if ($tag[strlen($tag) - 2] !== '/') $depth++;
        }

        // Check for `markdown="1"` attribute and handle it.
        // The alternation is grouped so that both anchors apply to every
        // alternative; ungrouped, `^1|block|span$` would also accept
        // values that merely contain "block".
        if ($md_attr &&
          preg_match($markdown_attr_re, $tag, $attr_m) &&
          preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
        {
          // Remove `markdown` attribute from opening tag.
          $tag = preg_replace($markdown_attr_re, '', $tag);

          // Check if text inside this tag must be parsed in span mode.
          $this->mode = $attr_m[2] . $attr_m[3];
          $span_mode = $this->mode == 'span' || ($this->mode != 'block' &&
            preg_match('{^<(?:' . $this->contain_span_tags_re . ')\b}', $tag));

          // Calculate indent before tag.
          if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
            $strlen = $this->utf8_strlen;
            $indent = $strlen($matches[1], 'UTF-8');
          } else {
            $indent = 0;
          }

          // End preceding block with this tag.
          $block_text .= $tag;
          $parsed .= $this->$hash_method($block_text);

          // Get enclosing tag name for the ParseMarkdown function.
          // (This pattern makes $tag_name_re safe without quoting.)
          preg_match('/^<([\w:$]*)\b/', $tag, $matches);
          $tag_name_re = $matches[1];

          // Parse the content using the HTML-in-Markdown parser.
          list ($block_text, $text)
            = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
              $tag_name_re, $span_mode);

          // Outdent markdown text.
          if ($indent > 0) {
            $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
              $block_text);
          }

          // Append tag content to parsed text.
          if (!$span_mode) $parsed .= "\n\n$block_text\n\n";
          else $parsed .= "$block_text";

          // Start over with a new block.
          $block_text = "";
        }
        else $block_text .= $tag;
      }

    } while ($depth > 0);

    // Hash last block text that wasn't processed inside the loop.
    $parsed .= $this->$hash_method($block_text);

    return array($parsed, $text);
  }
  /**
   * Hash a "clean" tag. Whenever a function inserts a clean tag in $text,
   * it passes through this method and is automatically escaped, blocking
   * invalid nested overlap.
   * @param string $text
   * @return string Result of hashPart() with the 'C' boundary.
   */
  protected function hashClean($text) {
    $hashed = $this->hashPart($text, 'C');
    return $hashed;
  }
  /**
   * Turn Markdown link shortcuts into XHTML <a> tags.
   *
   * Three passes run in order: reference-style links, inline-style links
   * and finally reference-style shortcuts. The method does nothing when
   * it is re-entered while already processing an anchor.
   * @param string $text
   * @return string
   */
  protected function doAnchors($text) {
    if ($this->in_anchor) {
      return $text;
    }
    $this->in_anchor = true;

    // Reference-style links: [link text] [id]
    $reference_re = '{
      ( # wrap whole match in $1
        \[
        (' . $this->nested_brackets_re . ') # link text = $2
        \]
        [ ]? # one optional space
        (?:\n[ ]*)? # one optional newline followed by spaces
        \[
        (.*?) # id = $3
        \]
      )
      }xs';

    // Inline-style links: [link text](url "optional title")
    $inline_re = '{
      ( # wrap whole match in $1
        \[
        (' . $this->nested_brackets_re . ') # link text = $2
        \]
        \( # literal paren
        [ \n]*
        (?:
          <(.+?)> # href = $3
        |
          (' . $this->nested_url_parenthesis_re . ') # href = $4
        )
        [ \n]*
        ( # $5
          ([\'"]) # quote char = $6
          (.*?) # Title = $7
          \6 # matching quote
          [ \n]* # ignore any spaces/tabs between closing quote and )
        )? # title is optional
        \)
        (?:[ ]? ' . $this->id_class_attr_catch_re . ' )? # $8 = id/class attributes
      )
      }xs';

    // Reference-style shortcuts: [link text]
    // These must come last in case you've also got [link text][1]
    // or [link text](/foo)
    $shortcut_re = '{
      ( # wrap whole match in $1
        \[
        ([^\[\]]+) # link text = $2; can\'t contain [ or ]
        \]
      )
      }xs';

    $passes = array(
      array($reference_re, '_doAnchors_reference_callback'),
      array($inline_re, '_doAnchors_inline_callback'),
      array($shortcut_re, '_doAnchors_reference_callback'),
    );
    foreach ($passes as $pass) {
      $text = preg_replace_callback($pass[0], array($this, $pass[1]), $text);
    }

    $this->in_anchor = false;
    return $text;
  }
  /**
   * Callback for reference-style anchors and shortcuts.
   *
   * Returns a hashed <a> element when the link id is defined in
   * $this->urls; otherwise the matched text is returned unchanged.
   * @param array $matches
   * @return string
   */
  protected function _doAnchors_reference_callback($matches) {
    $original = $matches[1];
    $label = $matches[2];

    // $3 is absent for shortcut links like [this]; an empty id, as in
    // [this][], also falls back to the link text.
    $ref = isset($matches[3]) ? $matches[3] : null;
    if ($ref == "") {
      $ref = $label;
    }

    // lower-case and turn embedded newlines into spaces
    $ref = preg_replace('{[ ]?\n}', ' ', strtolower($ref));

    if (!isset($this->urls[$ref])) {
      return $original;
    }

    $href = $this->encodeURLAttribute($this->urls[$ref]);
    $html = "<a href=\"$href\"";
    if (isset($this->titles[$ref])) {
      $title_attr = $this->encodeAttribute($this->titles[$ref]);
      $html .= " title=\"$title_attr\"";
    }
    if (isset($this->ref_attr[$ref])) {
      $html .= $this->ref_attr[$ref];
    }

    $inner = $this->runSpanGamut($label);
    $html .= ">$inner</a>";
    return $this->hashPart($html);
  }
  /**
   * Callback for inline anchors: [link text](url "optional title"){attrs}
   * @param array $matches
   * @return string Hashed <a> element.
   */
  protected function _doAnchors_inline_callback($matches) {
    $inner = $this->runSpanGamut($matches[2]);

    // The URL is either the <bracketed> form ($3) or the bare form ($4).
    if ($matches[3] == '') {
      $href = $matches[4];
    } else {
      $href = $matches[3];
    }

    // Optional trailing groups are missing from $matches when unmatched.
    $has_title = isset($matches[7]);
    $extra_attr = isset($matches[8]) ? $matches[8] : null;
    $attr = $this->doExtraAttributes("a", $extra_attr);

    // if the URL was of the form <s p a c e s> it got caught by the HTML
    // tag parser and hashed. Need to reverse the process before using the URL.
    $plain = $this->unhash($href);
    if ($plain != $href) {
      $href = preg_replace('/^<(.*)>$/', '\1', $plain);
    }
    $href = $this->encodeURLAttribute($href);

    $html = "<a href=\"$href\"";
    if ($has_title) {
      $title_attr = $this->encodeAttribute($matches[7]);
      $html .= " title=\"$title_attr\"";
    }
    $html .= $attr;

    // The link text goes through the span gamut a second time here.
    $inner = $this->runSpanGamut($inner);
    $html .= ">$inner</a>";

    return $this->hashPart($html);
  }
  /**
   * Turn Markdown image shortcuts into <img> tags.
   *
   * Reference-style images are handled first, then inline images.
   * @param string $text
   * @return string
   */
  protected function doImages($text) {
    // Reference-style labeled images: ![alt text][id]
    $reference_re = '{
      ( # wrap whole match in $1
        !\[
        (' . $this->nested_brackets_re . ') # alt text = $2
        \]
        [ ]? # one optional space
        (?:\n[ ]*)? # one optional newline followed by spaces
        \[
        (.*?) # id = $3
        \]
      )
      }xs';

    // Inline images: ![alt text](url "optional title")
    // Don't forget: encode * and _
    $inline_re = '{
      ( # wrap whole match in $1
        !\[
        (' . $this->nested_brackets_re . ') # alt text = $2
        \]
        \s? # One optional whitespace character
        \( # literal paren
        [ \n]*
        (?:
          <(\S*)> # src url = $3
        |
          (' . $this->nested_url_parenthesis_re . ') # src url = $4
        )
        [ \n]*
        ( # $5
          ([\'"]) # quote char = $6
          (.*?) # title = $7
          \6 # matching quote
          [ \n]*
        )? # title is optional
        \)
        (?:[ ]? ' . $this->id_class_attr_catch_re . ' )? # $8 = id/class attributes
      )
      }xs';

    $after_references = preg_replace_callback(
      $reference_re,
      array($this, '_doImages_reference_callback'),
      $text
    );

    return preg_replace_callback(
      $inline_re,
      array($this, '_doImages_inline_callback'),
      $after_references
    );
  }
  /**
   * Callback for reference-style images.
   *
   * Returns a hashed <img> element when the link id is defined in
   * $this->urls; otherwise the matched text is left intact.
   * @param array $matches
   * @return string
   */
  protected function _doImages_reference_callback($matches) {
    $original = $matches[1];
    $alt = $matches[2];

    $ref = strtolower($matches[3]);
    if ($ref == "") {
      // for shortcut links like ![this][].
      $ref = strtolower($alt);
    }

    $alt = $this->encodeAttribute($alt);

    // If there's no such link ID, leave intact:
    if (!isset($this->urls[$ref])) {
      return $original;
    }

    $src = $this->encodeURLAttribute($this->urls[$ref]);
    $html = "<img src=\"$src\" alt=\"$alt\"";
    if (isset($this->titles[$ref])) {
      $title_attr = $this->encodeAttribute($this->titles[$ref]);
      $html .= " title=\"$title_attr\"";
    }
    if (isset($this->ref_attr[$ref])) {
      $html .= $this->ref_attr[$ref];
    }
    $html .= $this->empty_element_suffix;

    return $this->hashPart($html);
  }
  /**
   * Callback for inline images: ![alt text](url "optional title"){attrs}
   * @param array $matches
   * @return string Hashed <img> element.
   */
  protected function _doImages_inline_callback($matches) {
    // The URL is either the <bracketed> form ($3) or the bare form ($4).
    if ($matches[3] == '') {
      $src = $matches[4];
    } else {
      $src = $matches[3];
    }

    // Optional trailing groups are missing from $matches when unmatched.
    $has_title = isset($matches[7]);
    $extra_attr = isset($matches[8]) ? $matches[8] : null;
    $attr = $this->doExtraAttributes("img", $extra_attr);

    $alt = $this->encodeAttribute($matches[2]);
    $src = $this->encodeURLAttribute($src);

    $html = "<img src=\"$src\" alt=\"$alt\"";
    if ($has_title) {
      $title_attr = $this->encodeAttribute($matches[7]);
      $html .= " title=\"$title_attr\"";
    }
    $html .= $attr . $this->empty_element_suffix;

    return $this->hashPart($html);
  }
  /**
   * Process markdown headers. Redefined to add ID and class attribute support.
   *
   * Setext-style headers are handled first, then atx-style headers; each
   * match is handed to the corresponding _doHeaders_callback_* method.
   *
   * @param string $text
   * @return string
   */
  protected function doHeaders($text) {
    // Setext-style headers:
    //   Header 1 {#header1}
    //   ========
    //
    //   Header 2 {#header2 .class1 .class2}
    //   --------
    //
    $text = preg_replace_callback(
      '{
        (^.+?) # $1: Header text
        (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )? # $2 = id/class attributes
        [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer
      }mx',
      array($this, '_doHeaders_callback_setext'), $text);

    // atx-style headers:
    //   # Header 1 {#header1}
    //   ## Header 2 {#header2}
    //   ## Header 2 with closing hashes ## {#header3.class1.class2}
    //   ...
    //   ###### Header 6 {.class2}
    //
    $text = preg_replace_callback('{
        ^(\#{1,6}) # $1 = string of #\'s
        [ ]*
        (.+?) # $2 = Header text
        [ ]*
        \#* # optional closing #\'s (not counted)
        (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )? # $3 = id/class attributes
        [ ]*
        \n+
      }xm',
      array($this, '_doHeaders_callback_atx'), $text);

    return $text;
  }
  /**
   * Callback for setext headers.
   *
   * @param array $matches Regex captures: [1] header text, [2] optional
   *                       id/class attribute block, [3] underline of "=" or "-"
   * @return string Header block passed through hashBlock(), or the original
   *                text when the match is not treated as a header
   */
  protected function _doHeaders_callback_setext($matches) {
    // A single "-" underneath a line that begins with "- " is returned
    // untouched instead of being converted into a header.
    if ($matches[3] == '-' && preg_match('{^- }', $matches[1])) {
      return $matches[0];
    }

    // Square-bracket offset: the curly-brace form ($str{0}) is deprecated in
    // PHP 7.4 and is a parse error from PHP 8.0 onwards.
    $level = $matches[3][0] == '=' ? 1 : 2;

    $defaultId = is_callable($this->header_id_func)
      ? call_user_func($this->header_id_func, $matches[1])
      : null;

    $extra_attrs = isset($matches[2]) ? $matches[2] : null;
    $attr = $this->doExtraAttributes("h$level", $extra_attrs, $defaultId);

    $block = "<h$level$attr>" . $this->runSpanGamut($matches[1]) . "</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
  }
  /**
   * Callback for atx headers.
   *
   * @param array $matches Regex captures: [1] run of leading "#" characters,
   *                       [2] header text, [3] optional id/class attribute block
   * @return string Header block passed through hashBlock()
   */
  protected function _doHeaders_callback_atx($matches) {
    // The number of leading "#" characters is the heading level.
    $depth = strlen($matches[1]);
    $tag   = "h$depth";

    $defaultId = null;
    if (is_callable($this->header_id_func)) {
      $defaultId = call_user_func($this->header_id_func, $matches[2]);
    }

    // The attribute group is optional and may be missing from $matches.
    $extra_attrs = isset($matches[3]) ? $matches[3] : null;
    $attr = $this->doExtraAttributes($tag, $extra_attrs, $defaultId);

    $html = "<$tag$attr>" . $this->runSpanGamut($matches[2]) . "</$tag>";
    return "\n" . $this->hashBlock($html) . "\n\n";
  }
  /**
   * Form HTML tables.
   *
   * Two passes are made over the text: the first finds tables whose rows
   * start with a pipe, the second finds tables without a leading pipe.
   *
   * @param string $text
   * @return string
   */
  protected function doTables($text) {
    $less_than_tab = $this->tab_width - 1;

    // Find tables with leading pipe.
    //
    //   | Header 1 | Header 2
    //   | -------- | --------
    //   | Cell 1 | Cell 2
    //   | Cell 3 | Cell 4
    $text = preg_replace_callback('
      {
        ^ # Start of a line
        [ ]{0,' . $less_than_tab . '} # Allowed whitespace.
        [|] # Optional leading pipe (present)
        (.+) \n # $1: Header row (at least one pipe)
        [ ]{0,' . $less_than_tab . '} # Allowed whitespace.
        [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline
        ( # $3: Cells
          (?>
            [ ]* # Allowed whitespace.
            [|] .* \n # Row content.
          )*
        )
        (?=\n|\Z) # Stop at final double newline.
      }xm',
      array($this, '_doTable_leadingPipe_callback'), $text);

    // Find tables without leading pipe.
    //
    //   Header 1 | Header 2
    //   -------- | --------
    //   Cell 1 | Cell 2
    //   Cell 3 | Cell 4
    //
    // The callback name is spelled exactly as the method is declared; PHP
    // resolves method names case-insensitively, so the matches are handled
    // by the same method as before.
    $text = preg_replace_callback('
      {
        ^ # Start of a line
        [ ]{0,' . $less_than_tab . '} # Allowed whitespace.
        (\S.*[|].*) \n # $1: Header row (at least one pipe)
        [ ]{0,' . $less_than_tab . '} # Allowed whitespace.
        ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline
        ( # $3: Cells
          (?>
            .* [|] .* \n # Row content
          )*
        )
        (?=\n|\Z) # Stop at final double newline.
      }xm',
      array($this, '_doTable_callback'), $text);

    return $text;
  }
  /**
   * Callback for leading-pipe tables: removes the leading pipe from each
   * body row, then delegates to _doTable_callback().
   *
   * @param array $matches Regex captures: [1] header row, [2] header
   *                       underline, [3] body rows
   * @return string
   */
  protected function _doTable_leadingPipe_callback($matches) {
    // Drop the optional spaces and the pipe that open every body row.
    $rows = preg_replace('/^ *[|]/m', '', $matches[3]);

    $forwarded = array($matches[0], $matches[1], $matches[2], $rows);
    return $this->_doTable_callback($forwarded);
  }
  /**
   * Make the alignment attribute for a table cell.
   *
   * When $table_align_class_tmpl is non-empty, a class attribute is produced
   * with "%%" in the template replaced by the alignment name; otherwise a
   * plain align attribute is produced.
   *
   * @param string $alignname
   * @return string Attribute text with a leading space
   */
  protected function _doTable_makeAlignAttr($alignname)
  {
    if (!empty($this->table_align_class_tmpl)) {
      $css_class = str_replace('%%', $alignname, $this->table_align_class_tmpl);
      return " class=\"$css_class\"";
    }

    return " align=\"$alignname\"";
  }
  /**
   * Callback for processing tables.
   *
   * @param array $matches Regex captures: [1] header row, [2] header
   *                       underline, [3] body rows (one per line)
   * @return string Table markup passed through hashBlock(), followed by a newline
   */
  protected function _doTable_callback($matches) {
    $head      = $matches[1];
    $underline = $matches[2];
    $content   = $matches[3];

    // Remove any trailing pipes for each line.
    $head      = preg_replace('/[|] *$/m', '', $head);
    $underline = preg_replace('/[|] *$/m', '', $underline);
    $content   = preg_replace('/[|] *$/m', '', $content);

    // Read the alignment from the header underline. $attr is initialised
    // up front so that array_pad() below always receives an array.
    $attr = array();
    $separators = preg_split('/ *[|] */', $underline);
    foreach ($separators as $n => $s) {
      if (preg_match('/^ *-+: *$/', $s)) {
        $attr[$n] = $this->_doTable_makeAlignAttr('right');
      } else if (preg_match('/^ *:-+: *$/', $s)) {
        $attr[$n] = $this->_doTable_makeAlignAttr('center');
      } else if (preg_match('/^ *:-+ *$/', $s)) {
        $attr[$n] = $this->_doTable_makeAlignAttr('left');
      } else {
        $attr[$n] = '';
      }
    }

    // Parsing span elements, including code spans, character escapes,
    // and inline HTML tags, so that pipes inside those get ignored.
    $head      = $this->parseSpan($head);
    $headers   = preg_split('/ *[|] */', $head);
    $col_count = count($headers);
    $attr      = array_pad($attr, $col_count, '');

    // Write column headers.
    $text  = "<table>\n";
    $text .= "<thead>\n";
    $text .= "<tr>\n";
    foreach ($headers as $n => $header) {
      $text .= " <th$attr[$n]>" . $this->runSpanGamut(trim($header)) . "</th>\n";
    }
    $text .= "</tr>\n";
    $text .= "</thead>\n";

    // Split content by row.
    $rows = explode("\n", trim($content, "\n"));

    $text .= "<tbody>\n";
    foreach ($rows as $row) {
      // Parsing span elements, including code spans, character escapes,
      // and inline HTML tags, so that pipes inside those get ignored.
      $row = $this->parseSpan($row);

      // Split row by cell; the limit keeps surplus pipes inside the last
      // cell, and padding fills in cells missing from short rows.
      $row_cells = preg_split('/ *[|] */', $row, $col_count);
      $row_cells = array_pad($row_cells, $col_count, '');

      $text .= "<tr>\n";
      foreach ($row_cells as $n => $cell) {
        $text .= " <td$attr[$n]>" . $this->runSpanGamut(trim($cell)) . "</td>\n";
      }
      $text .= "</tr>\n";
    }
    $text .= "</tbody>\n";
    $text .= "</table>";

    return $this->hashBlock($text) . "\n";
  }
  /**
   * Form HTML definition lists.
   *
   * Each whole definition list found at the start of the text or after a
   * blank line is handed to _doDefLists_callback().
   *
   * @param string $text
   * @return string
   */
  protected function doDefLists($text) {
    $max_indent = $this->tab_width - 1;

    // Pattern matching one entire definition list (used with the mx flags).
    $list_re = '(?>
      ( # $1 = whole list
        ( # $2
          [ ]{0,' . $max_indent . '}
          ((?>.*\S.*\n)+) # $3 = defined term
          \n?
          [ ]{0,' . $max_indent . '}:[ ]+ # colon starting definition
        )
        (?s:.+?)
        ( # $4
          \z
          |
          \n{2,}
          (?=\S)
          (?! # Negative lookahead for another term
            [ ]{0,' . $max_indent . '}
            (?: \S.*\n )+? # defined term
            \n?
            [ ]{0,' . $max_indent . '}:[ ]+ # colon starting definition
          )
          (?! # Negative lookahead for another definition
            [ ]{0,' . $max_indent . '}:[ ]+ # colon starting definition
          )
        )
      )
    )'; // mx

    $pattern = '{
        (?>\A\n?|(?<=\n\n))
        ' . $list_re . '
      }mx';

    return preg_replace_callback(
      $pattern,
      array($this, '_doDefLists_callback'),
      $text
    );
  }
  /**
   * Callback for processing definition lists.
   *
   * @param array $matches Regex captures; [1] holds the whole list text
   * @return string <dl> block passed through hashBlock(), followed by a blank line
   */
  protected function _doDefLists_callback($matches) {
    // Convert the terms and definitions, then strip surrounding whitespace
    // before wrapping them in the list element.
    $items = trim($this->processDefListItems($matches[1]));

    return $this->hashBlock("<dl>\n" . $items . "\n</dl>") . "\n\n";
  }
  /**
   * Process the contents of a single definition list, splitting it
   * into individual term and definition list items.
   *
   * Terms are converted first (so that the definition pass can stop at the
   * generated <dt> markup), then the definitions.
   *
   * @param string $list_str
   * @return string
   */
  protected function processDefListItems($list_str) {
    $max_indent = $this->tab_width - 1;

    // Collapse trailing blank lines into a single newline.
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    // Pass 1: definition terms.
    $term_re = '{
        (?>\A\n?|\n\n+) # leading line
        ( # definition terms = $1
          [ ]{0,' . $max_indent . '} # leading whitespace
          (?!\:[ ]|[ ]) # negative lookahead for a definition
          # mark (colon) or more whitespace.
          (?> \S.* \n)+? # actual term (not whitespace).
        )
        (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed
        # with a definition mark.
      }xm';
    $list_str = preg_replace_callback(
      $term_re,
      array($this, '_processDefListItems_callback_dt'),
      $list_str
    );

    // Pass 2: the definitions themselves.
    $definition_re = '{
        \n(\n+)? # leading line = $1
        ( # marker space = $2
          [ ]{0,' . $max_indent . '} # whitespace before colon
          \:[ ]+ # definition mark (colon)
        )
        ((?s:.+?)) # definition text = $3
        (?= \n+ # stop at next definition mark,
          (?: # next term or end of text
            [ ]{0,' . $max_indent . '} \:[ ] |
            <dt> | \z
          )
        )
      }xm';

    return preg_replace_callback(
      $definition_re,
      array($this, '_processDefListItems_callback_dd'),
      $list_str
    );
  }
  /**
   * Callback for <dt> elements in definition lists.
   *
   * Every line of the captured term block becomes its own <dt> element.
   *
   * @param array $matches Regex captures; [1] holds one term per line
   * @return string
   */
  protected function _processDefListItems_callback_dt($matches) {
    $html = '';
    foreach (explode("\n", trim($matches[1])) as $line) {
      $html .= "\n<dt>" . $this->runSpanGamut(trim($line)) . "</dt>";
    }

    return $html . "\n";
  }
  /**
   * Callback for <dd> elements in definition lists.
   *
   * A definition preceded by a blank line, or containing one, is run through
   * the block gamut; any other definition is run through the span gamut only.
   *
   * @param array $matches Regex captures: [1] extra leading newlines,
   *                       [2] indentation plus colon marker, [3] definition text
   * @return string
   */
  protected function _processDefListItems_callback_dd($matches) {
    $blank_before = $matches[1];
    $marker       = $matches[2];
    $body         = $matches[3];

    $is_block = $blank_before || preg_match('/\n{2,}/', $body);

    if (!$is_block) {
      $inline = $this->runSpanGamut($this->outdent(rtrim($body)));
      return "\n<dd>" . $inline . "</dd>\n";
    }

    // Stand in for the marker with an equal number of spaces so that the
    // first line keeps its original width before outdenting.
    $padded  = str_repeat(' ', strlen($marker)) . $body;
    $content = $this->runBlockGamut($this->outdent($padded . "\n\n"));

    return "\n<dd>" . "\n" . $content . "\n" . "</dd>\n";
  }
  /**
   * Adding the fenced code block syntax to regular Markdown:
   *
   * ~~~
   * Code block
   * ~~~
   *
   * A fence is three or more tildes or backticks; the closing fence must
   * repeat the opening marker. Each match is handed to
   * _doFencedCodeBlocks_callback().
   *
   * @param string $text
   * @return string
   */
  protected function doFencedCodeBlocks($text) {
    $text = preg_replace_callback('{
        (?:\n|\A)
        # 1: Opening marker
        (
          (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
        )
        [ ]*
        (?:
          \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
        )?
        [ ]*
        (?:
          ' . $this->id_class_attr_catch_re . ' # 3: Extra attributes
        )?
        [ ]* \n # Whitespace and newline following marker.
        # 4: Content
        (
          (?>
            (?!\1 [ ]* \n) # Not a closing marker.
            .*\n+
          )+
        )
        # Closing marker.
        \1 [ ]* (?= \n )
      }xm',
      array($this, '_doFencedCodeBlocks_callback'), $text);

    return $text;
  }
  /**
   * Callback to process fenced code blocks.
   *
   * @param array $matches Regex captures: [2] optional standalone class name,
   *                       [3] optional id/class attribute block, [4] code content
   * @return string <pre><code> block passed through hashBlock(), surrounded
   *                by blank lines
   */
  protected function _doFencedCodeBlocks_callback($matches) {
    $classname = isset($matches[2]) ? $matches[2] : null;
    $attrs     = isset($matches[3]) ? $matches[3] : null;
    $codeblock = $matches[4];

    // A user-supplied content function takes over escaping/highlighting;
    // otherwise HTML special characters are escaped here.
    if ($this->code_block_content_func) {
      $codeblock = call_user_func($this->code_block_content_func, $codeblock, $classname);
    } else {
      $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
    }

    // Leading newlines are turned into one <br> element each.
    $codeblock = preg_replace_callback('/^\n+/',
      array($this, '_doFencedCodeBlocks_newlines'), $codeblock);

    $classes = array();
    if ($classname != "") {
      // Square-bracket offset: the curly-brace form ($str{0}) is deprecated
      // in PHP 7.4 and is a parse error from PHP 8.0 onwards.
      if ($classname[0] == '.') {
        $classname = substr($classname, 1);
      }
      $classes[] = $this->code_class_prefix . $classname;
    }

    // The attributes go on either <pre> or <code>, depending on configuration.
    $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs, null, $classes);
    $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
    $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;

    $codeblock = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";

    return "\n\n".$this->hashBlock($codeblock)."\n\n";
  }
  /**
   * Replace new lines in fenced code blocks: emits one <br> element per
   * character of the matched text.
   *
   * @param array $matches Regex captures; [0] is the matched run of newlines
   * @return string
   */
  protected function _doFencedCodeBlocks_newlines($matches) {
    $line_break = '<br' . $this->empty_element_suffix;
    $count      = strlen($matches[0]);

    return str_repeat($line_break, $count);
  }
  /**
   * Emphasis marker patterns, redefined so that emphasis by underscore does
   * not work in the middle of a word.
   *
   * NOTE(review): the '' key looks like the opening-marker pattern and the
   * remaining keys the closing pattern for each marker; confirm against the
   * code that consumes these lists.
   * @var array
   */
  protected $em_relist = array(
    ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
    '*' => '(?<![\s*])\*(?!\*)',
    '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
  );

  /**
   * Strong-emphasis marker patterns, with the same underscore restriction.
   * @var array
   */
  protected $strong_relist = array(
    ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
    '**' => '(?<![\s*])\*\*(?!\*)',
    '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
  );

  /**
   * Combined emphasis + strong marker patterns, with the same underscore
   * restriction.
   * @var array
   */
  protected $em_strong_relist = array(
    ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
    '***' => '(?<![\s*])\*\*\*(?!\*)',
    '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
  );
  /**
   * Parse text into paragraphs.
   *
   * @param string $text String to process in paragraphs
   * @param boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
   * @return string HTML output
   */
  protected function formParagraphs($text, $wrap_in_p = true) {
    // Drop leading and trailing newlines, then split on blank lines.
    $trimmed = preg_replace('/\A\n+|\n+\z/', '', $text);
    $chunks  = preg_split('/\n{2,}/', $trimmed, -1, PREG_SPLIT_NO_EMPTY);

    $paragraphs = array();
    foreach ($chunks as $chunk) {
      $html = trim($this->runSpanGamut($chunk));

      // Chunks that start with a block tag hash, or consist of a clean tag
      // hash, are left alone; everything else may be wrapped in <p>.
      $is_hash = preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $html);
      if ($wrap_in_p && !$is_hash) {
        $html = "<p>$html</p>";
      }

      $paragraphs[] = $html;
    }

    // Join everything back together, then replace any tag hashes that are
    // still present.
    return $this->unhash(implode("\n\n", $paragraphs));
  }
  /**
   * Footnotes - Strips footnote definitions from text; the callback stores
   * their content for later use.
   *
   * @param string $text
   * @return string Text with the footnote definitions removed
   */
  protected function stripFootnotes($text) {
    $max_indent = $this->tab_width - 1;

    // Footnote definitions are in the form: [^id]: text of the note
    $pattern = '{
        ^[ ]{0,' . $max_indent . '}\[\^(.+?)\][ ]?: # note_id = $1
        [ ]*
        \n? # maybe *one* newline
        ( # text = $2 (no blank lines allowed)
          (?:
            .+ # actual text
            |
            \n # newlines but
            (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
            (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
            # by non-indented content
          )*
        )
      }xm';

    return preg_replace_callback(
      $pattern,
      array($this, '_stripFootnotes_callback'),
      $text
    );
  }
  /**
   * Callback for stripping footnotes: stores the outdented footnote text
   * under its prefixed id and removes the definition from the document.
   *
   * @param array $matches Regex captures: [1] note id, [2] note text
   * @return string Always an empty string
   */
  protected function _stripFootnotes_callback($matches) {
    $key = $this->fn_id_prefix . $matches[1];
    $this->footnotes[$key] = $this->outdent($matches[2]);

    // The whole definition block is replaced by nothing.
    return '';
  }
  /**
   * Replace footnote references in $text [^id] with a special text-token
   * which will be replaced by the actual footnote marker in appendFootnotes.
   *
   * Nothing is replaced while $this->in_anchor is set.
   *
   * @param string $text
   * @return string
   */
  protected function doFootnotes($text) {
    if ($this->in_anchor) {
      return $text;
    }

    return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
  }
  /**
   * Append footnote list to text.
   *
   * Footnote tokens in $text are first turned into markers; if any footnote
   * was referenced, a <div class="footnotes"> block with one <li> per
   * footnote is appended. Footnotes referenced from inside other footnotes
   * are picked up as well, because the queue is re-checked on every pass.
   *
   * @param string $text
   * @return string
   */
  protected function appendFootnotes($text) {
    $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
      array($this, '_appendFootnotes_callback'), $text);

    if (empty($this->footnotes_ordered)) {
      return $text;
    }

    $text .= "\n\n";
    $text .= "<div class=\"footnotes\">\n";
    $text .= "<hr" . $this->empty_element_suffix . "\n";
    $text .= "<ol>\n\n";

    // Attribute template shared by every backlink. It may contain the "%%"
    // placeholder, which is filled in with the footnote number below.
    $attr_tmpl = "";
    if ($this->fn_backlink_class != "") {
      $class = $this->fn_backlink_class;
      $class = $this->encodeAttribute($class);
      $attr_tmpl .= " class=\"$class\"";
    }
    if ($this->fn_backlink_title != "") {
      $title = $this->fn_backlink_title;
      $title = $this->encodeAttribute($title);
      $attr_tmpl .= " title=\"$title\"";
    }
    $backlink_text = $this->fn_backlink_html;
    $num = 0;

    while (!empty($this->footnotes_ordered)) {
      // Take the next footnote off the queue and forget its bookkeeping.
      $footnote = reset($this->footnotes_ordered);
      $note_id = key($this->footnotes_ordered);
      unset($this->footnotes_ordered[$note_id]);
      $ref_count = $this->footnotes_ref_count[$note_id];
      unset($this->footnotes_ref_count[$note_id]);
      unset($this->footnotes[$note_id]);

      $footnote .= "\n"; // Need to append newline before parsing.
      $footnote = $this->runBlockGamut("$footnote\n");
      $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
        array($this, '_appendFootnotes_callback'), $footnote);

      // Fill the placeholder into a fresh copy of the template. Writing the
      // result back into the template would consume "%%" on the first pass
      // and give every later footnote the first footnote's number.
      $attr = str_replace("%%", (string) ++$num, $attr_tmpl);
      $note_id = $this->encodeAttribute($note_id);

      // Prepare backlink, multiple backlinks if multiple references
      $backlink = "<a href=\"#fnref:$note_id\"$attr>$backlink_text</a>";
      for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
        $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>$backlink_text</a>";
      }

      // Add backlink to last paragraph; create new paragraph if needed.
      if (preg_match('{</p>$}', $footnote)) {
        $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
      } else {
        $footnote .= "\n\n<p>$backlink</p>";
      }

      $text .= "<li id=\"fn:$note_id\">\n";
      $text .= $footnote . "\n";
      $text .= "</li>\n\n";
    }

    $text .= "</ol>\n";
    $text .= "</div>";

    return $text;
  }
  /**
   * Callback for appending footnotes: turns one footnote token into its
   * superscript reference link.
   *
   * A token whose id has no stored footnote is turned back into the literal
   * "[^id]" text. The first reference to a footnote queues it for output and
   * assigns its number; later references reuse that number and get a
   * numbered "fnref" id.
   *
   * @param array $matches Regex captures; [1] is the footnote id without prefix
   * @return string
   */
  protected function _appendFootnotes_callback($matches) {
    $key = $this->fn_id_prefix . $matches[1];

    if (!isset($this->footnotes[$key])) {
      return "[^" . $matches[1] . "]";
    }

    if (!isset($this->footnotes_numbers[$key])) {
      // First reference: move the footnote content to the ordered list
      // and give it its number.
      $this->footnotes_ordered[$key] = $this->footnotes[$key];
      $this->footnotes_ref_count[$key] = 1;
      $this->footnotes_numbers[$key] = $this->footnote_counter++;
      $ref_suffix = '';
    } else {
      // Repeated reference: bump the count and use it in the element id.
      $ref_suffix = $this->footnotes_ref_count[$key] += 1;
    }
    $number = $this->footnotes_numbers[$key];

    $link_attr = "";
    if ($this->fn_link_class != "") {
      $class = $this->encodeAttribute($this->fn_link_class);
      $link_attr .= " class=\"$class\"";
    }
    if ($this->fn_link_title != "") {
      $title = $this->encodeAttribute($this->fn_link_title);
      $link_attr .= " title=\"$title\"";
    }
    $link_attr = str_replace("%%", $number, $link_attr);

    $key = $this->encodeAttribute($key);

    return "<sup id=\"fnref$ref_suffix:$key\">"
      . "<a href=\"#fn:$key\"$link_attr>$number</a>"
      . "</sup>";
  }
  /**
   * Abbreviations - strips abbreviation definitions from text; the callback
   * records each abbreviation and its description.
   *
   * @param string $text
   * @return string Text with the abbreviation definitions removed
   */
  protected function stripAbbreviations($text) {
    $max_indent = $this->tab_width - 1;

    // Abbreviation definitions are in the form: *[id]: description
    $pattern = '{
        ^[ ]{0,' . $max_indent . '}\*\[(.+?)\][ ]?: # abbr_id = $1
        (.*) # text = $2 (no blank lines allowed)
      }xm';

    return preg_replace_callback(
      $pattern,
      array($this, '_stripAbbreviations_callback'),
      $text
    );
  }
  /**
   * Callback for stripping abbreviations: adds the abbreviation to the
   * alternation in $abbr_word_re, stores its trimmed description, and
   * removes the definition from the document.
   *
   * @param array $matches Regex captures: [1] abbreviation, [2] description
   * @return string Always an empty string
   */
  protected function _stripAbbreviations_callback($matches) {
    $word        = $matches[1];
    $description = $matches[2];

    // Alternatives after the first are joined with "|".
    $separator = $this->abbr_word_re ? '|' : '';
    $this->abbr_word_re .= $separator . preg_quote($word);

    $this->abbr_desciptions[$word] = trim($description);

    // The whole definition line is replaced by nothing.
    return '';
  }
  /**
   * Find defined abbreviations in text and wrap them in <abbr> elements.
   *
   * The text is returned unchanged when no abbreviation has been defined.
   *
   * @param string $text
   * @return string
   */
  protected function doAbbreviations($text) {
    if (!$this->abbr_word_re) {
      return $text;
    }

    // The /x modifier cannot be used here because abbr_word_re may contain
    // significant spaces. The lookarounds reject matches adjacent to a word
    // character or to \x1A.
    $pattern = '{'
      . '(?<![\w\x1A])'
      . '(?:' . $this->abbr_word_re . ')'
      . '(?![\w\x1A])'
      . '}';

    return preg_replace_callback(
      $pattern,
      array($this, '_doAbbreviations_callback'),
      $text
    );
  }
  /**
   * Callback for processing abbreviations: wraps a matched abbreviation in
   * an <abbr> element, adding a title attribute when a description exists.
   *
   * @param array $matches Regex captures; [0] is the matched abbreviation
   * @return string
   */
  protected function _doAbbreviations_callback($matches) {
    $abbr = $matches[0];

    if (!isset($this->abbr_desciptions[$abbr])) {
      // Not a recorded abbreviation: leave the text as it was.
      return $matches[0];
    }

    $desc = $this->abbr_desciptions[$abbr];
    if (empty($desc)) {
      return $this->hashPart("<abbr>$abbr</abbr>");
    }

    $desc = $this->encodeAttribute($desc);
    return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
  }
  1575. }