0
0
Fork 0
mirror of https://github.com/BookStackApp/BookStack.git synced 2025-04-16 17:47:52 +00:00

Includes: Updated logic regarding parent block els, added tests

Expanded tests with many more cases, and added fixes for failed
scenarios.
Updated logic to specifically handling parent <p> tags, and now assume
compatibility with parent block types elswhere to allow use in a
variety of scenarios (td, details, blockquote etc...).
This commit is contained in:
Dan Brown 2023-11-25 17:32:00 +00:00
parent c88eb729a4
commit 4874dc1304
No known key found for this signature in database
GPG key ID: 46D9F943C24A2EF9
2 changed files with 158 additions and 31 deletions

View file

@ -13,37 +13,45 @@ class PageIncludeParser
{ {
protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/"; protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";
/**
* Elements to clean up and remove if left empty after a parsing operation.
* @var DOMElement[]
*/
protected array $toCleanup = [];
public function __construct( public function __construct(
protected string $pageHtml, protected string $pageHtml,
protected Closure $pageContentForId, protected Closure $pageContentForId,
) { ) {
} }
/**
* Parse out the include tags.
*/
public function parse(): string public function parse(): string
{ {
$doc = new HtmlDocument($this->pageHtml); $doc = new HtmlDocument($this->pageHtml);
$tags = $this->locateAndIsolateIncludeTags($doc); $tags = $this->locateAndIsolateIncludeTags($doc);
$topLevel = [...$doc->getBodyChildren()];
foreach ($tags as $tag) { foreach ($tags as $tag) {
$htmlContent = $this->pageContentForId->call($this, $tag->getPageId()); $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
$content = new PageIncludeContent($htmlContent, $tag); $content = new PageIncludeContent($htmlContent, $tag);
if (!$content->isInline()) { if (!$content->isInline()) {
$isParentTopLevel = in_array($tag->domNode->parentNode, $topLevel, true); $parentP = $this->getParentParagraph($tag->domNode);
if ($isParentTopLevel) { $isWithinParentP = $parentP === $tag->domNode->parentNode;
if ($parentP && $isWithinParentP) {
$this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode); $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode);
} else { } else if ($parentP) {
$this->promoteTagNodeToBody($tag, $doc->getBody()); $this->moveTagNodeToBesideParent($tag, $parentP);
} }
} }
$this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes()); $this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes());
} }
// TODO Notes: May want to eventually parse through backwards, which should avoid issues $this->cleanup();
// in changes affecting the next tag, where tags may be in the same/adjacent nodes.
return $doc->getBodyInnerHtml(); return $doc->getBodyInnerHtml();
} }
@ -55,7 +63,7 @@ class PageIncludeParser
*/ */
protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
{ {
$includeHosts = $doc->queryXPath("//body//*[contains(text(), '{{@')]"); $includeHosts = $doc->queryXPath("//body//*[text()[contains(., '{{@')]]");
$includeTags = []; $includeTags = [];
/** @var DOMNode $node */ /** @var DOMNode $node */
@ -107,6 +115,7 @@ class PageIncludeParser
} }
/** /**
* Replace the given node with all those in $replacements
* @param DOMNode[] $replacements * @param DOMNode[] $replacements
*/ */
protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
@ -125,51 +134,79 @@ class PageIncludeParser
$toReplace->parentNode->removeChild($toReplace); $toReplace->parentNode->removeChild($toReplace);
} }
protected function promoteTagNodeToBody(PageIncludeTag $tag, DOMNode $body): void /**
* Move a tag node to become a sibling of the given parent.
* Will attempt to guess a position based upon the tag content within the parent.
*/
protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void
{ {
/** @var DOMNode $topParent */ $parentText = $parent->textContent;
$topParent = $tag->domNode->parentNode;
while ($topParent->parentNode !== $body) {
$topParent = $topParent->parentNode;
}
$parentText = $topParent->textContent;
$tagPos = strpos($parentText, $tag->tagContent); $tagPos = strpos($parentText, $tag->tagContent);
$before = $tagPos < (strlen($parentText) / 2); $before = $tagPos < (strlen($parentText) / 2);
if ($before) { if ($before) {
$body->insertBefore($tag->domNode, $topParent); $parent->parentNode->insertBefore($tag->domNode, $parent);
} else { } else {
$body->insertBefore($tag->domNode, $topParent->nextSibling); $parent->parentNode->insertBefore($tag->domNode, $parent->nextSibling);
} }
} }
/**
* Splits the given $parentNode at the location of the $domNode within it.
* Attempts replicate the original $parentNode, moving some of their parent
* children in where needed, before adding the $domNode between.
*/
protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
{ {
$children = [...$parentNode->childNodes]; $children = [...$parentNode->childNodes];
$splitPos = array_search($domNode, $children, true) ?: count($children); $splitPos = array_search($domNode, $children, true);
if ($splitPos === false) {
$splitPos = count($children) - 1;
}
$parentClone = $parentNode->cloneNode(); $parentClone = $parentNode->cloneNode();
$parentNode->parentNode->insertBefore($parentClone, $parentNode);
$parentClone->removeAttribute('id'); $parentClone->removeAttribute('id');
/** @var DOMNode $child */ /** @var DOMNode $child */
for ($i = 0; $i < $splitPos; $i++) { for ($i = 0; $i < $splitPos; $i++) {
$child = $children[0]; $child = $children[$i];
$parentClone->appendChild($child); $parentClone->appendChild($child);
} }
if ($parentClone->hasChildNodes()) {
$parentNode->parentNode->insertBefore($parentClone, $parentNode);
}
$parentNode->parentNode->insertBefore($domNode, $parentNode); $parentNode->parentNode->insertBefore($domNode, $parentNode);
$parentClone->normalize(); $this->toCleanup[] = $parentNode;
$parentNode->normalize(); $this->toCleanup[] = $parentClone;
if (!$parentNode->hasChildNodes()) { }
$parentNode->remove();
} /**
if (!$parentClone->hasChildNodes()) { * Get the parent paragraph of the given node, if existing.
$parentClone->remove(); */
protected function getParentParagraph(DOMNode $parent): ?DOMNode
{
do {
if (strtolower($parent->nodeName) === 'p') {
return $parent;
}
$parent = $parent->parentElement;
} while ($parent !== null);
return null;
}
/**
* Cleanup after a parse operation.
* Removes stranded elements we may have left during the parse.
*/
protected function cleanup(): void
{
foreach ($this->toCleanup as $element) {
$element->normalize();
if ($element->parentNode && !$element->hasChildNodes()) {
$element->parentNode->removeChild($element);
}
} }
} }
} }

View file

@ -34,6 +34,15 @@ class PageIncludeParserTest extends TestCase
); );
} }
public function test_complex_inline_text_within_other_text()
{
$this->runParserTest(
'<p>Hello {{@45#content}}there!</p>',
['45' => '<p id="content"><strong>Testing</strong> with<em>some</em><i>extra</i>tags</p>'],
'<p>Hello <strong>Testing</strong> with<em>some</em><i>extra</i>tagsthere!</p>',
);
}
public function test_block_content_types() public function test_block_content_types()
{ {
$inputs = [ $inputs = [
@ -97,6 +106,51 @@ class PageIncludeParserTest extends TestCase
); );
} }
public function test_block_content_in_allowable_parent_element()
{
$this->runParserTest(
'<div>{{@45#content}}</div>',
['45' => '<pre id="content">doggos</pre>'],
'<div><pre id="content">doggos</pre></div>',
);
}
public function test_block_content_in_paragraph_origin_with_allowable_grandparent()
{
$this->runParserTest(
'<div><p>{{@45#content}}</p></div>',
['45' => '<pre id="content">doggos</pre>'],
'<div><pre id="content">doggos</pre></div>',
);
}
public function test_block_content_in_paragraph_origin_with_allowable_grandparent_with_adjacent_content()
{
$this->runParserTest(
'<div><p>Cute {{@45#content}} over there!</p></div>',
['45' => '<pre id="content">doggos</pre>'],
'<div><p>Cute </p><pre id="content">doggos</pre><p> over there!</p></div>',
);
}
public function test_block_content_in_child_within_paragraph_origin_with_allowable_grandparent_with_adjacent_content()
{
$this->runParserTest(
'<div><p><strong>Cute {{@45#content}} over there!</strong></p></div>',
['45' => '<pre id="content">doggos</pre>'],
'<div><pre id="content">doggos</pre><p><strong>Cute over there!</strong></p></div>',
);
}
public function test_block_content_in_paragraph_origin_within_details()
{
$this->runParserTest(
'<details><p>{{@45#content}}</p></details>',
['45' => '<pre id="content">doggos</pre>'],
'<details><pre id="content">doggos</pre></details>',
);
}
public function test_simple_whole_document() public function test_simple_whole_document()
{ {
$this->runParserTest( $this->runParserTest(
@ -124,6 +178,42 @@ class PageIncludeParserTest extends TestCase
); );
} }
public function test_multiple_tags_in_same_origin_with_inline_content()
{
$this->runParserTest(
'<p>This {{@45#content}}{{@45#content}} content is {{@45#content}}</p>',
['45' => '<p id="content">inline</p>'],
'<p>This inlineinline content is inline</p>',
);
}
public function test_multiple_tags_in_same_origin_with_block_content()
{
$this->runParserTest(
'<p>This {{@45#content}}{{@45#content}} content is {{@45#content}}</p>',
['45' => '<pre id="content">block</pre>'],
'<p>This </p><pre id="content">block</pre><pre id="content">block</pre><p> content is </p><pre id="content">block</pre>',
);
}
public function test_multiple_tags_in_differing_origin_levels_with_block_content()
{
$this->runParserTest(
'<div><p>This <strong>{{@45#content}}</strong> content is {{@45#content}}</p>{{@45#content}}</div>',
['45' => '<pre id="content">block</pre>'],
'<div><pre id="content">block</pre><p>This <strong></strong> content is </p><pre id="content">block</pre><pre id="content">block</pre></div>',
);
}
public function test_multiple_tags_in_shallow_origin_with_multi_block_content()
{
$this->runParserTest(
'<p>{{@45}}C{{@45}}</p><div>{{@45}}{{@45}}</div>',
['45' => '<p>A</p><p>B</p>'],
'<p>A</p><p>B</p><p>C</p><p>A</p><p>B</p><div><p>A</p><p>B</p><p>A</p><p>B</p></div>',
);
}
protected function runParserTest(string $html, array $contentById, string $expected) protected function runParserTest(string $html, array $contentById, string $expected)
{ {
$parser = new PageIncludeParser($html, function (int $id) use ($contentById) { $parser = new PageIncludeParser($html, function (int $id) use ($contentById) {