From c88eb729a499197e8b3ab9d5019b4426a65d3d41 Mon Sep 17 00:00:00 2001
From: Dan Brown <ssddanbrown@googlemail.com>
Date: Fri, 24 Nov 2023 23:39:16 +0000
Subject: [PATCH] Includes: Added block-level handling to new include system

Implements promotion of block content to the body (choosing a position
based upon the likely tag position within its parent) and splitting of
the parent block where the tag sits directly within a body child.
---
 app/Entities/Tools/PageIncludeParser.php | 111 ++++++++++++++++-------
 app/Util/HtmlDocument.php                |  15 ---
 tests/Unit/PageIncludeParserTest.php     |  96 +++++++++++++++++++-
 3 files changed, 172 insertions(+), 50 deletions(-)

diff --git a/app/Entities/Tools/PageIncludeParser.php b/app/Entities/Tools/PageIncludeParser.php
index 070b0cc11..5ce847d6c 100644
--- a/app/Entities/Tools/PageIncludeParser.php
+++ b/app/Entities/Tools/PageIncludeParser.php
@@ -4,6 +4,8 @@ namespace BookStack\Entities\Tools;
 
 use BookStack\Util\HtmlDocument;
 use Closure;
+use DOMDocument;
+use DOMElement;
 use DOMNode;
 use DOMText;
 
@@ -22,48 +24,26 @@ class PageIncludeParser
         $doc = new HtmlDocument($this->pageHtml);
 
         $tags = $this->locateAndIsolateIncludeTags($doc);
+        $topLevel = [...$doc->getBodyChildren()];
 
         foreach ($tags as $tag) {
             $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
             $content = new PageIncludeContent($htmlContent, $tag);
 
-            if ($content->isInline()) {
-                $adopted = $doc->adoptNodes($content->toDomNodes());
-                foreach ($adopted as $adoptedContentNode) {
-                    $tag->domNode->parentNode->insertBefore($adoptedContentNode, $tag->domNode);
+            if (!$content->isInline()) {
+                $isParentTopLevel = in_array($tag->domNode->parentNode, $topLevel, true);
+                if ($isParentTopLevel) {
+                    $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode);
+                } else {
+                    $this->promoteTagNodeToBody($tag, $doc->getBody());
                 }
-                $tag->domNode->parentNode->removeChild($tag->domNode);
-                continue;
             }
 
-            // TODO - Non-inline
+            $this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes());
         }
 
-        // TODO:
-        // Hunt down the specific text nodes with matches
-        // Split out tag text node from rest of content
-        // Fetch tag content->
-          // If range or top-block: delete tag text node, [Promote to top-block], delete old top-block if empty
-          // If inline: Replace current text node with new text or elem
-        // !! "Range" or "inline" status should come from tag parser and content fetcher, not guessed direct from content
-        //     since we could have a range of inline elements
-
-        // [Promote to top-block]
-        // Tricky operation.
-        // Can throw in before or after current top-block depending on relative position
-        // Could [Split] top-block but complex past a single level depth.
-        // Maybe [Split] if one level depth, otherwise default to before/after block
-        // Should work for the vast majority of cases, and not for those which would
-        // technically be invalid in-editor anyway.
-
-        // [Split]
-        // Copy original top-block node type and attrs (apart from ID)
-        // Move nodes after promoted tag-node into copy
-        // Insert copy after original (after promoted top-block eventually)
-
-        // Notes: May want to eventually parse through backwards, which should avoid issues
-        // in changes affecting the next tag, where tags may be in the same/adjacent nodes.
-
+        // TODO Notes: May want to eventually parse through backwards, which should avoid issues
+        //   in changes affecting the next tag, where tags may be in the same/adjacent nodes.
 
         return $doc->getBodyInnerHtml();
     }
@@ -125,4 +105,71 @@ class PageIncludeParser
 
         return $includeTags;
     }
+
+    /**
+     * Insert the given nodes, in order, where $toReplace sits and then remove $toReplace.
+     * @param DOMNode[] $replacements
+     */
+    protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
+    {
+        /** @var DOMDocument $targetDoc */
+        $targetDoc = $toReplace->ownerDocument;
+
+        foreach ($replacements as $replacement) {
+            if ($replacement->ownerDocument !== $targetDoc) {
+                // Deep-copy foreign nodes in; adoptNode() is only available from PHP 8.3.
+                $replacement = $targetDoc->importNode($replacement, true);
+            }
+            $toReplace->parentNode->insertBefore($replacement, $toReplace);
+        }
+        $toReplace->parentNode->removeChild($toReplace);
+    }
+
+    /**
+     * Lift the tag's node out to body level, beside the body child that contains it: placed
+     * before that child when the tag text starts in the first half of its text, else after.
+     */
+    protected function promoteTagNodeToBody(PageIncludeTag $tag, DOMNode $body): void
+    {
+        // Climb from the tag's parent until reaching the direct child of the body.
+        /** @var DOMNode $ancestor */
+        $ancestor = $tag->domNode->parentNode;
+        while ($ancestor->parentNode !== $body) {
+            $ancestor = $ancestor->parentNode;
+        }
+
+        $text = $ancestor->textContent;
+        $placeBefore = strpos($text, $tag->tagContent) < (strlen($text) / 2);
+        $reference = $placeBefore ? $ancestor : $ancestor->nextSibling;
+        $body->insertBefore($tag->domNode, $reference);
+    }
+
+    /**
+     * Split $parentNode at its direct child $domNode: the children before $domNode move into an
+     * id-less clone placed ahead of the parent, then $domNode is lifted out between the two halves.
+     */
+    protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
+    {
+        $children = [...$parentNode->childNodes];
+        $splitPos = array_search($domNode, $children, true);
+        // Index 0 is a valid split point; only a missing node (false) falls back to the end.
+        $splitPos = $splitPos === false ? count($children) : $splitPos;
+        $parentClone = $parentNode->cloneNode();
+        $parentClone->removeAttribute('id'); // Keep the id on one half only.
+        for ($i = 0; $i < $splitPos; $i++) {
+            $parentClone->appendChild($children[$i]); // Move each preceding child, in order.
+        }
+        if ($parentClone->hasChildNodes()) {
+            $parentNode->parentNode->insertBefore($parentClone, $parentNode);
+        }
+        $parentNode->parentNode->insertBefore($domNode, $parentNode);
+        // After normalizing, drop whichever half has been left without children.
+        $parentClone->normalize();
+        $parentNode->normalize();
+        foreach ([$parentNode, $parentClone] as $half) {
+            if (!$half->hasChildNodes()) {
+                $half->remove();
+            }
+        }
+    }
 }
diff --git a/app/Util/HtmlDocument.php b/app/Util/HtmlDocument.php
index ad5dacd82..b8c53d439 100644
--- a/app/Util/HtmlDocument.php
+++ b/app/Util/HtmlDocument.php
@@ -149,19 +149,4 @@ class HtmlDocument
     {
         return $this->document->saveHTML($node);
     }
-
-    /**
-     * Adopt the given nodes into this document.
-     * @param DOMNode[] $nodes
-     * @return DOMNode[]
-     */
-    public function adoptNodes(array $nodes): array
-    {
-        $adopted = [];
-        foreach ($nodes as $node) {
-            $adopted[] = $this->document->importNode($node, true);
-        }
-
-        return $adopted;
-    }
 }
diff --git a/tests/Unit/PageIncludeParserTest.php b/tests/Unit/PageIncludeParserTest.php
index d1912270e..e0bd69e93 100644
--- a/tests/Unit/PageIncludeParserTest.php
+++ b/tests/Unit/PageIncludeParserTest.php
@@ -7,7 +7,7 @@ use Tests\TestCase;
 
 class PageIncludeParserTest extends TestCase
 {
-    public function test_include_simple_inline_text()
+    public function test_simple_inline_text()
     {
         $this->runParserTest(
             '<p>{{@45#content}}</p>',
@@ -16,7 +16,7 @@ class PageIncludeParserTest extends TestCase
         );
     }
 
-    public function test_include_simple_inline_text_with_existing_siblings()
+    public function test_simple_inline_text_with_existing_siblings()
     {
         $this->runParserTest(
             '<p>{{@45#content}} <strong>Hi</strong>there!</p>',
@@ -25,7 +25,7 @@ class PageIncludeParserTest extends TestCase
         );
     }
 
-    public function test_include_simple_inline_text_within_other_text()
+    public function test_simple_inline_text_within_other_text()
     {
         $this->runParserTest(
             '<p>Hello {{@45#content}}there!</p>',
@@ -34,6 +34,96 @@ class PageIncludeParserTest extends TestCase
         );
     }
 
+    public function test_block_content_types()
+    {
+        $inputs = [
+            '<table id="content"><td>Text</td></table>',
+            '<ul id="content"><li>Item A</li></ul>',
+            '<ol id="content"><li>Item A</li></ol>',
+            '<pre id="content">Code</pre>',
+        ];
+
+        foreach ($inputs as $input) {
+            $this->runParserTest(
+                '<p>A{{@45#content}}B</p>',
+                ['45' => $input],
+                '<p>A</p>' . $input . '<p>B</p>',
+            );
+        }
+    }
+
+    public function test_block_content_nested_origin_gets_placed_before()
+    {
+        $this->runParserTest(
+            '<p><strong>A {{@45#content}} there!</strong></p>',
+            ['45' => '<pre id="content">Testing</pre>'],
+            '<pre id="content">Testing</pre><p><strong>A  there!</strong></p>',
+        );
+    }
+
+    public function test_block_content_nested_origin_gets_placed_after()
+    {
+        $this->runParserTest(
+            '<p><strong>Some really good {{@45#content}} there!</strong></p>',
+            ['45' => '<pre id="content">Testing</pre>'],
+            '<p><strong>Some really good  there!</strong></p><pre id="content">Testing</pre>',
+        );
+    }
+
+    public function test_block_content_in_shallow_origin_gets_split()
+    {
+        $this->runParserTest(
+            '<p>Some really good {{@45#content}} there!</p>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<p>Some really good </p><pre id="content">doggos</pre><p> there!</p>',
+        );
+    }
+
+    public function test_block_content_in_shallow_origin_split_does_not_duplicate_id()
+    {
+        $this->runParserTest(
+            '<p id="test" title="Hi">Some really good {{@45#content}} there!</p>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<p title="Hi">Some really good </p><pre id="content">doggos</pre><p id="test" title="Hi"> there!</p>',
+        );
+    }
+
+    public function test_block_content_in_shallow_origin_does_not_leave_empty_nodes()
+    {
+        $this->runParserTest(
+            '<p>{{@45#content}}</p>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<pre id="content">doggos</pre>',
+        );
+    }
+
+    public function test_simple_whole_document()
+    {
+        $this->runParserTest(
+            '<p>{{@45}}</p>',
+            ['45' => '<p id="content">Testing</p>'],
+            '<p id="content">Testing</p>',
+        );
+    }
+
+    public function test_multi_source_elem_whole_document()
+    {
+        $this->runParserTest(
+            '<p>{{@45}}</p>',
+            ['45' => '<p>Testing</p><blockquote>This</blockquote>'],
+            '<p>Testing</p><blockquote>This</blockquote>',
+        );
+    }
+
+    public function test_multi_source_elem_whole_document_with_shared_content_origin()
+    {
+        $this->runParserTest(
+            '<p>This is {{@45}} some text</p>',
+            ['45' => '<p>Testing</p><blockquote>This</blockquote>'],
+            '<p>This is </p><p>Testing</p><blockquote>This</blockquote><p> some text</p>',
+        );
+    }
+
     protected function runParserTest(string $html, array $contentById, string $expected)
     {
         $parser = new PageIncludeParser($html, function (int $id) use ($contentById) {