MM-46943 Parse tables directly following text and remove remaining TODOs

mattermost · Oct 28, 2024 · bdd88ab · bdd88ab
1 parent 2148a1a
commit bdd88ab
Show file tree

Hide file tree

Showing 5 changed files with 144 additions and 30 deletions.
diff --git a/dist/commonmark.js b/dist/commonmark.js
@@ -14863,17 +14863,17 @@
             const delimiterCells = parseTableRow(parser.currentLine, parser.nextNonspace);
             if (!delimiterCells || !validateDelimiterRow(delimiterCells)) {
                 // The second line of the paragraph isn't a table row, so this paragraph isn't actually a table
-                container._tableVisited = true;
                 return 0;
             }
 
-            // The previous line of text that we've read is stored as the paragraph's _string_content, and that will
-            // contain the header row if this is actually a table
-            const headerCharacters = container._string_content.indexOf("\n"); // TODO this should probably be returned explicilty by parseTableRow
-            const headerCells = parseTableRow(container._string_content.substring(0, container._string_content.indexOf("\n")), 0);
+            // container._string_content contains everything in the paragraph so far including a trailing newline, but
+            // we only want to check the last line of it for the header row
+            const lastLineMatch = matchLastLine(container._string_content);
+            const lastLine = lastLineMatch[1];
+
+            const headerCells = parseTableRow(lastLine, 0);
             if (!headerCells) {
                 // The first line isn't a header row, so this isn't a table
-                container._tableVisited = true;
                 return 0;
             }
 
@@ -14891,17 +14891,36 @@
                 return 0;
             }
 
-            // TODO try inserting header paragraph to match GitHub letting you break from a paragraph into a table with only one newline
+            // If there's any text before lastLine, then there's text before the table that we need to re-add as a
+            // paragraph before the table
+            if (lastLineMatch.index > 0) {
+                // Create the new paragraph node based on where we found the header row
+                const textBeforeTable = parser.tip._string_content.substring(0, lastLineMatch.index);
+                const lastLineBeforeTable = matchLastLine(textBeforeTable);
+
+                const newParagraph = new Node("paragraph", [
+                    parser.tip.sourcepos[0],
+                    [parser.lineNumber - 2, lastLineBeforeTable.length],
+                ]);
+                newParagraph._string_content = textBeforeTable;
+
+                // Update the parser.tip which is now the table with its new position
+                parser.tip._string_content = parser.tip._string_content.substring(lastLineMatch.index);
+                parser.tip._sourcepos[0] = [parser.lineNumber - 1, 0];
+
+                // Add the paragraph before the table
+                parser.tip.insertBefore(newParagraph);
+            }
 
             // Store the alignments of the columns and then skip the delimiter line since we've
             // gotten what we need from it
             parser.tip.alignColumns = delimiterCells.map(getCellAlignment);
 
             const headerRow = new Node("table_row", [
                 [parser.lineNumber - 1, parser.offset + 1],
-                [parser.lineNumber - 1, parser.offset + headerCharacters],
+                [parser.lineNumber - 1, parser.offset + lastLine.length],
             ]);
-            headerRow._string_content = container._string_content.substring(0, headerCharacters);
+            headerRow._string_content = container._string_content.substring(0, lastLine.length);
             headerRow._isHeading = true;
 
             for (let i = 0; i < headerCells.length; i++) {
@@ -15006,9 +15025,8 @@
                 // We've read the last cell, so check if we've reached the end of the row
                 const rowEndLength = scanTableRowEnd(line, offset);
 
-                // TODO there's something in the GH code about the row being part of a preceding paragraph which we may not
-                // need. I think it has to do with GH's behaviour where there's no double-newline needed between a
-                // paragraph and a table which we don't support in Marked or Commonmark.
+                // Unlike cmark-gfm, we don't need to try again on the next line because we only call this function with
+                // one line at a time
 
                 if (rowEndLength === -1) ; else {
                     offset += rowEndLength;
@@ -15090,6 +15108,10 @@
         return str.replace("\\|", "|");
     };
 
+    const matchLastLine = function(str) {
+        return (/([^\n]*)\n$/).exec(str);
+    };
+
     var getCellAlignment = function(cell) {
         const cellContents = cell.contents.trim();
 

diff --git a/dist/commonmark.min.js b/dist/commonmark.min.js
diff --git a/lib/blocks.js b/lib/blocks.js
@@ -784,17 +784,17 @@ var blockStarts = [
         const delimiterCells = parseTableRow(parser.currentLine, parser.nextNonspace);
         if (!delimiterCells || !validateDelimiterRow(delimiterCells)) {
             // The second line of the paragraph isn't a table row, so this paragraph isn't actually a table
-            container._tableVisited = true;
             return 0;
         }
 
-        // The previous line of text that we've read is stored as the paragraph's _string_content, and that will
-        // contain the header row if this is actually a table
-        const headerCharacters = container._string_content.indexOf("\n"); // TODO this should probably be returned explicilty by parseTableRow
-        const headerCells = parseTableRow(container._string_content.substring(0, container._string_content.indexOf("\n")), 0);
+        // container._string_content contains everything in the paragraph so far including a trailing newline, but
+        // we only want to check the last line of it for the header row
+        const lastLineMatch = matchLastLine(container._string_content);
+        const lastLine = lastLineMatch[1];
+
+        const headerCells = parseTableRow(lastLine, 0);
         if (!headerCells) {
             // The first line isn't a header row, so this isn't a table
-            container._tableVisited = true;
             return 0;
         }
 
@@ -812,17 +812,36 @@ var blockStarts = [
             return 0;
         }
 
-        // TODO try inserting header paragraph to match GitHub letting you break from a paragraph into a table with only one newline
+        // If there's any text before lastLine, then there's text before the table that we need to re-add as a
+        // paragraph before the table
+        if (lastLineMatch.index > 0) {
+            // Create the new paragraph node based on where we found the header row
+            const textBeforeTable = parser.tip._string_content.substring(0, lastLineMatch.index);
+            const lastLineBeforeTable = matchLastLine(textBeforeTable);
+
+            const newParagraph = new Node("paragraph", [
+                parser.tip.sourcepos[0],
+                [parser.lineNumber - 2, lastLineBeforeTable.length],
+            ]);
+            newParagraph._string_content = textBeforeTable;
+
+            // Update the parser.tip which is now the table with its new position
+            parser.tip._string_content = parser.tip._string_content.substring(lastLineMatch.index);
+            parser.tip._sourcepos[0] = [parser.lineNumber - 1, 0];
+
+            // Add the paragraph before the table
+            parser.tip.insertBefore(newParagraph);
+        }
 
         // Store the alignments of the columns and then skip the delimiter line since we've
         // gotten what we need from it
         parser.tip.alignColumns = delimiterCells.map(getCellAlignment);
 
         const headerRow = new Node("table_row", [
             [parser.lineNumber - 1, parser.offset + 1],
-            [parser.lineNumber - 1, parser.offset + headerCharacters],
+            [parser.lineNumber - 1, parser.offset + lastLine.length],
         ]);
-        headerRow._string_content = container._string_content.substring(0, headerCharacters);
+        headerRow._string_content = container._string_content.substring(0, lastLine.length);
         headerRow._isHeading = true;
 
         for (let i = 0; i < headerCells.length; i++) {
@@ -927,9 +946,8 @@ const parseTableRow = function(line, startAt) {
             // We've read the last cell, so check if we've reached the end of the row
             const rowEndLength = scanTableRowEnd(line, offset);
 
-            // TODO there's something in the GH code about the row being part of a preceding paragraph which we may not
-            // need. I think it has to do with GH's behaviour where there's no double-newline needed between a
-            // paragraph and a table which we don't support in Marked or Commonmark.
+            // Unlike cmark-gfm, we don't need to try again on the next line because we only call this function with
+            // one line at a time
 
             if (rowEndLength === -1) {
                 // There's more text after this, so this isn't a table row
@@ -1011,7 +1029,11 @@ const validateDelimiterRow = function(cells) {
 
 const unescapePipes = function(str) {
     return str.replace("\\|", "|");
-}
+};
+
+const matchLastLine = function(str) {
+    return (/([^\n]*)\n$/).exec(str);
+};
 
 var getCellAlignment = function(cell) {
     const cellContents = cell.contents.trim();

diff --git a/test/gfm_extensions.txt b/test/gfm_extensions.txt
@@ -1,6 +1,6 @@
 The following is a section of
 https://github.com/github/cmark-gfm/blob/587a12bb54d95ac37241377e6ddc93ea0e45439b/test/extensions.txt
-with everything not related to tables taken out. Any other changes are specifically noted.
+with everything not related to tables taken out.
 
 ---
 title: Extensions test
@@ -478,9 +478,7 @@ Here's a link to [Freedom Planet 2][].
 
 ### a table can be recognised when separated from a paragraph of text without an empty line
 
-@hmhealey We differ from GitHub here, so I've skipped this one for the time being.
-
-```````````````````````````````` skipped
+```````````````````````````````` example
 123
 456
 | a | b |

diff --git a/test/tables.txt b/test/tables.txt
@@ -1677,3 +1677,75 @@ some text
 <p>some text</p>
 <p>|</p>
 ````````````````````````````````
+
+### Table directly after a text in a list item (MM-46943)
+
+```````````````````````````````` example
+- See table below
+  | First Header  | Second Header |
+  | ------------- | ------------- |
+  | Content Cell  | Content Cell |
+  | Content Cell  | Content Cell |
+- Second item
+.
+<ul>
+<li>See table below
+<table>
+<thead>
+<tr>
+<th>First Header</th>
+<th>Second Header</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>Content Cell</td>
+<td>Content Cell</td>
+</tr>
+<tr>
+<td>Content Cell</td>
+<td>Content Cell</td>
+</tr>
+</tbody>
+</table>
+</li>
+<li>Second item</li>
+</ul>
+````````````````````````````````
+
+```````````````````````````````` example
+- See table below
+
+  | First Header  | Second Header |
+  | ------------- | ------------- |
+  | Content Cell  | Content Cell |
+  | Content Cell  | Content Cell |
+- Second item
+.
+<ul>
+<li>
+<p>See table below</p>
+<table>
+<thead>
+<tr>
+<th>First Header</th>
+<th>Second Header</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>Content Cell</td>
+<td>Content Cell</td>
+</tr>
+<tr>
+<td>Content Cell</td>
+<td>Content Cell</td>
+</tr>
+</tbody>
+</table>
+</li>
+<li>
+<p>Second item</p>
+</li>
+</ul>
+````````````````````````````````