Skip to content

Commit 2003e01

Browse files
author
David Kutugata
authored
Fix HTML links in markdown (#12395)
* try string manipulation, katex and markdown-it * try to use markdown-it-latex * try to use latex2html5 * clean up latexManipulation.ts and cellOutput.tsx * remove unused changes * add tests * rename latexManipulation to markdownManipulation * add news file and rename the test file * update package-lock * added one more test and a try catch * separate the fix of latex and links into different functions, and add comments to the fixLinks function
1 parent 29e844c commit 2003e01

8 files changed

Lines changed: 235 additions & 118 deletions

File tree

news/2 Fixes/11254.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
In Markdown cells, turn HTML links to markdown links so that nteract renders them.

package-lock.json

Lines changed: 149 additions & 91 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3168,7 +3168,7 @@
31683168
"@nteract/transform-model-debug": "^3.2.3",
31693169
"@nteract/transform-plotly": "^6.0.0",
31703170
"@nteract/transform-vega": "^6.0.3",
3171-
"@nteract/transforms": "^4.4.4",
3171+
"@nteract/transforms": "^4.4.7",
31723172
"@phosphor/widgets": "^1.9.3",
31733173
"@raghb1/node-memwatch": "^3.0.1",
31743174
"@testing-library/react": "^9.4.0",

src/datascience-ui/interactive-common/cellOutput.tsx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ import { WidgetManager } from '../ipywidgets';
1414
import { Image, ImageName } from '../react-common/image';
1515
import { ImageButton } from '../react-common/imageButton';
1616
import { getLocString } from '../react-common/locReactSide';
17-
import { fixLatexEquations } from './latexManipulation';
1817
import { ICellViewModel } from './mainState';
18+
import { fixMarkdown } from './markdownManipulation';
1919
import { getRichestMimetype, getTransform, isIPyWidgetOutput, isMimeTypeSupported } from './transforms';
2020

2121
// tslint:disable-next-line: no-var-requires no-require-imports
@@ -290,7 +290,7 @@ export class CellOutput extends React.Component<ICellOutputProps> {
290290
private renderMarkdownOutputs = () => {
291291
const markdown = this.getMarkdownCell();
292292
// React-markdown expects that the source is a string
293-
const source = fixLatexEquations(concatMultilineStringInput(markdown.source));
293+
const source = fixMarkdown(concatMultilineStringInput(markdown.source));
294294
const Transform = getTransform('text/markdown');
295295
const MarkdownClassName = 'markdown-cell-output';
296296

@@ -378,7 +378,7 @@ export class CellOutput extends React.Component<ICellOutputProps> {
378378

379379
// Fixup latex to make sure it has the requisite $$ around it
380380
if (mimeType === 'text/latex') {
381-
data = fixLatexEquations(concatMultilineStringOutput(data as nbformat.MultilineString), true);
381+
data = fixMarkdown(concatMultilineStringOutput(data as nbformat.MultilineString), true);
382382
}
383383

384384
return {

src/datascience-ui/interactive-common/latexManipulation.ts renamed to src/datascience-ui/interactive-common/markdownManipulation.ts

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,16 @@
44
// tslint:disable-next-line:no-require-imports no-var-requires
55
const _escapeRegExp = require('lodash/escapeRegExp') as typeof import('lodash/escapeRegExp');
66

7+
/**
 * Prepares markdown source for rendering.
 *
 * The LaTeX pass (`fixLatex`) always runs first. The link pass (`fixLinks`) is
 * best-effort: if it throws, the LaTeX-fixed text is returned unchanged.
 *
 * @param input Raw markdown (or LaTeX) text.
 * @param wrapSingles Forwarded to `fixLatex`.
 * @returns The text after the LaTeX pass and, when it succeeds, the link pass.
 */
export function fixMarkdown(input: string, wrapSingles: boolean = false): string {
    const withLatex = fixLatex(input, wrapSingles);
    let result = withLatex;

    try {
        result = fixLinks(withLatex);
    } catch {
        // Link conversion failed; keep the LaTeX-only result.
    }

    return result;
}
16+
717
// Adds '$$' to latex formulas that don't have a '$', allowing users to input the formula directly.
818
//
919
// The general algorithm here is:
@@ -13,9 +23,10 @@ const _escapeRegExp = require('lodash/escapeRegExp') as typeof import('lodash/es
1323
//
1424
// LaTeX seems to follow the pattern of \begin{name} or is escaped with $$ or $. See here for a bunch of examples:
1525
// https://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/Typesetting%20Equations.html
16-
export function fixLatexEquations(input: string, wrapSingles: boolean = false): string {
26+
export function fixLatex(input: string, wrapSingles: boolean = false): string {
1727
const output: string[] = [];
1828

29+
// change latex
1930
// Search for begin/end pairs, outputting as we go
2031
let start = 0;
2132

@@ -94,5 +105,41 @@ export function fixLatexEquations(input: string, wrapSingles: boolean = false):
94105
start = input.length;
95106
}
96107
}
108+
97109
return output.join('');
98110
}
111+
112+
/**
 * Replaces HTML anchor elements (`<a href="url">text</a>`) with Markdown links (`[text](url)`).
 *
 * An anchor is only rewritten when all of the following hold:
 *  - the tag name is exactly `a` (so `<abbr>`, `<article>`, ... are left alone),
 *  - the opening tag is closed by `>` before the next `</a>`,
 *  - the opening tag carries a quoted `href` attribute (single or double quotes).
 * Anything else is left untouched.
 *
 * @param input Markdown text that may contain HTML anchors.
 * @returns The text with every convertible anchor replaced by a Markdown link.
 */
export function fixLinks(input: string): string {
    const linkStart = '<a';
    const linkEnd = '</a>';
    const whitespace = /\s/;
    // Quoted href inside the opening tag. The back-reference forces the closing
    // quote to be the same character as the opening one.
    const hrefPattern = /\shref\s*=\s*(["'])(.*?)\1/;

    let output = input;
    let linkStartIndex = output.indexOf(linkStart);
    while (linkStartIndex !== -1) {
        // '<a' must be followed by whitespace or '>' to be an anchor tag.
        const charAfterName = output.charAt(linkStartIndex + linkStart.length);
        const isAnchorTag = charAfterName === '>' || whitespace.test(charAfterName);

        const linkEndIndex = output.indexOf(linkEnd, linkStartIndex);
        const textStartIndex = output.indexOf('>', linkStartIndex);

        // If the first '>' belongs to the closing tag, the opening tag was never closed.
        if (isAnchorTag && linkEndIndex !== -1 && textStartIndex < linkEndIndex) {
            // Search for the href only inside the opening tag so quotes in the
            // link text or later in the document cannot be mistaken for the URL.
            const openingTag = output.substring(linkStartIndex, textStartIndex);
            const hrefMatch = hrefPattern.exec(openingTag);

            if (hrefMatch !== null) {
                const url = hrefMatch[2];
                const text = output.substring(textStartIndex + 1, linkEndIndex);

                // Splice by index rather than String.replace: replace() would
                // interpret '$$', '$&', ... in the text/url as replacement patterns.
                output =
                    output.substring(0, linkStartIndex) +
                    `[${text}](${url})` +
                    output.substring(linkEndIndex + linkEnd.length);
            }
        }

        linkStartIndex = output.indexOf(linkStart, linkStartIndex + 1);
    }

    return output;
}

src/datascience-ui/renderers/render.tsx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import type { nbformat } from '@jupyterlab/coreutils';
77
import type { JSONObject } from '@phosphor/coreutils';
88
import * as React from 'react';
99
import { concatMultilineStringOutput } from '../common';
10-
import { fixLatexEquations } from '../interactive-common/latexManipulation';
10+
import { fixMarkdown } from '../interactive-common/markdownManipulation';
1111
import { getTransform } from '../interactive-common/transforms';
1212

1313
export interface ICellOutputProps {
@@ -26,7 +26,7 @@ export class CellOutput extends React.Component<ICellOutputProps> {
2626

2727
// Fixup latex to make sure it has the requisite $$ around it
2828
if (this.props.mimeType! === 'text/latex') {
29-
data = fixLatexEquations(concatMultilineStringOutput(data as nbformat.MultilineString), true);
29+
data = fixMarkdown(concatMultilineStringOutput(data as nbformat.MultilineString), true);
3030
}
3131

3232
const Transform = getTransform(this.props.mimeType!);

src/test/datascience/latexManipulation.unit.test.ts renamed to src/test/datascience/markdownManipulation.unit.test.ts

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
// Licensed under the MIT License.
33
'use strict';
44
import { expect } from 'chai';
5-
import { fixLatexEquations } from '../../datascience-ui/interactive-common/latexManipulation';
5+
import { fixMarkdown } from '../../datascience-ui/interactive-common/markdownManipulation';
66

77
// tslint:disable: max-func-body-length
8-
suite('Data Science - LaTeX Manipulation', () => {
8+
suite('Data Science - Markdown Manipulation', () => {
99
const markdown1 = `\\begin{align}
1010
\\nabla \\cdot \\vec{\\mathbf{E}} & = 4 \\pi \\rho \\\\
1111
\\nabla \\times \\vec{\\mathbf{E}}\\, +\\, \\frac1c\\, \\frac{\\partial\\vec{\\mathbf{B}}}{\\partial t} & = \\vec{\\mathbf{0}} \\\\
@@ -225,47 +225,65 @@ X^TX\\omega = X^TT
225225
$$`;
226226

227227
test("Latex - Equations don't have $$", () => {
228-
const result = fixLatexEquations(markdown1);
228+
const result = fixMarkdown(markdown1);
229229
expect(result).to.be.equal(output1, 'Result is incorrect');
230230
});
231231

232232
test('Latex - Equations have $', () => {
233-
const result = fixLatexEquations(markdown2);
233+
const result = fixMarkdown(markdown2);
234234
expect(result).to.be.equal(markdown2, 'Result is incorrect');
235235
});
236236

237237
test("Latex - Multiple equations don't have $$", () => {
238-
const result = fixLatexEquations(markdown3);
238+
const result = fixMarkdown(markdown3);
239239
expect(result).to.be.equal(output3, 'Result is incorrect');
240240
});
241241

242242
test('Latex - All on the same line', () => {
243243
const line = '\\begin{matrix}1 & 0\\0 & 1\\end{matrix}';
244244
const after = '\n$$\n\\begin{matrix}1 & 0\\0 & 1\\end{matrix}\n$$\n';
245-
const result = fixLatexEquations(line);
245+
const result = fixMarkdown(line);
246246
expect(result).to.be.equal(after, 'Result is incorrect');
247247
});
248248

249249
test('Latex - Invalid', () => {
250250
const invalid = '\n\\begin{eq*}do stuff\\end{eq}';
251-
const result = fixLatexEquations(invalid);
251+
const result = fixMarkdown(invalid);
252252
expect(result).to.be.equal(invalid, 'Result should not have changed');
253253
});
254254

255255
test('Latex - $$ already present', () => {
256-
const result = fixLatexEquations(markdown4);
256+
const result = fixMarkdown(markdown4);
257257
expect(result).to.be.equal(markdown4, 'Result should not have changed');
258258
});
259259

260260
test('Latex - Multiple types', () => {
261-
const result = fixLatexEquations(markdown5);
261+
const result = fixMarkdown(markdown5);
262262
expect(result).to.be.equal(output5, 'Result is incorrect');
263263
});
264264

265265
test('Latex - Multiple /begins inside $$', () => {
266-
const result = fixLatexEquations(markdown6, true);
266+
const result = fixMarkdown(markdown6, true);
267267
expect(result).to.be.equal(output6, 'Result is incorrect');
268-
const result2 = fixLatexEquations(markdown6, false);
268+
const result2 = fixMarkdown(markdown6, false);
269269
expect(result2).to.be.equal(output6_nonSingle, 'Result is incorrect');
270270
});
271+
272+
test('Links - Change HTML links to Markdown links', () => {
273+
// tag with single quotes
274+
const result = fixMarkdown(`<a href='https://aka.ms/AA8dqti'>link</a>`);
275+
expect(result).to.be.equal(`[link](https://aka.ms/AA8dqti)`, 'Result is incorrect');
276+
277+
// tag with double quotes
278+
const result2 = fixMarkdown(`<a href="https://aka.ms/AA8dqti">link <a</a>`);
279+
expect(result2).to.be.equal(`[link <a](https://aka.ms/AA8dqti)`, 'Result is incorrect');
280+
281+
// tag with space in href and two endings
282+
const result3 = fixMarkdown(`<a href = "https://aka.ms/AA8dqti">link </a></a>`);
283+
expect(result3).to.be.equal(`[link ](https://aka.ms/AA8dqti)</a>`, 'Result is incorrect');
284+
285+
// mal formed tag
286+
const result4 = fixMarkdown(`<a href = "https://aka.ms/AA8dqti" link </a></a>`);
287+
expect(result4).to.be.equal(`<a href = "https://aka.ms/AA8dqti" link </a></a>`, 'Result is incorrect');
288+
});
271289
});

tsconfig.json

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,12 @@
22
"compilerOptions": {
33
"baseUrl": ".",
44
"paths": {
5-
"*": [
6-
"types/*"
7-
]
5+
"*": ["types/*"]
86
},
97
"module": "commonjs",
108
"target": "es2018",
119
"outDir": "out",
12-
"lib": [
13-
"es6",
14-
"es2018",
15-
"dom",
16-
"ES2019"
17-
],
10+
"lib": ["es6", "es2018", "dom", "ES2019"],
1811
"jsx": "react",
1912
"sourceMap": true,
2013
"rootDir": "src",

0 commit comments

Comments
 (0)