marktext/src/editor/utils/importMarkdown.js

374 lines
13 KiB
JavaScript

/**
* Translate markdown into the content state used by Mark Text.
* Loose and tight list items are parsed slightly differently: both get a `p` block
* inside the `li` block, and CSS styles are used to distinguish loose from tight.
*/
import parse5 from 'parse5'
import TurndownService from 'turndown'
import marked from '../parser/marked'
import ExportMarkdown from './exportMarkdown'
// To be disabled rules when parse markdown, Because content state don't need to parse inline rules
import { turndownConfig, CLASS_OR_ID, CURSOR_DNA, TABLE_TOOLS, BLOCK_TYPE7, LINE_BREAK } from '../config'
const turndownPluginGfm = require('turndown-plugin-gfm')
// Matches one line feed; used to split text content into separate lines.
const LINE_BREAKS_REG = /\n/

// Service that turns HTML into markdown.
const turndownService = new TurndownService(turndownConfig)
const gfm = turndownPluginGfm.gfm
// The gfm plugin is enabled first; the custom rules are registered after it.
turndownService.use(gfm)

// The strikethrough rule in gfm uses a single `~`, so register a rule that emits `~~`.
turndownService.addRule('strikethrough', {
  filter: ['del', 's', 'strike'],
  replacement: content => `~~${content}~~`
})

// Handle `soft line break` and `hard line break`:
// append `LINE_BREAK` to every line span that is followed by another element.
turndownService.addRule('lineBreak', {
  filter: node => {
    const isLineSpan = node.nodeName === 'SPAN' && node.classList.contains(CLASS_OR_ID['AG_LINE'])
    return isLineSpan && node.nextElementSibling
  },
  replacement: content => content + LINE_BREAK
})

// Strip every `\` from emoji text when pasting.
turndownService.addRule('normalEmoji', {
  filter: node => {
    if (node.nodeName !== 'SPAN') {
      return false
    }
    return node.classList.contains(CLASS_OR_ID['AG_EMOJI_MARKED_TEXT'])
  },
  replacement: content => content.replace(/\\/g, '')
})
/**
 * Decide whether a piece of text should be treated as a raw HTML block.
 *
 * The trimmed text must start with an opening tag (`<name` followed by
 * whitespace or `>`), and its first line must contain a `>`. When the tag name
 * is listed in `BLOCK_TYPE7`, the first line must additionally end with `>`
 * directly followed by a line feed.
 *
 * @param {*} value - text to inspect; anything that is not a string is not HTML.
 * @returns {boolean} `true` when the text looks like an HTML block.
 */
const checkIsHTML = value => {
  // Callers read `value` from the first child of a node, which has no `value`
  // when that child is an element; treat such input as "not HTML" instead of
  // throwing on `.trim()`.
  if (typeof value !== 'string') {
    return false
  }
  const trimmedValue = value.trim()
  const match = /^<([a-zA-Z\d-]+)(?=\s|>).*>/.exec(trimmedValue)
  if (!match) {
    return false
  }
  const tag = match[1]
  if (BLOCK_TYPE7.indexOf(tag) > -1) {
    // For these tags the opening line has to be followed by a line break.
    return /^<([a-zA-Z\d-]+)(?=\s|>).*>\n/.test(trimmedValue)
  }
  return true
}
/**
 * Split text into fragments at every run of two or more line feeds.
 *
 * @param {string} value - text to split; surrounding whitespace is trimmed first.
 * @returns {string[]} the fragments, in their original order.
 */
const chopHTML = value => {
  const trimmed = value.trim()
  const fragments = trimmed.split(/\n{2,}/)
  return fragments
}
/**
 * Install the markdown import helpers on `ContentState.prototype`.
 *
 * @param {Function} ContentState - constructor whose prototype receives the methods.
 */
const importRegister = ContentState => {
  /**
   * Convert markdown into content state blocks.
   *
   * The markdown is rendered to HTML by `marked` (inline rules disabled), the
   * HTML is parsed into a node tree by `parse5`, and the tree is walked to
   * create the blocks.
   *
   * @param {string} markdown - markdown source.
   * @returns {Array<Object>} the top-level blocks, or a single block from
   *   `createBlockP()` when the walk produced nothing.
   * @throws {Error} when the tree contains an element this importer does not handle.
   */
  ContentState.prototype.getStateFragment = function (markdown) {
    // mock a root block...
    const rootState = {
      key: null,
      type: 'root',
      text: '',
      parent: null,
      preSibling: null,
      nextSibling: null,
      children: []
    }
    const htmlText = marked(markdown, { disableInline: true })
    const domAst = parse5.parseFragment(htmlText)
    const childNodes = domAst.childNodes

    const LANG_PREFIX = 'lang-'

    // Read the language from a `code` node's `lang-xxx` class ('' when absent).
    const getLang = node => {
      if (!node || node.nodeName !== 'code' || !node.attrs) {
        return ''
      }
      const classAttr = node.attrs.find(attr => attr.name === 'class')
      if (classAttr && classAttr.value.startsWith(LANG_PREFIX)) {
        // Keep everything after the prefix so names such as `objective-c` survive.
        return classAttr.value.slice(LANG_PREFIX.length)
      }
      return ''
    }

    // Compute the zero-based index of the last row and the last column of a table.
    const getRowColumnCount = tableChildNodes => {
      const THEAD_ROW_COUNT = 1
      const rowsOf = sectionName => {
        const section = tableChildNodes.find(node => node.nodeName === sectionName)
        return section ? section.childNodes.filter(node => node.nodeName === 'tr') : []
      }
      const bodyRows = rowsOf('tbody')
      const row = bodyRows.length + THEAD_ROW_COUNT - 1
      // A table may have a header but no body rows; fall back to the header row
      // so the column count can still be derived.
      const sampleRow = bodyRows[0] || rowsOf('thead')[0]
      const cellCount = sampleRow
        ? sampleRow.childNodes.filter(cell => cell.nodeName === 'td' || cell.nodeName === 'th').length
        : 0
      const column = Math.max(cellCount - 1, 0)
      return { row, column } // zero base
    }

    // Walk `nodes` and append the blocks created for them to `parent`.
    const travel = (parent, nodes) => {
      for (const child of nodes) {
        let block
        let value
        switch (child.nodeName) {
          case 'th':
          case 'td':
          case 'h1':
          case 'h2':
          case 'h3':
          case 'h4':
          case 'h5':
          case 'h6': {
            const textValue = child.childNodes.length ? child.childNodes[0].value : ''
            // Headings carry their level in the node name; rebuild the `#` prefix.
            const match = /\d/.exec(child.nodeName)
            value = match ? '#'.repeat(+match[0]) + ` ${textValue}` : textValue
            block = this.createBlock(child.nodeName, value)
            // handle `th` and `td`
            if (child.nodeName === 'th' || child.nodeName === 'td') {
              const column = nodes
                .filter(sibling => /th|td/.test(sibling.nodeName))
                .indexOf(child)
              let align = ''
              const styleAttr = child.attrs.find(attr => attr.name === 'style')
              if (styleAttr) {
                // Tolerate whitespace and further declarations around `text-align`.
                const alignMatch = /text-align\s*:\s*([a-zA-Z-]+)/.exec(styleAttr.value)
                if (alignMatch) {
                  align = alignMatch[1]
                }
              }
              Object.assign(block, { column, align })
            }
            this.appendChild(parent, block)
            break
          }
          case 'p': {
            value = child.childNodes.length ? child.childNodes[0].value : ''
            // `value` is undefined when the first child is an element, not text.
            if (typeof value === 'string' && checkIsHTML(value)) {
              travel(parent, child.childNodes)
            } else {
              block = this.createBlock('p')
              travel(block, child.childNodes)
              this.appendChild(parent, block)
            }
            break
          }
          case 'table': {
            const toolBar = this.createToolBar(TABLE_TOOLS, 'table')
            const table = this.createBlock('table')
            Object.assign(table, getRowColumnCount(child.childNodes)) // set row and column
            block = this.createBlock('figure')
            block.functionType = 'table'
            this.appendChild(block, toolBar)
            this.appendChild(block, table)
            this.appendChild(parent, block)
            travel(table, child.childNodes)
            break
          }
          case 'tr':
          case 'tbody':
          case 'thead': {
            block = this.createBlock(child.nodeName)
            this.appendChild(parent, block)
            travel(block, child.childNodes)
            break
          }
          case 'hr': {
            const initValue = '---'
            block = this.createBlock(child.nodeName, initValue)
            this.appendChild(parent, block)
            break
          }
          case 'input': {
            const isTaskListItemCheckbox = child.attrs.some(attr => attr.name === 'class' && attr.value === 'task-list-item-checkbox')
            const checked = child.attrs.some(attr => attr.name === 'checked' && attr.value === '')
            if (isTaskListItemCheckbox) {
              parent.listItemType = 'task' // double check
              block = this.createBlock('input')
              block.checked = checked
              this.appendChild(parent, block)
            }
            break
          }
          case 'li': {
            const isTask = child.attrs.some(attr => attr.name === 'class' && attr.value.includes('task-list-item'))
            const isLoose = child.attrs.some(attr => attr.name === 'class' && attr.value.includes(CLASS_OR_ID['AG_LOOSE_LIST_ITEM']))
            // `parent` is a content state block, not a parsed node, so the list
            // kind is read from the parsed node that contains this `li`.
            const inBulletList = Boolean(child.parentNode) && child.parentNode.nodeName === 'ul'
            block = this.createBlock('li')
            block.listItemType = inBulletList ? (isTask ? 'task' : 'bullet') : 'order'
            block.isLooseListItem = isLoose
            this.appendChild(parent, block)
            travel(block, child.childNodes)
            break
          }
          case 'ul': {
            const isTaskList = child.attrs.some(attr => attr.name === 'class' && attr.value === 'task-list')
            block = this.createBlock('ul')
            block.listType = isTaskList ? 'task' : 'bullet'
            travel(block, child.childNodes)
            this.appendChild(parent, block)
            break
          }
          case 'ol': {
            block = this.createBlock('ol')
            block.listType = 'order'
            for (const attr of child.attrs) {
              // Never let an HTML attribute (e.g. `type="a"`) overwrite a field
              // the block already owns.
              if (!Object.prototype.hasOwnProperty.call(block, attr.name)) {
                block[attr.name] = attr.value
              }
            }
            if (!block.start) {
              block.start = 1
            }
            travel(block, child.childNodes)
            this.appendChild(parent, block)
            break
          }
          case 'blockquote': {
            block = this.createBlock('blockquote')
            travel(block, child.childNodes)
            this.appendChild(parent, block)
            break
          }
          case 'pre': {
            const codeNode = child.childNodes[0]
            // The text normally lives in `pre > code > #text`; an empty code
            // block has no text node, and a bare `pre` holds the text directly.
            let textNode
            if (codeNode && codeNode.nodeName === '#text') {
              textNode = codeNode
            } else if (codeNode && codeNode.childNodes) {
              textNode = codeNode.childNodes[0]
            }
            value = textNode && typeof textNode.value === 'string' ? textNode.value : ''
            value = value.replace(/\n+$/, '')
            block = this.createBlock('pre', value)
            block.functionType = 'code'
            block.lang = getLang(codeNode)
            this.appendChild(parent, block)
            break
          }
          case 'script': {
            const code = child.childNodes.length ? child.childNodes[0].value : ''
            const fullCode = `<script>${code}</script>`
            block = this.createHtmlBlock(fullCode)
            this.appendChild(parent, block)
            break
          }
          case '#text': {
            const { parentNode } = child
            value = child.value
            // Whitespace-only text nodes produce no block.
            if (/\S/.test(value)) {
              if (checkIsHTML(value) && /^(#document-fragment|pre|p)$/.test(parentNode.nodeName)) {
                for (const fragment of chopHTML(value)) {
                  // Each fragment is either an html block or a plain paragraph.
                  const fragmentBlock = checkIsHTML(fragment)
                    ? this.createHtmlBlock(fragment)
                    : this.createBlockP(fragment)
                  this.appendChild(parent, fragmentBlock)
                }
              } else if (parentNode.nodeName === 'li') {
                block = this.createBlock('p')
                // fix: #153
                const lines = value.replace(/^\s+/, '').split(LINE_BREAKS_REG).map(line => this.createBlock('span', line))
                for (const line of lines) {
                  this.appendChild(block, line)
                }
                this.appendChild(parent, block)
              } else if (parentNode.nodeName === 'p') {
                const lines = value.split(LINE_BREAKS_REG).map(line => this.createBlock('span', line))
                for (const line of lines) {
                  this.appendChild(parent, line)
                }
              }
            }
            break
          }
          default: {
            if (child.tagName) {
              throw new Error(`unHandle node type ${child.tagName}`)
            }
            break
          }
        }
      }
    }

    travel(rootState, childNodes)
    return rootState.children.length ? rootState.children : [this.createBlockP()]
  }

  /**
   * Transform pasted `text/html` data into content state blocks.
   *
   * @param {string} html - HTML to convert.
   * @returns {Array<Object>} blocks produced by `getStateFragment`.
   */
  ContentState.prototype.html2State = function (html) {
    // remove double `\\` in Math but I dont know why there are two '\' when paste. @jocs
    const markdown = turndownService.turndown(html).replace(/(\\)\\/g, '$1')
    return this.getStateFragment(markdown)
  }

  /**
   * Insert the `CURSOR_DNA` marker into markdown at a line/column position.
   *
   * @param {string} markdown - markdown source.
   * @param {{line: number, ch: number}} cursor - zero-based line and column.
   * @returns {string} the markdown with the marker inserted; the markdown is
   *   returned unchanged when `cursor.line` does not address an existing line.
   */
  ContentState.prototype.addCursorToMarkdown = function (markdown, cursor) {
    const { ch, line } = cursor
    const lines = markdown.split('\n')
    const rawText = lines[line]
    if (typeof rawText !== 'string') {
      // The cursor points outside the document; there is nowhere to put the marker.
      return markdown
    }
    lines[line] = rawText.substring(0, ch) + CURSOR_DNA + rawText.substring(ch)
    return lines.join('\n')
  }

  /**
   * Translate the current cursor into a line/column position in the exported markdown.
   *
   * The `CURSOR_DNA` marker is temporarily inserted into the cursor block's
   * text, the blocks are exported to markdown, and the marker is located in
   * the result.
   *
   * @returns {{line: number, ch: number}} zero-based position; `{ line: 0, ch: 0 }`
   *   when the marker is not found in the exported markdown.
   */
  ContentState.prototype.getCodeMirrorCursor = function () {
    const blocks = this.getBlocks()
    const { start: { key, offset } } = this.cursor
    const block = this.getBlock(key)
    const { text } = block
    let markdown
    block.text = text.substring(0, offset) + CURSOR_DNA + text.substring(offset)
    try {
      markdown = new ExportMarkdown(blocks).generate()
    } finally {
      // remove CURSOR_DNA, even when the export throws
      block.text = text
    }
    return markdown.split('\n').reduce((acc, line, index) => {
      const ch = line.indexOf(CURSOR_DNA)
      if (ch > -1) {
        Object.assign(acc, { line: index, ch })
      }
      return acc
    }, {
      line: 0,
      ch: 0
    })
  }

  /**
   * Set `this.cursor` after an import.
   *
   * With a truthy `cursor`, every block whose text contains `CURSOR_DNA` has
   * the marker removed and becomes the cursor position. Without one, the
   * cursor is placed at the end of the last block.
   *
   * @param {*} cursor - truthy when the imported text carries a `CURSOR_DNA` marker.
   */
  ContentState.prototype.importCursor = function (cursor) {
    // set cursor
    if (cursor) {
      const blocks = this.getArrayBlocks()
      for (const block of blocks) {
        const { text, key } = block
        if (text) {
          const offset = block.text.indexOf(CURSOR_DNA)
          if (offset > -1) {
            // remove the CURSOR_DNA in the block text
            block.text = text.substring(0, offset) + text.substring(offset + CURSOR_DNA.length)
            this.cursor = {
              start: { key, offset },
              end: { key, offset }
            }
          }
        }
      }
    } else {
      const lastBlock = this.getLastBlock()
      const key = lastBlock.key
      const offset = lastBlock.text.length
      this.cursor = {
        start: { key, offset },
        end: { key, offset }
      }
    }
  }

  /**
   * Replace the current content with the blocks parsed from `markdown`.
   *
   * @param {string} markdown - markdown source.
   */
  ContentState.prototype.importMarkdown = function (markdown) {
    // empty the blocks and codeBlocks
    this.keys = new Set()
    this.codeBlocks = new Map()
    this.blocks = this.getStateFragment(markdown)
  }
}
export default importRegister