marktext/src/renderer/spellchecker/index.js

536 lines
15 KiB
JavaScript

import fs from 'fs'
import path from 'path'
import os from 'os'
import { SpellCheckHandler, fallbackLocales, normalizeLanguageCode } from '@hfelix/electron-spellchecker'
import { isDirectory, isFile } from 'common/filesystem'
import { cloneObj, isOsx, isLinux, isWindows } from '@/util'
/**
 * Resolve the directory that holds the Hunspell dictionaries.
 *
 * NOTE: The folder name is hardcoded in
 * "@hfelix/electron-spellchecker/src/spell-check-handler.js" too.
 *
 * @returns {string} `<userDataPath>/dictionaries`, where `userDataPath` is read
 *                   from `global.marktext.paths`.
 */
export const getDictionaryPath = () => {
  const dataRoot = global.marktext.paths.userDataPath
  return path.join(dataRoot, 'dictionaries')
}
// Source: https://github.com/Microsoft/vscode/blob/master/src/vs/editor/common/model/wordHelper.ts
// /(-?\d*\.\d\w*)|([^\`\~\!\@\#\$\%\^\&\*\(\)\-\=\+\[\{\]\}\\\|\;\:\'\"\,\.\<\>\/\?\s]+)/
//
// NOTE: The hyphen inside the character classes must stay escaped. An unescaped
// `-` between `)` and `=` is parsed as the range `)`..`=`, which also covers the
// digits 0-9 and would split words such as "abc123" at the first digit.
/* eslint-disable no-useless-escape */
// Matches a single word separator character.
const WORD_SEPARATORS = /(?:[`~!@#$%^&*()\-=+[{\]}\\|;:'",\.<>\/?\s])/g
// Matches either a decimal number token or a run of non-separator characters.
const WORD_DEFINITION = /(?:-?\d*\.\d\w*)|(?:[^`~!@#$%^&*()\-=+[{\]}\\|;:'",\.<>\/?\s]+)/g
/* eslint-enable no-useless-escape */
/**
 * Build a cursor that selects a single word inside the line described by
 * `lineCursor`.
 *
 * The `start` and `end` cursors of `lineCursor` are deep cloned, so the given
 * cursor is not modified; only the `offset` of each copy is replaced.
 *
 * @param {*} lineCursor The original line cursor.
 * @param {number} left Start offset/index of the word in `lineCursor`.
 * @param {number} right End offset/index of the word in `lineCursor`.
 * @returns {*} A `{ start, end }` cursor of the selected word (e.g.
 *              "foo >bar< foo" where `>`/`<` are start and end offset).
 */
export const offsetToWordCursor = (lineCursor, left, right) => {
  const wordStart = Object.assign(cloneObj(lineCursor.start, true), { offset: left })
  const wordEnd = Object.assign(cloneObj(lineCursor.end, true), { offset: right })
  return { start: wordStart, end: wordEnd }
}
/**
 * Validate whether the selection is valid for spelling correction.
 *
 * A selection is accepted only when it has `start` and `end` cursors that each
 * own an `offset`, both cursors are on the same line (same `key`), the start
 * cursor has a block, and the selection is not inside code or `pre` content.
 *
 * @param {*} selection The preview editor selection range.
 * @returns {boolean} `true` if the selection may be spell corrected. Malformed
 *                    input (`null`, missing cursors) yields `false`.
 */
export const validateLineCursor = selection => {
  const hasOffset = cursor =>
    !!cursor && Object.prototype.hasOwnProperty.call(cursor, 'offset')

  // Validate selection range. Any missing part makes the selection invalid.
  if (!selection || !hasOffset(selection.start) || !hasOffset(selection.end)) {
    return false
  }

  // Allow only single lines
  const { start: startCursor, end: endCursor } = selection
  if (startCursor.key !== endCursor.key || !startCursor.block) {
    return false
  }

  // Don't correct words in code blocks or editors for HTML, LaTex and diagrams.
  if (startCursor.block.functionType === 'codeContent' &&
    startCursor.block.lang !== undefined) {
    return false
  }

  // Don't correct words in code blocks or pre elements such as language identifier.
  if (selection.affiliation && selection.affiliation.length === 1 &&
    selection.affiliation[0].type === 'pre') {
    return false
  }
  return true
}
/**
 * Returns a list of local available Hunspell dictionaries.
 *
 * Scans the dictionary directory for regular files named `<lang>.bdic`, where
 * `<lang>` is a two-letter lowercase code optionally followed by `-` and a
 * two-letter uppercase code (e.g. `en` or `en-US`).
 *
 * @returns {string[]} List of available Hunspell dictionary language codes.
 *                     Empty if the dictionary directory does not exist.
 */
export const getAvailableHunspellDictionaries = () => {
  const dictionaryPath = getDictionaryPath()
  if (!isDirectory(dictionaryPath)) {
    return []
  }

  const languages = []
  for (const entry of fs.readdirSync(dictionaryPath)) {
    const [, code] = /^([a-z]{2}(?:[-][A-Z]{2})?)\.bdic$/.exec(entry) || []
    // Only accept real files; a directory with a matching name is ignored.
    if (code && isFile(path.join(dictionaryPath, entry))) {
      languages.push(code)
    }
  }
  return languages
}
/**
 * Whether the spell checker of the operating system can be used.
 *
 * Returns `false` on Linux or when the `SPELLCHECKER_PREFER_HUNSPELL`
 * environment variable is set, `true` on macOS, and on Windows only if the
 * major version reported by `os.release()` is 10 or higher.
 *
 * @returns {boolean} `true` if the OS spell checker is considered available.
 */
export const isOsSpellcheckerSupported = () => {
  const preferHunspell = Boolean(process.env['SPELLCHECKER_PREFER_HUNSPELL']) // eslint-disable-line dot-notation
  if (preferHunspell || isLinux) {
    return false
  }
  if (isOsx) {
    return true
  }
  if (!isWindows) {
    return false
  }

  // NOTE: Normally we need to initialize the spellchecker and check the result.
  const [, major] = os.release().match(/^(\d+)\./) || []
  return major !== undefined && Number(major) >= 10
}
/**
 * High level spell checker API.
 *
 * Language providers:
 *  - macOS: NSSpellChecker (default) or Hunspell
 *  - Linux and Windows: Hunspell
 *
 * Once the handler is created, `isHunspell` mirrors the value reported by the
 * underlying `SpellCheckHandler`.
 *
 * Lifecycle: construct -> `init(...)` -> (`switchLanguage` | `disableSpellchecker`
 * | `enableSpellchecker`)*.
 */
export class SpellChecker {
  /**
   * ctor
   *
   * @param {boolean} enabled Whether spell checking is enabled. If `false`, the
   *                          handler (and its native module) is not loaded until
   *                          `init` is called.
   */
  constructor (enabled = true) {
    // Hunspell is used on Linux and Windows but macOS can use Hunspell if preferred.
    this.isHunspell = !isOsSpellcheckerSupported() || !!process.env['SPELLCHECKER_PREFER_HUNSPELL'] // eslint-disable-line dot-notation

    // Always start from a fully defined "not initialized" state so that every
    // field exists regardless of `enabled`.
    this.provider = null
    this.fallbackLang = null
    this.isEnabled = false
    this.isInitialized = false

    // Initialize spell check provider. If spell check is not enabled don't
    // initialize the handler to not load the native module.
    if (enabled) {
      this._initHandler()
    }
  }

  /**
   * Create the spell check handler.
   *
   * @private
   * @throws {Error} If the handler was already created.
   */
  _initHandler () {
    if (this.isInitialized) {
      throw new Error('Invalid state.')
    }

    this.provider = new SpellCheckHandler(getDictionaryPath())
    this.isHunspell = this.provider.isHunspell

    // The spell checker is now initialized but not yet enabled. You need to call `init`.
    this.isEnabled = false
    this.isInitialized = true
  }

  /**
   * Initialize the spell checker and attach it to the window.
   *
   * @param {string} lang 4-letter language ISO-code.
   * @param {boolean} automaticallyIdentifyLanguages Whether we should try to identify the typed language.
   * @param {boolean} isPassiveMode Should we highlight misspelled words?
   * @param {[HTMLElement]} container The optional container to attach the automatic spell detection when
   *                                  using Hunspell. Default `document.body`.
   * @returns {string} Returns current spell checker language.
   * @throws {Error} If neither a language nor automatic detection is requested,
   *                 or if the language (and the fallback) could not be loaded.
   */
  async init (lang = '', automaticallyIdentifyLanguages = false, isPassiveMode = false, container = null) {
    if (this.isEnabled) {
      // Already running: report the active language as documented instead of
      // resolving to `undefined`.
      return this.lang
    }
    if (!this.isInitialized) {
      this._initHandler()
    }

    if (!lang && !automaticallyIdentifyLanguages) {
      throw new Error('Init: Either language or automatic language detection must be set.')
    }

    // TODO(spell): Language detection is currently unavailable when another
    // spell checker than the macOS spell checker is used because Node worker
    // threads doesn't work in Electron (Electron#18540).
    const identifyLanguages = (this.isHunspell || !isOsx)
      ? false
      : automaticallyIdentifyLanguages

    // This just set a variable when using Hunspell and switch the spell checker mode
    // when using macOS spell checker. Calling switchLanguage after this using macOS
    // spell checker will deactivate automatic language detection.
    this.provider.automaticallyIdentifyLanguages =
      identifyLanguages || (!this.isHunspell && !lang)

    // If true, don't highlight misspelled words. Just like above, this method only
    // affect the macOS spell checker.
    this.provider.isPassiveMode = isPassiveMode

    if (!this.isHunspell && (identifyLanguages || !lang)) {
      // Attach the spell checker to the our editor.
      // NOTE: Calling this method is normally not necessary on macOS with
      // OS spell checker.
      this.provider.attachToInput(container)

      this.fallbackLang = null
      this.isEnabled = true
      return this.lang
    }

    // Use the Hunspell fallback language if none was given.
    const requestedLang = lang || 'en-US'

    // We have to call our switch language method to ensure that the provider is in a valid state.
    const currentLang = await this._switchLanguage(requestedLang)
    if (!currentLang) {
      throw new Error(`Language "${requestedLang}" is not available.`)
    }

    // Attach the spell checker to the our editor.
    this.provider.attachToInput(container)

    this.fallbackLang = currentLang
    this.isEnabled = true
    return currentLang
  }

  /**
   * Enable spell checker.
   *
   * NOTE: Using `undefined` will use the existing values.
   * NOTE: When spell checker is already enabled this method has no effect.
   *
   * @param {[string]} lang 4-letter language ISO-code.
   * @param {[boolean]} automaticallyIdentifyLanguages Whether we should try to identify the typed language.
   * @param {[boolean]} isPassiveMode Should we highlight misspelled words?
   * @returns {boolean} `true` if the spell checker is enabled afterwards.
   */
  async enableSpellchecker (lang = undefined, automaticallyIdentifyLanguages = undefined, isPassiveMode = undefined) {
    if (this.isEnabled) {
      return true
    }

    const result = await this.provider.enableSpellchecker(
      lang,
      automaticallyIdentifyLanguages,
      isPassiveMode
    )
    if (!result) {
      // Spell checker may be in an invalid state and don't try to recover.
      // NOTE(review): `isEnabled` is false at this point, so this call returns
      // early without touching the provider - confirm whether the provider
      // should be disabled explicitly here.
      this.disableSpellchecker()
      return false
    }

    this.fallbackLang = this.lang
    this.isEnabled = true
    return true
  }

  /**
   * Disable spell checker. Has no effect if it is not enabled.
   */
  disableSpellchecker () {
    if (!this.isEnabled) {
      return
    }

    this.provider.disableSpellchecker()
    this.isEnabled = false
  }

  /**
   * Add a word to the user dictionary.
   *
   * @param {string} word The word to add.
   * @returns {*} The result reported by the provider.
   */
  async addToDictionary (word) {
    return this.provider.addToDictionary(word)
  }

  /**
   * Remove a word from the user dictionary.
   *
   * @param {string} word The word to remove.
   * @returns {*} The result reported by the provider.
   */
  async removeFromDictionary (word) {
    return this.provider.removeFromDictionary(word)
  }

  /**
   * Ignore a word for the current runtime.
   *
   * @param {string} word The word to ignore.
   */
  ignoreWord (word) {
    this.provider.ignoreWord(word)
  }

  /**
   * Returns a list of available dictionaries.
   *
   * @returns {string[]} Available dictionary languages. Empty if the handler is
   *                     not created yet or has no active spell checker.
   */
  getAvailableDictionaries () {
    // NOTE: We only receive the dictionaries when the spellchecker is active
    // on macOS! Therefore be consistent. The provider itself is `null` until
    // the handler is initialized.
    if (!this.provider || !this.provider.currentSpellchecker) {
      return []
    }

    if (!this.isHunspell) {
      // NOTE: OS X will return lists that are half just a language, half
      // language + locale, like ['en', 'pt_BR', 'ko'] and Windows also returns
      // BCP-47 ones.
      return this.provider.currentSpellchecker.getAvailableDictionaries()
        .map(code => {
          if (code.length === 2) {
            return fallbackLocales[code]
          }
          try {
            return normalizeLanguageCode(code)
          } catch (_) {
            // Codes that cannot be normalized are dropped by the filter below.
            return null
          }
        })
        .filter(code => !!code)
    }

    // Load hunspell dictionaries from disk.
    return getAvailableHunspellDictionaries()
  }

  /**
   * Is the spellchecker trying to detect the typed language automatically?
   * Always `false` while the spell checker is disabled.
   */
  get automaticallyIdentifyLanguages () {
    if (!this.isEnabled) {
      return false
    }
    return this.provider.automaticallyIdentifyLanguages
  }

  /**
   * Set whether the spellchecker should detect the typed language
   * automatically. Ignored while the spell checker is disabled.
   */
  set automaticallyIdentifyLanguages (value) {
    if (!this.isEnabled) {
      return
    }

    // TODO(spell): Language detection is currently unavailable when another
    // spell checker than the macOS spell checker is used because Node worker
    // threads doesn't work in Electron (Electron#18540).
    const allowed = !(this.isHunspell || !isOsx)
    this.provider.automaticallyIdentifyLanguages = allowed && !!value
  }

  /**
   * Returns true if misspelled words should not be highlighted.
   * Always `false` while the spell checker is disabled.
   */
  get isPassiveMode () {
    if (!this.isEnabled) {
      return false
    }
    return this.provider.isPassiveMode
  }

  /**
   * Set passive mode (no highlighting of misspelled words). Ignored while the
   * spell checker is disabled.
   */
  set isPassiveMode (value) {
    if (!this.isEnabled) {
      return
    }
    this.provider.isPassiveMode = !!value
  }

  /**
   * Return the current language or an empty string if there is no handler.
   */
  get lang () {
    if (!this.provider) {
      return ''
    }
    return this.provider.currentSpellcheckerLanguage
  }

  /**
   * Whether the spell checker is in an invalid state and therefore deactivated.
   */
  get isInvalidState () {
    if (!this.provider) {
      return false
    }
    return this.provider.invalidState
  }

  /**
   * Explicitly switch the language to a specific language.
   *
   * NOTE: This function can throw an exception.
   *
   * @param {string} lang The language code
   * @returns {string|null} Return the language on success or null.
   * @throws {Error} If the spell checker is disabled or `lang` is empty.
   */
  async switchLanguage (lang) {
    if (!this.isEnabled) {
      throw new Error('Invalid state: spell checker is disabled.')
    } else if (!lang) {
      throw new Error('Invalid language.')
    }

    const currentLang = await this._switchLanguage(lang)
    if (currentLang) {
      this.fallbackLang = currentLang
    }
    return currentLang
  }

  /**
   * Is the given word misspelled.
   *
   * @param {string} word The word to check.
   * @returns {boolean} Always `false` while the spell checker is disabled.
   */
  isMisspelled (word) {
    if (!this.isEnabled) {
      return false
    }
    return this.provider.isMisspelled(word)
  }

  /**
   * Get corrections.
   *
   * @param {string} word The word to get suggestion for.
   * @returns {string[]} A array of suggestions. Empty if the word is not
   *                     misspelled.
   */
  async getWordSuggestion (word) {
    if (!this.isMisspelled(word)) {
      return []
    }
    return this.provider.getCorrectionsForMisspelling(word)
  }

  /**
   * Extract the word at the given offset from the text.
   *
   * @param {string} text Text
   * @param {number} offset Normalized cursor offset (e.g. ab<cursor>c def --> 2)
   * @returns {{left: number, right: number, word: string}|null} The word and its
   *          start (inclusive) and end (exclusive) index in `text`, or `null` if
   *          the text is empty or the cursor is not on a word.
   */
  static extractWord (text, offset) {
    if (!text || text.length === 0) {
      return null
    }

    // Clamp the cursor into the text.
    let cursor = offset
    if (cursor < 0) {
      cursor = 0
    } else if (cursor >= text.length) {
      cursor = text.length - 1
    }

    // Matches all words starting at a good position. The regular expressions
    // are shared and global, so `lastIndex` is set before and reset after use.
    WORD_DEFINITION.lastIndex = text.lastIndexOf(' ', cursor - 1) + 1
    let left = -1
    let match = WORD_DEFINITION.exec(text)
    while (match && match.index <= cursor) {
      // The word contains the cursor if it ends behind it.
      if (WORD_DEFINITION.lastIndex > cursor) {
        left = match.index
      }
      match = WORD_DEFINITION.exec(text)
    }
    WORD_DEFINITION.lastIndex = 0

    // Cursor is between two word separators (e.g "*<cursor>*" or " <cursor>*")
    if (left <= -1) {
      return null
    }

    // Find word ending.
    WORD_SEPARATORS.lastIndex = cursor
    const separator = WORD_SEPARATORS.exec(text)
    WORD_SEPARATORS.lastIndex = 0

    // The last word in the string has no trailing separator.
    const right = separator ? separator.index : text.length
    return {
      left,
      right,
      word: text.slice(left, right)
    }
  }

  /**
   * Switch the provider language and try to recover on failure.
   *
   * @private
   * @param {string} lang The language code
   * @returns {string|null} Return the language on success or null.
   */
  async _switchLanguage (lang) {
    const result = await this.provider.switchLanguage(lang)
    if (!result) {
      return this._tryRecover()
    }
    return this.lang
  }

  /**
   * Try to recover the spell checker's invalid state by switching back to the
   * last known good language (`fallbackLang`).
   *
   * @private
   * @returns {string|null} Return the language on success or null.
   */
  async _tryRecover () {
    const lang = this.fallbackLang
    if (lang) {
      // Prevent recursive loop.
      this.fallbackLang = null

      // Try fallback language.
      const result = await this._switchLanguage(lang)
      if (result) {
        this.fallbackLang = lang
        return lang
      }

      // Spell checker is deactivated from recursive call.
      return null
    }

    // Spell checker is in an invalid state. We can recover it by enabling
    // with a valid language.
    this.disableSpellchecker()
    return null
  }
}