From d9813aa1841ec364ba289c32e8ffaaef74186a63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81bel=20Ny=C3=A9ki?= Date: Mon, 3 Aug 2020 15:48:33 +0200 Subject: [PATCH] smarter spellcheck loading --- debug/simplemde.js | 386 ++++++++++++++++++++++----------------------- 1 file changed, 193 insertions(+), 193 deletions(-) diff --git a/debug/simplemde.js b/debug/simplemde.js index b753bae..a95c83c 100644 --- a/debug/simplemde.js +++ b/debug/simplemde.js @@ -1866,6 +1866,16 @@ function CodeMirrorSpellChecker(options) { // Define the new mode options.codeMirrorInstance.defineMode("spell-checker", function(config) { // Load AFF/DIC data + var aff_loaded, dic_loaded; + + var loadSpellChecker = function() { + if(aff_loaded && dic_loaded) { + CodeMirrorSpellChecker.typo = new Typo("en_US", CodeMirrorSpellChecker.aff_data, CodeMirrorSpellChecker.dic_data, { + platform: "any" + }); + } + }; + if(!CodeMirrorSpellChecker.aff_loading) { CodeMirrorSpellChecker.aff_loading = true; var xhr_aff = new XMLHttpRequest(); @@ -1873,13 +1883,8 @@ function CodeMirrorSpellChecker(options) { xhr_aff.onload = function() { if(xhr_aff.readyState === 4 && xhr_aff.status === 200) { CodeMirrorSpellChecker.aff_data = xhr_aff.responseText; - CodeMirrorSpellChecker.num_loaded++; - - if(CodeMirrorSpellChecker.num_loaded == 2) { - CodeMirrorSpellChecker.typo = new Typo("en_US", CodeMirrorSpellChecker.aff_data, CodeMirrorSpellChecker.dic_data, { - platform: "any" - }); - } + aff_loaded = true; + loadSpellChecker(); } }; xhr_aff.send(null); @@ -1892,13 +1897,8 @@ function CodeMirrorSpellChecker(options) { xhr_dic.onload = function() { if(xhr_dic.readyState === 4 && xhr_dic.status === 200) { CodeMirrorSpellChecker.dic_data = xhr_dic.responseText; - CodeMirrorSpellChecker.num_loaded++; - - if(CodeMirrorSpellChecker.num_loaded == 2) { - CodeMirrorSpellChecker.typo = new Typo("en_US", CodeMirrorSpellChecker.aff_data, CodeMirrorSpellChecker.dic_data, { - platform: "any" - }); - } + dic_loaded = true; + 
loadSpellChecker(); } }; xhr_dic.send(null); @@ -14153,7 +14153,7 @@ if (typeof module !== 'undefined' && typeof exports === 'object') { 'use strict'; /** - * Typo is a JavaScript implementation of a spellchecker using hunspell-style + * Typo is a JavaScript implementation of a spellchecker using hunspell-style * dictionaries. */ @@ -14185,22 +14185,22 @@ if (typeof module !== 'undefined' && typeof exports === 'object') { var Typo = function (dictionary, affData, wordsData, settings) { settings = settings || {}; - + this.dictionary = null; - + this.rules = {}; this.dictionaryTable = {}; - + this.compoundRules = []; this.compoundRuleCodes = {}; - + this.replacementTable = []; - - this.flags = settings.flags || {}; - + + this.flags = settings.flags || {}; + if (dictionary) { this.dictionary = dictionary; - + if (typeof window !== 'undefined' && 'chrome' in window && 'extension' in window.chrome && 'getURL' in window.chrome.extension) { if (!affData) affData = this._readFile(chrome.extension.getURL("lib/typo/dictionaries/" + dictionary + "/" + dictionary + ".aff")); if (!wordsData) wordsData = this._readFile(chrome.extension.getURL("lib/typo/dictionaries/" + dictionary + "/" + dictionary + ".dic")); @@ -14214,51 +14214,51 @@ var Typo = function (dictionary, affData, wordsData, settings) { else { var path = './dictionaries'; } - + if (!affData) affData = this._readFile(path + "/" + dictionary + "/" + dictionary + ".aff"); if (!wordsData) wordsData = this._readFile(path + "/" + dictionary + "/" + dictionary + ".dic"); } - + this.rules = this._parseAFF(affData); - + // Save the rule codes that are used in compound rules. 
this.compoundRuleCodes = {}; - + for (var i = 0, _len = this.compoundRules.length; i < _len; i++) { var rule = this.compoundRules[i]; - + for (var j = 0, _jlen = rule.length; j < _jlen; j++) { this.compoundRuleCodes[rule[j]] = []; } } - + // If we add this ONLYINCOMPOUND flag to this.compoundRuleCodes, then _parseDIC // will do the work of saving the list of words that are compound-only. if ("ONLYINCOMPOUND" in this.flags) { this.compoundRuleCodes[this.flags.ONLYINCOMPOUND] = []; } - + this.dictionaryTable = this._parseDIC(wordsData); - - // Get rid of any codes from the compound rule codes that are never used - // (or that were special regex characters). Not especially necessary... + + // Get rid of any codes from the compound rule codes that are never used + // (or that were special regex characters). Not especially necessary... for (var i in this.compoundRuleCodes) { if (this.compoundRuleCodes[i].length == 0) { delete this.compoundRuleCodes[i]; } } - + // Build the full regular expressions for each compound rule. - // I have a feeling (but no confirmation yet) that this method of + // I have a feeling (but no confirmation yet) that this method of // testing for compound words is probably slow. for (var i = 0, _len = this.compoundRules.length; i < _len; i++) { var ruleText = this.compoundRules[i]; - + var expressionText = ""; - + for (var j = 0, _jlen = ruleText.length; j < _jlen; j++) { var character = ruleText[j]; - + if (character in this.compoundRuleCodes) { expressionText += "(" + this.compoundRuleCodes[character].join("|") + ")"; } @@ -14266,11 +14266,11 @@ var Typo = function (dictionary, affData, wordsData, settings) { expressionText += character; } } - + this.compoundRules[i] = new RegExp(expressionText, "i"); } } - + return this; }; @@ -14280,51 +14280,51 @@ Typo.prototype = { * * @param object obj A hash of Typo properties, probably gotten from a JSON.parse(JSON.stringify(typo_instance)). 
*/ - + load : function (obj) { for (var i in obj) { this[i] = obj[i]; } - + return this; }, - + /** * Read the contents of a file. - * + * * @param {String} path The path (relative) to the file. * @param {String} [charset="ISO8859-1"] The expected charset of the file * @returns string The file data. */ - + _readFile : function (path, charset) { if (!charset) charset = "utf8"; - + if (typeof XMLHttpRequest !== 'undefined') { var req = new XMLHttpRequest(); req.open("GET", path, false); - + if (req.overrideMimeType) req.overrideMimeType("text/plain; charset=" + charset); - + req.send(null); - + return req.responseText; } else if (typeof require !== 'undefined') { // Node.js var fs = require("fs"); - + try { if (fs.existsSync(path)) { var stats = fs.statSync(path); - + var fileDescriptor = fs.openSync(path, 'r'); - + var buffer = new Buffer(stats.size); - + fs.readSync(fileDescriptor, buffer, 0, buffer.length, null); - + return buffer.toString(charset, 0, buffer.length); } else { @@ -14336,56 +14336,56 @@ Typo.prototype = { } } }, - + /** * Parse the rules out from a .aff file. * * @param {String} data The contents of the affix file. * @returns object The rules from the file. 
*/ - + _parseAFF : function (data) { var rules = {}; - + // Remove comment lines data = this._removeAffixComments(data); - + var lines = data.split("\n"); - + for (var i = 0, _len = lines.length; i < _len; i++) { var line = lines[i]; - + var definitionParts = line.split(/\s+/); - + var ruleType = definitionParts[0]; - + if (ruleType == "PFX" || ruleType == "SFX") { var ruleCode = definitionParts[1]; var combineable = definitionParts[2]; var numEntries = parseInt(definitionParts[3], 10); - + var entries = []; - + for (var j = i + 1, _jlen = i + 1 + numEntries; j < _jlen; j++) { var line = lines[j]; - + var lineParts = line.split(/\s+/); var charactersToRemove = lineParts[2]; - + var additionParts = lineParts[3].split("/"); - + var charactersToAdd = additionParts[0]; if (charactersToAdd === "0") charactersToAdd = ""; - + var continuationClasses = this.parseRuleCodes(additionParts[1]); - + var regexToMatch = lineParts[4]; - + var entry = {}; entry.add = charactersToAdd; - + if (continuationClasses.length > 0) entry.continuationClasses = continuationClasses; - + if (regexToMatch !== ".") { if (ruleType === "SFX") { entry.match = new RegExp(regexToMatch + "$"); @@ -14394,7 +14394,7 @@ Typo.prototype = { entry.match = new RegExp("^" + regexToMatch); } } - + if (charactersToRemove != "0") { if (ruleType === "SFX") { entry.remove = new RegExp(charactersToRemove + "$"); @@ -14403,29 +14403,29 @@ Typo.prototype = { entry.remove = charactersToRemove; } } - + entries.push(entry); } - + rules[ruleCode] = { "type" : ruleType, "combineable" : (combineable == "Y"), "entries" : entries }; - + i += numEntries; } else if (ruleType === "COMPOUNDRULE") { var numEntries = parseInt(definitionParts[1], 10); - + for (var j = i + 1, _jlen = i + 1 + numEntries; j < _jlen; j++) { var line = lines[j]; - + var lineParts = line.split(/\s+/); this.compoundRules.push(lineParts[1]); } - + i += numEntries; } else if (ruleType === "REP") { var lineParts = line.split(/\s+/); - + if (lineParts.length 
=== 3) { this.replacementTable.push([ lineParts[1], lineParts[2] ]); } @@ -14436,37 +14436,37 @@ Typo.prototype = { // FLAG // KEEPCASE // NEEDAFFIX - + this.flags[ruleType] = definitionParts[1]; } } - + return rules; }, - + /** * Removes comment lines and then cleans up blank lines and trailing whitespace. * * @param {String} data The data from an affix file. * @return {String} The cleaned-up data. */ - + _removeAffixComments : function (data) { // Remove comments data = data.replace(/#.*$/mg, ""); - + // Trim each line data = data.replace(/^\s\s*/m, '').replace(/\s\s*$/m, ''); - + // Remove blank lines. data = data.replace(/\n{2,}/g, "\n"); - + // Trim the entire string data = data.replace(/^\s\s*/, '').replace(/\s\s*$/, ''); - + return data; }, - + /** * Parses the words out from the .dic file. * @@ -14474,62 +14474,62 @@ Typo.prototype = { * @returns object The lookup table containing all of the words and * word forms from the dictionary. */ - + _parseDIC : function (data) { data = this._removeDicComments(data); - + var lines = data.split("\n"); var dictionaryTable = {}; - + function addWord(word, rules) { // Some dictionaries will list the same word multiple times with different rule sets. if (!(word in dictionaryTable) || typeof dictionaryTable[word] != 'object') { dictionaryTable[word] = []; } - + dictionaryTable[word].push(rules); } - + // The first line is the number of words in the dictionary. for (var i = 1, _len = lines.length; i < _len; i++) { var line = lines[i]; - + var parts = line.split("/", 2); - + var word = parts[0]; // Now for each affix rule, generate that form of the word. if (parts.length > 1) { var ruleCodesArray = this.parseRuleCodes(parts[1]); - + // Save the ruleCodes for compound word situations. 
if (!("NEEDAFFIX" in this.flags) || ruleCodesArray.indexOf(this.flags.NEEDAFFIX) == -1) { addWord(word, ruleCodesArray); } - + for (var j = 0, _jlen = ruleCodesArray.length; j < _jlen; j++) { var code = ruleCodesArray[j]; - + var rule = this.rules[code]; - + if (rule) { var newWords = this._applyRule(word, rule); - + for (var ii = 0, _iilen = newWords.length; ii < _iilen; ii++) { var newWord = newWords[ii]; - + addWord(newWord, []); - + if (rule.combineable) { for (var k = j + 1; k < _jlen; k++) { var combineCode = ruleCodesArray[k]; - + var combineRule = this.rules[combineCode]; - + if (combineRule) { if (combineRule.combineable && (rule.type != combineRule.type)) { var otherNewWords = this._applyRule(newWord, combineRule); - + for (var iii = 0, _iiilen = otherNewWords.length; iii < _iiilen; iii++) { var otherNewWord = otherNewWords[iii]; addWord(otherNewWord, []); @@ -14540,7 +14540,7 @@ Typo.prototype = { } } } - + if (code in this.compoundRuleCodes) { this.compoundRuleCodes[code].push(word); } @@ -14550,28 +14550,28 @@ Typo.prototype = { addWord(word.trim(), []); } } - + return dictionaryTable; }, - - + + /** * Removes comment lines and then cleans up blank lines and trailing whitespace. * * @param {String} data The data from a .dic file. * @return {String} The cleaned-up data. */ - + _removeDicComments : function (data) { // I can't find any official documentation on it, but at least the de_DE // dictionary uses tab-indented lines as comments. - + // Remove comments data = data.replace(/^\t.*$/mg, ""); - + return data; }, - + parseRuleCodes : function (textCodes) { if (!textCodes) { return []; @@ -14581,18 +14581,18 @@ Typo.prototype = { } else if (this.flags.FLAG === "long") { var flags = []; - + for (var i = 0, _len = textCodes.length; i < _len; i += 2) { flags.push(textCodes.substr(i, 2)); } - + return flags; } else if (this.flags.FLAG === "num") { return textCode.split(","); } }, - + /** * Applies an affix rule to a word. 
* @@ -14600,41 +14600,41 @@ Typo.prototype = { * @param {Object} rule The affix rule. * @returns {String[]} The new words generated by the rule. */ - + _applyRule : function (word, rule) { var entries = rule.entries; var newWords = []; - + for (var i = 0, _len = entries.length; i < _len; i++) { var entry = entries[i]; - + if (!entry.match || word.match(entry.match)) { var newWord = word; - + if (entry.remove) { newWord = newWord.replace(entry.remove, ""); } - + if (rule.type === "SFX") { newWord = newWord + entry.add; } else { newWord = entry.add + newWord; } - + newWords.push(newWord); - + if ("continuationClasses" in entry) { for (var j = 0, _jlen = entry.continuationClasses.length; j < _jlen; j++) { var continuationRule = this.rules[entry.continuationClasses[j]]; - + if (continuationRule) { newWords = newWords.concat(this._applyRule(newWord, continuationRule)); } /* else { // This shouldn't happen, but it does, at least in the de_DE dictionary. - // I think the author mistakenly supplied lower-case rule codes instead + // I think the author mistakenly supplied lower-case rule codes instead // of upper-case. } */ @@ -14642,10 +14642,10 @@ Typo.prototype = { } } } - + return newWords; }, - + /** * Checks whether a word or a capitalization variant exists in the current dictionary. * The word is trimmed and several variations of capitalizations are checked. @@ -14656,58 +14656,58 @@ Typo.prototype = { * @param {String} aWord The word to check. * @returns {Boolean} */ - + check : function (aWord) { // Remove leading and trailing whitespace var trimmedWord = aWord.replace(/^\s\s*/, '').replace(/\s\s*$/, ''); - + if (this.checkExact(trimmedWord)) { return true; } - + // The exact word is not in the dictionary. if (trimmedWord.toUpperCase() === trimmedWord) { // The word was supplied in all uppercase. // Check for a capitalized form of the word. 
var capitalizedWord = trimmedWord[0] + trimmedWord.substring(1).toLowerCase(); - + if (this.hasFlag(capitalizedWord, "KEEPCASE")) { // Capitalization variants are not allowed for this word. return false; } - + if (this.checkExact(capitalizedWord)) { return true; } } - + var lowercaseWord = trimmedWord.toLowerCase(); - + if (lowercaseWord !== trimmedWord) { if (this.hasFlag(lowercaseWord, "KEEPCASE")) { // Capitalization variants are not allowed for this word. return false; } - + // Check for a lowercase form if (this.checkExact(lowercaseWord)) { return true; } } - + return false; }, - + /** * Checks whether a word exists in the current dictionary. * * @param {String} word The word to check. * @returns {Boolean} */ - + checkExact : function (word) { var ruleCodes = this.dictionaryTable[word]; - + if (typeof ruleCodes === 'undefined') { // Check if this might be a compound word. if ("COMPOUNDMIN" in this.flags && word.length >= this.flags.COMPOUNDMIN) { @@ -14717,7 +14717,7 @@ Typo.prototype = { } } } - + return false; } else if (typeof ruleCodes === 'object') { // this.dictionary['hasOwnProperty'] will be a function. @@ -14726,11 +14726,11 @@ Typo.prototype = { return true; } } - + return false; } }, - + /** * Looks up whether a given word is flagged with a given flag. * @@ -14738,21 +14738,21 @@ Typo.prototype = { * @param {String} flag The flag in question. * @return {Boolean} */ - + hasFlag : function (word, flag, wordFlags) { if (flag in this.flags) { if (typeof wordFlags === 'undefined') { var wordFlags = Array.prototype.concat.apply([], this.dictionaryTable[word]); } - + if (wordFlags && wordFlags.indexOf(this.flags[flag]) !== -1) { return true; } } - + return false; }, - + /** * Returns a list of suggestions for a misspelled word. * @@ -14763,138 +14763,138 @@ Typo.prototype = { * @param {Number} [limit=5] The maximum number of suggestions to return. * @returns {String[]} The array of suggestions. 
*/ - + alphabet : "", - + suggest : function (word, limit) { if (!limit) limit = 5; - + if (this.check(word)) return []; - + // Check the replacement table. for (var i = 0, _len = this.replacementTable.length; i < _len; i++) { var replacementEntry = this.replacementTable[i]; - + if (word.indexOf(replacementEntry[0]) !== -1) { var correctedWord = word.replace(replacementEntry[0], replacementEntry[1]); - + if (this.check(correctedWord)) { return [ correctedWord ]; } } } - + var self = this; self.alphabet = "abcdefghijklmnopqrstuvwxyz"; - + /* if (!self.alphabet) { // Use the alphabet as implicitly defined by the words in the dictionary. var alphaHash = {}; - + for (var i in self.dictionaryTable) { for (var j = 0, _len = i.length; j < _len; j++) { alphaHash[i[j]] = true; } } - + for (var i in alphaHash) { self.alphabet += i; } - + var alphaArray = self.alphabet.split(""); alphaArray.sort(); self.alphabet = alphaArray.join(""); } */ - + function edits1(words) { var rv = []; - + for (var ii = 0, _iilen = words.length; ii < _iilen; ii++) { var word = words[ii]; - + var splits = []; - + for (var i = 0, _len = word.length + 1; i < _len; i++) { splits.push([ word.substring(0, i), word.substring(i, word.length) ]); } - + var deletes = []; - + for (var i = 0, _len = splits.length; i < _len; i++) { var s = splits[i]; - + if (s[1]) { deletes.push(s[0] + s[1].substring(1)); } } - + var transposes = []; - + for (var i = 0, _len = splits.length; i < _len; i++) { var s = splits[i]; - + if (s[1].length > 1) { transposes.push(s[0] + s[1][1] + s[1][0] + s[1].substring(2)); } } - + var replaces = []; - + for (var i = 0, _len = splits.length; i < _len; i++) { var s = splits[i]; - + if (s[1]) { for (var j = 0, _jlen = self.alphabet.length; j < _jlen; j++) { replaces.push(s[0] + self.alphabet[j] + s[1].substring(1)); } } } - + var inserts = []; - + for (var i = 0, _len = splits.length; i < _len; i++) { var s = splits[i]; - + if (s[1]) { for (var j = 0, _jlen = self.alphabet.length; j < 
_jlen; j++) { replaces.push(s[0] + self.alphabet[j] + s[1]); } } } - + rv = rv.concat(deletes); rv = rv.concat(transposes); rv = rv.concat(replaces); rv = rv.concat(inserts); } - + return rv; } - + function known(words) { var rv = []; - + for (var i = 0; i < words.length; i++) { if (self.check(words[i])) { rv.push(words[i]); } } - + return rv; } - + function correct(word) { // Get the edit-distance-1 and edit-distance-2 forms of this word. var ed1 = edits1([word]); var ed2 = edits1(ed1); - + var corrections = known(ed1).concat(known(ed2)); - + // Sort the edits based on how many different ways they were created. var weighted_corrections = {}; - + for (var i = 0, _len = corrections.length; i < _len; i++) { if (!(corrections[i] in weighted_corrections)) { weighted_corrections[corrections[i]] = 1; @@ -14903,34 +14903,34 @@ Typo.prototype = { weighted_corrections[corrections[i]] += 1; } } - + var sorted_corrections = []; - + for (var i in weighted_corrections) { sorted_corrections.push([ i, weighted_corrections[i] ]); } - + function sorter(a, b) { if (a[1] < b[1]) { return -1; } - + return 1; } - + sorted_corrections.sort(sorter).reverse(); - + var rv = []; - + for (var i = 0, _len = Math.min(limit, sorted_corrections.length); i < _len; i++) { if (!self.hasFlag(sorted_corrections[i][0], "NOSUGGEST")) { rv.push(sorted_corrections[i][0]); } } - + return rv; } - + return correct(word); } }; @@ -17016,4 +17016,4 @@ SimpleMDE.prototype.toTextArea = function() { module.exports = SimpleMDE; },{"./codemirror/tablist":19,"codemirror":10,"codemirror-spell-checker":4,"codemirror/addon/display/fullscreen.js":5,"codemirror/addon/display/placeholder.js":6,"codemirror/addon/edit/continuelist.js":7,"codemirror/addon/mode/overlay.js":8,"codemirror/addon/selection/mark-selection.js":9,"codemirror/mode/gfm/gfm.js":11,"codemirror/mode/markdown/markdown.js":12,"codemirror/mode/xml/xml.js":14,"marked":17}]},{},[20])(20) -}); \ No newline at end of file +});