smarter spellcheck loading

pull/796/head
Ábel Nyéki 4 years ago
parent 6abda7ab68
commit d9813aa184
No known key found for this signature in database
GPG Key ID: 1AD1D8A0D3DA1169

@ -1866,6 +1866,16 @@ function CodeMirrorSpellChecker(options) {
// Define the new mode
options.codeMirrorInstance.defineMode("spell-checker", function(config) {
// Load AFF/DIC data
var aff_loaded, dic_loaded;
// Builds the shared Typo instance from the downloaded AFF/DIC text.
// It is a no-op until both aff_loaded and dic_loaded are truthy, so it can be
// called after every load event.
var loadSpellChecker = function() {
    var bothReady = aff_loaded && dic_loaded;
    if(!bothReady) {
        return;
    }

    var typoSettings = { platform: "any" };
    CodeMirrorSpellChecker.typo = new Typo(
        "en_US",
        CodeMirrorSpellChecker.aff_data,
        CodeMirrorSpellChecker.dic_data,
        typoSettings
    );
};
if(!CodeMirrorSpellChecker.aff_loading) {
CodeMirrorSpellChecker.aff_loading = true;
var xhr_aff = new XMLHttpRequest();
@ -1873,13 +1883,8 @@ function CodeMirrorSpellChecker(options) {
// Load handler for the affix (.aff) request: stores the response text and then
// tries to build the spell checker.
xhr_aff.onload = function() {
    // Only a finished request (readyState 4) answered with HTTP 200 is used.
    var succeeded = xhr_aff.readyState === 4 && xhr_aff.status === 200;
    if(!succeeded) {
        return;
    }

    CodeMirrorSpellChecker.aff_data = xhr_aff.responseText;

    // Counter-based trigger: build Typo when the shared counter reaches 2.
    CodeMirrorSpellChecker.num_loaded++;
    if(CodeMirrorSpellChecker.num_loaded == 2) {
        var typoOptions = { platform: "any" };
        CodeMirrorSpellChecker.typo = new Typo("en_US", CodeMirrorSpellChecker.aff_data, CodeMirrorSpellChecker.dic_data, typoOptions);
    }

    // Flag-based trigger: mark the affix half as ready and let
    // loadSpellChecker() decide whether both halves are available.
    // NOTE(review): both triggers can construct Typo - confirm whether the
    // counter-based one is still required.
    aff_loaded = true;
    loadSpellChecker();
};
xhr_aff.send(null);
@ -1892,13 +1897,8 @@ function CodeMirrorSpellChecker(options) {
// Load handler for the dictionary (.dic) request: stores the response text,
// marks the dictionary half as ready and asks loadSpellChecker() to build the
// Typo instance once the affix half is ready too.
xhr_dic.onload = function() {
    // Only a finished request (readyState 4) answered with HTTP 200 is used.
    if(xhr_dic.readyState !== 4 || xhr_dic.status !== 200) {
        return;
    }

    CodeMirrorSpellChecker.dic_data = xhr_dic.responseText;

    // The shared counter is still advanced for any code that reads it.
    CodeMirrorSpellChecker.num_loaded++;

    // This handler must raise dic_loaded. It used to set aff_loaded, which
    // left dic_loaded unset forever, so loadSpellChecker() could never see
    // both flags.
    dic_loaded = true;

    // loadSpellChecker() performs the same Typo construction that used to be
    // repeated inline here; going through it alone avoids parsing the
    // dictionaries a second time from this handler.
    loadSpellChecker();
};
xhr_dic.send(null);
@ -14153,7 +14153,7 @@ if (typeof module !== 'undefined' && typeof exports === 'object') {
'use strict';
/**
* Typo is a JavaScript implementation of a spellchecker using hunspell-style
* Typo is a JavaScript implementation of a spellchecker using hunspell-style
* dictionaries.
*/
@ -14185,22 +14185,22 @@ if (typeof module !== 'undefined' && typeof exports === 'object') {
var Typo = function (dictionary, affData, wordsData, settings) {
settings = settings || {};
this.dictionary = null;
this.rules = {};
this.dictionaryTable = {};
this.compoundRules = [];
this.compoundRuleCodes = {};
this.replacementTable = [];
this.flags = settings.flags || {};
this.flags = settings.flags || {};
if (dictionary) {
this.dictionary = dictionary;
if (typeof window !== 'undefined' && 'chrome' in window && 'extension' in window.chrome && 'getURL' in window.chrome.extension) {
if (!affData) affData = this._readFile(chrome.extension.getURL("lib/typo/dictionaries/" + dictionary + "/" + dictionary + ".aff"));
if (!wordsData) wordsData = this._readFile(chrome.extension.getURL("lib/typo/dictionaries/" + dictionary + "/" + dictionary + ".dic"));
@ -14214,51 +14214,51 @@ var Typo = function (dictionary, affData, wordsData, settings) {
else {
var path = './dictionaries';
}
if (!affData) affData = this._readFile(path + "/" + dictionary + "/" + dictionary + ".aff");
if (!wordsData) wordsData = this._readFile(path + "/" + dictionary + "/" + dictionary + ".dic");
}
this.rules = this._parseAFF(affData);
// Save the rule codes that are used in compound rules.
this.compoundRuleCodes = {};
for (var i = 0, _len = this.compoundRules.length; i < _len; i++) {
var rule = this.compoundRules[i];
for (var j = 0, _jlen = rule.length; j < _jlen; j++) {
this.compoundRuleCodes[rule[j]] = [];
}
}
// If we add this ONLYINCOMPOUND flag to this.compoundRuleCodes, then _parseDIC
// will do the work of saving the list of words that are compound-only.
if ("ONLYINCOMPOUND" in this.flags) {
this.compoundRuleCodes[this.flags.ONLYINCOMPOUND] = [];
}
this.dictionaryTable = this._parseDIC(wordsData);
// Get rid of any codes from the compound rule codes that are never used
// (or that were special regex characters). Not especially necessary...
// Get rid of any codes from the compound rule codes that are never used
// (or that were special regex characters). Not especially necessary...
for (var i in this.compoundRuleCodes) {
if (this.compoundRuleCodes[i].length == 0) {
delete this.compoundRuleCodes[i];
}
}
// Build the full regular expressions for each compound rule.
// I have a feeling (but no confirmation yet) that this method of
// I have a feeling (but no confirmation yet) that this method of
// testing for compound words is probably slow.
for (var i = 0, _len = this.compoundRules.length; i < _len; i++) {
var ruleText = this.compoundRules[i];
var expressionText = "";
for (var j = 0, _jlen = ruleText.length; j < _jlen; j++) {
var character = ruleText[j];
if (character in this.compoundRuleCodes) {
expressionText += "(" + this.compoundRuleCodes[character].join("|") + ")";
}
@ -14266,11 +14266,11 @@ var Typo = function (dictionary, affData, wordsData, settings) {
expressionText += character;
}
}
this.compoundRules[i] = new RegExp(expressionText, "i");
}
}
return this;
};
@ -14280,51 +14280,51 @@ Typo.prototype = {
*
* @param object obj A hash of Typo properties, probably gotten from a JSON.parse(JSON.stringify(typo_instance)).
*/
load : function (obj) {
for (var i in obj) {
this[i] = obj[i];
}
return this;
},
/**
* Read the contents of a file.
*
*
* @param {String} path The path (relative) to the file.
* @param {String} [charset="utf8"] The expected charset of the file
* @returns string The file data.
*/
_readFile : function (path, charset) {
if (!charset) charset = "utf8";
if (typeof XMLHttpRequest !== 'undefined') {
var req = new XMLHttpRequest();
req.open("GET", path, false);
if (req.overrideMimeType)
req.overrideMimeType("text/plain; charset=" + charset);
req.send(null);
return req.responseText;
}
else if (typeof require !== 'undefined') {
// Node.js
var fs = require("fs");
try {
if (fs.existsSync(path)) {
var stats = fs.statSync(path);
var fileDescriptor = fs.openSync(path, 'r');
var buffer = new Buffer(stats.size);
fs.readSync(fileDescriptor, buffer, 0, buffer.length, null);
return buffer.toString(charset, 0, buffer.length);
}
else {
@ -14336,56 +14336,56 @@ Typo.prototype = {
}
}
},
/**
* Parse the rules out from a .aff file.
*
* @param {String} data The contents of the affix file.
* @returns object The rules from the file.
*/
_parseAFF : function (data) {
var rules = {};
// Remove comment lines
data = this._removeAffixComments(data);
var lines = data.split("\n");
for (var i = 0, _len = lines.length; i < _len; i++) {
var line = lines[i];
var definitionParts = line.split(/\s+/);
var ruleType = definitionParts[0];
if (ruleType == "PFX" || ruleType == "SFX") {
var ruleCode = definitionParts[1];
var combineable = definitionParts[2];
var numEntries = parseInt(definitionParts[3], 10);
var entries = [];
for (var j = i + 1, _jlen = i + 1 + numEntries; j < _jlen; j++) {
var line = lines[j];
var lineParts = line.split(/\s+/);
var charactersToRemove = lineParts[2];
var additionParts = lineParts[3].split("/");
var charactersToAdd = additionParts[0];
if (charactersToAdd === "0") charactersToAdd = "";
var continuationClasses = this.parseRuleCodes(additionParts[1]);
var regexToMatch = lineParts[4];
var entry = {};
entry.add = charactersToAdd;
if (continuationClasses.length > 0) entry.continuationClasses = continuationClasses;
if (regexToMatch !== ".") {
if (ruleType === "SFX") {
entry.match = new RegExp(regexToMatch + "$");
@ -14394,7 +14394,7 @@ Typo.prototype = {
entry.match = new RegExp("^" + regexToMatch);
}
}
if (charactersToRemove != "0") {
if (ruleType === "SFX") {
entry.remove = new RegExp(charactersToRemove + "$");
@ -14403,29 +14403,29 @@ Typo.prototype = {
entry.remove = charactersToRemove;
}
}
entries.push(entry);
}
rules[ruleCode] = { "type" : ruleType, "combineable" : (combineable == "Y"), "entries" : entries };
i += numEntries;
}
else if (ruleType === "COMPOUNDRULE") {
var numEntries = parseInt(definitionParts[1], 10);
for (var j = i + 1, _jlen = i + 1 + numEntries; j < _jlen; j++) {
var line = lines[j];
var lineParts = line.split(/\s+/);
this.compoundRules.push(lineParts[1]);
}
i += numEntries;
}
else if (ruleType === "REP") {
var lineParts = line.split(/\s+/);
if (lineParts.length === 3) {
this.replacementTable.push([ lineParts[1], lineParts[2] ]);
}
@ -14436,37 +14436,37 @@ Typo.prototype = {
// FLAG
// KEEPCASE
// NEEDAFFIX
this.flags[ruleType] = definitionParts[1];
}
}
return rules;
},
/**
* Removes comment lines and then cleans up blank lines and trailing whitespace.
*
* @param {String} data The data from an affix file.
* @return {String} The cleaned-up data.
*/
_removeAffixComments : function (data) {
// Remove comments
data = data.replace(/#.*$/mg, "");
// Trim each line
data = data.replace(/^\s\s*/m, '').replace(/\s\s*$/m, '');
// Remove blank lines.
data = data.replace(/\n{2,}/g, "\n");
// Trim the entire string
data = data.replace(/^\s\s*/, '').replace(/\s\s*$/, '');
return data;
},
/**
* Parses the words out from the .dic file.
*
@ -14474,62 +14474,62 @@ Typo.prototype = {
* @returns object The lookup table containing all of the words and
* word forms from the dictionary.
*/
_parseDIC : function (data) {
data = this._removeDicComments(data);
var lines = data.split("\n");
var dictionaryTable = {};
function addWord(word, rules) {
// Some dictionaries will list the same word multiple times with different rule sets.
if (!(word in dictionaryTable) || typeof dictionaryTable[word] != 'object') {
dictionaryTable[word] = [];
}
dictionaryTable[word].push(rules);
}
// The first line is the number of words in the dictionary.
for (var i = 1, _len = lines.length; i < _len; i++) {
var line = lines[i];
var parts = line.split("/", 2);
var word = parts[0];
// Now for each affix rule, generate that form of the word.
if (parts.length > 1) {
var ruleCodesArray = this.parseRuleCodes(parts[1]);
// Save the ruleCodes for compound word situations.
if (!("NEEDAFFIX" in this.flags) || ruleCodesArray.indexOf(this.flags.NEEDAFFIX) == -1) {
addWord(word, ruleCodesArray);
}
for (var j = 0, _jlen = ruleCodesArray.length; j < _jlen; j++) {
var code = ruleCodesArray[j];
var rule = this.rules[code];
if (rule) {
var newWords = this._applyRule(word, rule);
for (var ii = 0, _iilen = newWords.length; ii < _iilen; ii++) {
var newWord = newWords[ii];
addWord(newWord, []);
if (rule.combineable) {
for (var k = j + 1; k < _jlen; k++) {
var combineCode = ruleCodesArray[k];
var combineRule = this.rules[combineCode];
if (combineRule) {
if (combineRule.combineable && (rule.type != combineRule.type)) {
var otherNewWords = this._applyRule(newWord, combineRule);
for (var iii = 0, _iiilen = otherNewWords.length; iii < _iiilen; iii++) {
var otherNewWord = otherNewWords[iii];
addWord(otherNewWord, []);
@ -14540,7 +14540,7 @@ Typo.prototype = {
}
}
}
if (code in this.compoundRuleCodes) {
this.compoundRuleCodes[code].push(word);
}
@ -14550,28 +14550,28 @@ Typo.prototype = {
addWord(word.trim(), []);
}
}
return dictionaryTable;
},
/**
* Blanks out comment lines (lines that start with a tab); no other cleanup is performed.
*
* @param {String} data The data from a .dic file.
* @return {String} The cleaned-up data.
*/
_removeDicComments : function (data) {
// I can't find any official documentation on it, but at least the de_DE
// dictionary uses tab-indented lines as comments.
// Remove comments
data = data.replace(/^\t.*$/mg, "");
return data;
},
parseRuleCodes : function (textCodes) {
if (!textCodes) {
return [];
@ -14581,18 +14581,18 @@ Typo.prototype = {
}
else if (this.flags.FLAG === "long") {
var flags = [];
for (var i = 0, _len = textCodes.length; i < _len; i += 2) {
flags.push(textCodes.substr(i, 2));
}
return flags;
}
else if (this.flags.FLAG === "num") {
return textCode.split(",");
}
},
/**
* Applies an affix rule to a word.
*
@ -14600,41 +14600,41 @@ Typo.prototype = {
* @param {Object} rule The affix rule.
* @returns {String[]} The new words generated by the rule.
*/
_applyRule : function (word, rule) {
var entries = rule.entries;
var newWords = [];
for (var i = 0, _len = entries.length; i < _len; i++) {
var entry = entries[i];
if (!entry.match || word.match(entry.match)) {
var newWord = word;
if (entry.remove) {
newWord = newWord.replace(entry.remove, "");
}
if (rule.type === "SFX") {
newWord = newWord + entry.add;
}
else {
newWord = entry.add + newWord;
}
newWords.push(newWord);
if ("continuationClasses" in entry) {
for (var j = 0, _jlen = entry.continuationClasses.length; j < _jlen; j++) {
var continuationRule = this.rules[entry.continuationClasses[j]];
if (continuationRule) {
newWords = newWords.concat(this._applyRule(newWord, continuationRule));
}
/*
else {
// This shouldn't happen, but it does, at least in the de_DE dictionary.
// I think the author mistakenly supplied lower-case rule codes instead
// I think the author mistakenly supplied lower-case rule codes instead
// of upper-case.
}
*/
@ -14642,10 +14642,10 @@ Typo.prototype = {
}
}
}
return newWords;
},
/**
* Checks whether a word or a capitalization variant exists in the current dictionary.
* The word is trimmed and several variations of capitalizations are checked.
@ -14656,58 +14656,58 @@ Typo.prototype = {
* @param {String} aWord The word to check.
* @returns {Boolean}
*/
check : function (aWord) {
// Remove leading and trailing whitespace
var trimmedWord = aWord.replace(/^\s\s*/, '').replace(/\s\s*$/, '');
if (this.checkExact(trimmedWord)) {
return true;
}
// The exact word is not in the dictionary.
if (trimmedWord.toUpperCase() === trimmedWord) {
// The word was supplied in all uppercase.
// Check for a capitalized form of the word.
var capitalizedWord = trimmedWord[0] + trimmedWord.substring(1).toLowerCase();
if (this.hasFlag(capitalizedWord, "KEEPCASE")) {
// Capitalization variants are not allowed for this word.
return false;
}
if (this.checkExact(capitalizedWord)) {
return true;
}
}
var lowercaseWord = trimmedWord.toLowerCase();
if (lowercaseWord !== trimmedWord) {
if (this.hasFlag(lowercaseWord, "KEEPCASE")) {
// Capitalization variants are not allowed for this word.
return false;
}
// Check for a lowercase form
if (this.checkExact(lowercaseWord)) {
return true;
}
}
return false;
},
/**
* Checks whether a word exists in the current dictionary.
*
* @param {String} word The word to check.
* @returns {Boolean}
*/
checkExact : function (word) {
var ruleCodes = this.dictionaryTable[word];
if (typeof ruleCodes === 'undefined') {
// Check if this might be a compound word.
if ("COMPOUNDMIN" in this.flags && word.length >= this.flags.COMPOUNDMIN) {
@ -14717,7 +14717,7 @@ Typo.prototype = {
}
}
}
return false;
}
else if (typeof ruleCodes === 'object') { // this.dictionary['hasOwnProperty'] will be a function.
@ -14726,11 +14726,11 @@ Typo.prototype = {
return true;
}
}
return false;
}
},
/**
* Looks up whether a given word is flagged with a given flag.
*
@ -14738,21 +14738,21 @@ Typo.prototype = {
* @param {String} flag The flag in question.
* @return {Boolean}
*/
hasFlag : function (word, flag, wordFlags) {
if (flag in this.flags) {
if (typeof wordFlags === 'undefined') {
var wordFlags = Array.prototype.concat.apply([], this.dictionaryTable[word]);
}
if (wordFlags && wordFlags.indexOf(this.flags[flag]) !== -1) {
return true;
}
}
return false;
},
/**
* Returns a list of suggestions for a misspelled word.
*
@ -14763,138 +14763,138 @@ Typo.prototype = {
* @param {Number} [limit=5] The maximum number of suggestions to return.
* @returns {String[]} The array of suggestions.
*/
alphabet : "",
suggest : function (word, limit) {
if (!limit) limit = 5;
if (this.check(word)) return [];
// Check the replacement table.
for (var i = 0, _len = this.replacementTable.length; i < _len; i++) {
var replacementEntry = this.replacementTable[i];
if (word.indexOf(replacementEntry[0]) !== -1) {
var correctedWord = word.replace(replacementEntry[0], replacementEntry[1]);
if (this.check(correctedWord)) {
return [ correctedWord ];
}
}
}
var self = this;
self.alphabet = "abcdefghijklmnopqrstuvwxyz";
/*
if (!self.alphabet) {
// Use the alphabet as implicitly defined by the words in the dictionary.
var alphaHash = {};
for (var i in self.dictionaryTable) {
for (var j = 0, _len = i.length; j < _len; j++) {
alphaHash[i[j]] = true;
}
}
for (var i in alphaHash) {
self.alphabet += i;
}
var alphaArray = self.alphabet.split("");
alphaArray.sort();
self.alphabet = alphaArray.join("");
}
*/
// Returns every string that is one edit away from each of the given words.
// An edit is a single-character deletion, a transposition of two adjacent
// characters, a replacement by a letter of self.alphabet, or an insertion of
// a letter of self.alphabet at any position, including after the last
// character. For each word the results are ordered deletes, transposes,
// replaces, inserts. Duplicates are kept.
function edits1(words) {
    var rv = [];
    var letters = self.alphabet;

    for (var ii = 0, _iilen = words.length; ii < _iilen; ii++) {
        var word = words[ii];

        var deletes = [];
        var transposes = [];
        var replaces = [];
        var inserts = [];

        // i walks over every split point, from before the first character
        // (i === 0) to after the last one (i === word.length).
        for (var i = 0, _len = word.length; i <= _len; i++) {
            var head = word.substring(0, i);
            var tail = word.substring(i);

            if (tail) {
                deletes.push(head + tail.substring(1));
            }

            if (tail.length > 1) {
                transposes.push(head + tail[1] + tail[0] + tail.substring(2));
            }

            for (var j = 0, _jlen = letters.length; j < _jlen; j++) {
                if (tail) {
                    replaces.push(head + letters[j] + tail.substring(1));
                }

                // Insertions go into their own list and are also produced
                // for the empty tail, so a missing final letter
                // ("hell" -> "hello") is reachable with a single edit.
                inserts.push(head + letters[j] + tail);
            }
        }

        rv = rv.concat(deletes, transposes, replaces, inserts);
    }

    return rv;
}
// Keeps only the candidates that self.check() accepts, in their original
// order. Duplicates are preserved.
function known(words) {
    return words.filter(function (candidate) {
        return self.check(candidate);
    });
}
function correct(word) {
// Get the edit-distance-1 and edit-distance-2 forms of this word.
var ed1 = edits1([word]);
var ed2 = edits1(ed1);
var corrections = known(ed1).concat(known(ed2));
// Sort the edits based on how many different ways they were created.
var weighted_corrections = {};
for (var i = 0, _len = corrections.length; i < _len; i++) {
if (!(corrections[i] in weighted_corrections)) {
weighted_corrections[corrections[i]] = 1;
@ -14903,34 +14903,34 @@ Typo.prototype = {
weighted_corrections[corrections[i]] += 1;
}
}
var sorted_corrections = [];
for (var i in weighted_corrections) {
sorted_corrections.push([ i, weighted_corrections[i] ]);
}
function sorter(a, b) {
if (a[1] < b[1]) {
return -1;
}
return 1;
}
sorted_corrections.sort(sorter).reverse();
var rv = [];
for (var i = 0, _len = Math.min(limit, sorted_corrections.length); i < _len; i++) {
if (!self.hasFlag(sorted_corrections[i][0], "NOSUGGEST")) {
rv.push(sorted_corrections[i][0]);
}
}
return rv;
}
return correct(word);
}
};
@ -17016,4 +17016,4 @@ SimpleMDE.prototype.toTextArea = function() {
module.exports = SimpleMDE;
},{"./codemirror/tablist":19,"codemirror":10,"codemirror-spell-checker":4,"codemirror/addon/display/fullscreen.js":5,"codemirror/addon/display/placeholder.js":6,"codemirror/addon/edit/continuelist.js":7,"codemirror/addon/mode/overlay.js":8,"codemirror/addon/selection/mark-selection.js":9,"codemirror/mode/gfm/gfm.js":11,"codemirror/mode/markdown/markdown.js":12,"codemirror/mode/xml/xml.js":14,"marked":17}]},{},[20])(20)
});
});

Loading…
Cancel
Save