![]() System : Linux absol.cf 5.4.0-198-generic #218-Ubuntu SMP Fri Sep 27 20:18:53 UTC 2024 x86_64 User : www-data ( 33) PHP Version : 7.4.33 Disable Function : pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_get_handler,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,pcntl_async_signals,pcntl_unshare, Directory : /var/www/html/libs/absol-full/dist/js/ |
Upload File : |
/*** module: node_modules/absol-vietnamese/tokenizeByWord.js ***/
"use strict";

Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.default = tokenizeByWord;
var _tokenizeBySound = _interopRequireDefault(require("./tokenizeBySound"));

/**
 * Concatenate the text of `l` consecutive tokens starting at index `i`.
 * A token whose type is 'break' contributes a single space regardless of its
 * own text; any other token contributes its `text`.
 *
 * @param {Array<{type: string, text: string}>} tokens - token list to read from
 * @param {number} i - index of the first token
 * @param {number} l - number of tokens to join
 * @returns {string} the joined text
 */
function subWord(tokens, i, l) {
  var res = '';
  var end = i + l;
  var token;
  for (; i < end; ++i) {
    token = tokens[i];
    res += token.type === 'break' ? ' ' : token.text;
  }
  return res;
}

/**
 * Greedily group tokens into dictionary words.
 *
 * 'break' tokens are copied to the output unchanged. At every other position
 * the longest window (at most 20 tokens, break tokens included) whose joined
 * text - exactly as written, or lower-cased - is a dictionary entry becomes one
 * `{ text, type: 'word' }` token. If no window matches, the single token at
 * that position is emitted as a word on its own.
 *
 * @param {Array<{type: string, text: string}>} tokens - input tokens
 * @param {Object<string, *>} [wordDict] - dictionary keyed by word text; an
 *   entry counts when it is an own property with a truthy value. A missing
 *   dictionary is treated as empty.
 * @returns {Array<{type: string, text: string}>} break tokens and word tokens
 */
function breakWord(tokens, wordDict) {
  // Upper bound on the size of a candidate window, counted in tokens.
  var MAX_WORD_TOKENS = 20;
  var hasOwn = Object.prototype.hasOwnProperty;
  // Without this guard a missing dictionary throws as soon as a window of
  // more than one token is tried.
  var dict = wordDict === null || wordDict === undefined ? {} : wordDict;

  // Own-property check so that inherited members such as `constructor` or
  // `toString` are never mistaken for dictionary entries.
  function isWord(text) {
    return hasOwn.call(dict, text) && dict[text];
  }

  var res = [];
  var i = 0;
  var token;
  var lastText;
  var l;
  while (i < tokens.length) {
    token = tokens[i];
    if (token.type === 'break') {
      res.push(token);
      ++i;
      continue;
    }
    // Try the longest window first and shrink; a window of one token always
    // succeeds, so the scan is guaranteed to advance.
    for (l = Math.min(MAX_WORD_TOKENS, tokens.length - i); l > 0; --l) {
      lastText = subWord(tokens, i, l);
      if (l === 1 || isWord(lastText) || isWord(lastText.toLowerCase())) {
        res.push({
          text: lastText,
          type: 'word'
        });
        i += l;
        break;
      }
    }
  }
  return res;
}

/**
 * Split `text` into word tokens using a dictionary.
 *
 * The text is first tokenized by `tokenizeBySound` (imported from
 * "./tokenizeBySound"; presumably syllable-level tokens plus 'break' tokens -
 * confirm against that module) and the result is regrouped by `breakWord`.
 *
 * @param {string} text - text to tokenize
 * @param {string[]|Object<string, *>} [wordDict] - list of known words, or an
 *   object keyed by word with truthy values; optional
 * @returns {Array<{type: string, text: string}>} break tokens and word tokens
 */
function tokenizeByWord(text, wordDict) {
  // Array.isArray also recognises arrays created in another realm (iframe).
  if (Array.isArray(wordDict)) {
    // Prototype-less accumulator: entries cannot collide with Object.prototype.
    wordDict = wordDict.reduce(function (ac, cr) {
      ac[cr] = true;
      return ac;
    }, Object.create(null));
  }
  var tokens = (0, _tokenizeBySound.default)(text);
  return breakWord(tokens, wordDict);
}