/**
 * WordGetter.js
 *
 * Released under LGPL License.
 * Copyright (c) 1999-2016 Ephox Corp. All rights reserved
 *
 * License: http://www.tinymce.com/license
 * Contributing: http://www.tinymce.com/contributing
 */
define(
  'tinymce.plugins.wordcount.text.WordGetter',
  [
    'tinymce.plugins.wordcount.text.UnicodeData',
    'tinymce.plugins.wordcount.text.StringMapper',
    'tinymce.plugins.wordcount.text.WordBoundary'
  ],
  function (UnicodeData, StringMapper, WordBoundary) {
    // Local aliases for values exported by the UnicodeData module.
    // EMPTY_STRING is the separator used when the collected characters are joined into a word.
    var EMPTY_STRING = UnicodeData.EMPTY_STRING;
    // WHITESPACE and PUNCTUATION are only ever used through `.test()` in this module, to decide
    // whether a character or a finished word is whitespace / punctuation.
    var WHITESPACE = UnicodeData.WHITESPACE;
    var PUNCTUATION = UnicodeData.PUNCTUATION;
    /**
     * Tells whether a word is a URL scheme that may be the start of a web address.
     *
     * URI schemes are case-insensitive, so "HTTP" and "Https" are accepted as well as the
     * lowercase spellings. With a case-sensitive comparison an upper-case URL is only
     * recognised when the caller lowercases the text first.
     *
     * @param {String} word Word to check.
     * @return {Boolean} True when the word is "http" or "https" in any letter case.
     */
    var isProtocol = function (word) {
      // The typeof guard keeps non-string input returning false instead of being coerced.
      return typeof word === 'string' && /^https?$/i.test(word);
    };
    /**
     * Scans forward from `index` and reports where the current run of non-whitespace ends.
     *
     * @param {String} string Text to scan.
     * @param {Number} index Position to start scanning from.
     * @return {Number} Index of the first character at or after `index` that matches WHITESPACE,
     * or the position where scanning stopped because the end of the text was reached.
     */
    var findWordEnd = function (string, index) {
      var position = index;
      // Advance while there is text left and the current character is not whitespace.
      while (position < string.length && !WHITESPACE.test(string.charAt(position))) {
        ++position;
      }
      return position;
    };
    /**
     * Extends a protocol word into a complete URL when the text right after it starts with "://".
     *
     * @param {String} word Protocol word that has just been read (for example "http").
     * @param {String} string Full text being scanned.
     * @param {Number} index Index in `string` of the last character of `word`.
     * @return {Object} An object with `word` (the whole URL, or the untouched word when no URL
     * follows) and `index` (the index of the last character consumed). The caller's loop
     * increments `index` before reading the next character, so in both branches the value must
     * point AT the last consumed character, never past it.
     */
    var extractUrl = function (word, string, index) {
      var endIndex = findWordEnd(string, index + 1);
      var peekedWord = string.substring(index + 1, endIndex);
      if (peekedWord.substring(0, 3) === '://') {
        return {
          word: word + peekedWord,
          // findWordEnd() stops ON the terminating whitespace (or at string.length). Returning
          // that position would make the caller step over the whitespace character without ever
          // processing it, so report the last character of the URL instead.
          index: endIndex - 1
        };
      }
      return {
        word: word,
        index: index
      };
    };
    /**
     * Splits a string into words using the classification map and the word boundary rules.
     *
     * @param {String} string Text to split.
     * @param {Object} [options] Optional flags read by this function:
     *   - ignoreCase: lowercase the text before the words are collected.
     *   - includePunctuation: keep words that match PUNCTUATION.
     *   - includeWhitespace: keep words that match WHITESPACE.
     * @return {Array} The words that were found, in order of appearance.
     */
    var doGetWords = function (string, options) {
      // The classification is built from the text as passed in, before any lowercasing.
      var classification = StringMapper.classify(string);
      var total = classification.length;
      var settings = options || {};
      var text = settings.ignoreCase ? string.toLowerCase() : string;
      var keepPunctuation = settings.includePunctuation;
      var keepWhitespace = settings.includeWhitespace;
      var collected = [];
      var pending = [];

      // Walk the classification map one position at a time, gathering characters until the
      // boundary rules report a break after the current position.
      for (var position = 0; position < total; ++position) {
        pending.push(text.charAt(position));

        if (!WordBoundary.isWordBoundary(classification, position)) {
          continue;
        }

        // A boundary follows this character: close the word and start a fresh buffer.
        var candidate = pending.join(EMPTY_STRING);
        pending = [];

        if (!candidate) {
          continue;
        }
        if (!keepWhitespace && WHITESPACE.test(candidate)) {
          continue;
        }
        if (!keepPunctuation && PUNCTUATION.test(candidate)) {
          continue;
        }

        if (!isProtocol(candidate)) {
          collected.push(candidate);
          continue;
        }

        // A protocol word may be the start of a URL; extractUrl decides and also reports the
        // index from which the loop continues.
        var url = extractUrl(candidate, text, position);
        collected.push(url.word);
        position = url.index;
      }

      return collected;
    };
    /**
     * Returns the words found in a string.
     *
     * @param {String} string Text to split into words.
     * @param {Object} [options] Options handed through unchanged to doGetWords.
     * @return {Array} Whatever doGetWords returns for the cleaned text.
     */
    var getWords = function (string, options) {
      // Every U+FEFF (byte order mark / zero width no-break space) is removed before scanning.
      var cleaned = string.split('\ufeff').join('');
      return doGetWords(cleaned, options);
    };
    // Module exports: getWords is the only function exposed; every other function and
    // constant declared above stays private to this module.
    return {
      getWords: getWords
    };
  }
);
 |