是否有允许正则表达式的JavaScript的String.indexOf()版本?


Answers

String构造函数的实例有一个.search()方法 ,它接受RegExp并返回第一个匹配的索引。

要从特定位置开始搜索(伪造.indexOf()的第二个参数),可以切掉第一个i字符:

str.slice(i).search(/re/)

但是这会在较短的字符串中获得索引(在第一部分被切掉之后),因此如果不是-1 ,则需要将切掉的部分( i )的长度添加到返回的索引中。 这会给你在原始字符串的索引:

function regexIndexOf(text, re, i) {
    var indexInSuffix = text.slice(i).search(re);
    return indexInSuffix < 0 ? indexInSuffix : indexInSuffix + i;
}
Question

在JavaScript中,是否有等价的String.indexOf(),它需要一个正则表达式而不是第一个参数的字符串,同时还允许第二个参数?

我需要做类似的事情

str.indexOf(/[abc]/ , i);

str.lastIndexOf(/[abc]/ , i);

虽然String.search()将一个正则表达式作为参数,但它不允许我指定第二个参数!

编辑:
事实证明,这比我原先想象的要难,于是我编写了一个小测试函数来测试所有提供的解决方案......它假定regexIndexOf和regexLastIndexOf已被添加到String对象中。

function test (str) {
    var i = str.length +2;
    while (i--) {
        if (str.indexOf('a',i) != str.regexIndexOf(/a/,i)) 
            alert (['failed regexIndexOf ' , str,i , str.indexOf('a',i) , str.regexIndexOf(/a/,i)]) ;
        if (str.lastIndexOf('a',i) != str.regexLastIndexOf(/a/,i) ) 
            alert (['failed regexLastIndexOf ' , str,i,str.lastIndexOf('a',i) , str.regexLastIndexOf(/a/,i)]) ;
    }
}

我正在测试如下,以确保至少对于一个字符的正则表达式,结果与我们使用indexOf

//在xes中寻找一个
试验( 'XXX');
试验( 'AXX');
试验( 'XAX');
试验( 'XXA');
试验( 'AXA');
试验( '的Xaa');
试验( 'AAX');
试验( 'AAA');




仍然没有执行请求的任务的本地方法。

这是我正在使用的代码。 它模仿了String.prototype.indexOfString.prototype.lastIndexOf方法的行为,但除了表示要搜索的值的字符串外,它们还接受RegExp作为搜索参数。

是的,答案很长,因为它尽可能接近当前的标准,当然也包含了合理的JSDOC评论。 但是,一旦缩小,代码只有2.27k,并且一旦被传输,就只有1023字节。

这添加到String.prototype的2个方法(在可用的情况下使用Object.defineProperty )是:

  1. searchOf
  2. searchLastOf

它通过了OP发布的所有测试,另外我已经在日常使用中彻底测试了这些例程,并试图确保它们可以在多个环境中工作,但始终欢迎反馈/问题。

/*jslint maxlen:80, browser:true */

/*
 * Properties used by searchOf and searchLastOf implementation.
 */

/*property
    MAX_SAFE_INTEGER, abs, add, apply, call, configurable, defineProperty,
    enumerable, exec, floor, global, hasOwnProperty, ignoreCase, index,
    lastIndex, lastIndexOf, length, max, min, multiline, pow, prototype,
    remove, replace, searchLastOf, searchOf, source, toString, value, writable
*/

/*
 * Properties used in the testing of searchOf and searchLastOf implimentation.
 */

/*property
    appendChild, createTextNode, getElementById, indexOf, lastIndexOf, length,
    searchLastOf, searchOf, unshift
*/

(function () {
    'use strict';

    var MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || Math.pow(2, 53) - 1,
        getNativeFlags = new RegExp('\\/([a-z]*)$', 'i'),
        clipDups = new RegExp('([\\s\\S])(?=[\\s\\S]*\\1)', 'g'),
        pToString = Object.prototype.toString,
        pHasOwn = Object.prototype.hasOwnProperty,
        stringTagRegExp;

    /**
     * Defines a new property directly on an object, or modifies an existing
     * property on an object, and returns the object.
     *
     * @private
     * @function
     * @param {Object} object
     * @param {string} property
     * @param {Object} descriptor
     * @returns {Object}
     * @see https://goo.gl/CZnEqg
     */
    function $defineProperty(object, property, descriptor) {
        if (Object.defineProperty) {
            Object.defineProperty(object, property, descriptor);
        } else {
            object[property] = descriptor.value;
        }

        return object;
    }

    /**
     * Returns true if the operands are strictly equal with no type conversion.
     *
     * @private
     * @function
     * @param {*} a
     * @param {*} b
     * @returns {boolean}
     * @see http://www.ecma-international.org/ecma-262/5.1/#sec-11.9.4
     */
    function $strictEqual(a, b) {
        return a === b;
    }

    /**
     * Returns true if the operand inputArg is undefined.
     *
     * @private
     * @function
     * @param {*} inputArg
     * @returns {boolean}
     */
    function $isUndefined(inputArg) {
        return $strictEqual(typeof inputArg, 'undefined');
    }

    /**
     * Provides a string representation of the supplied object in the form
     * "[object type]", where type is the object type.
     *
     * @private
     * @function
     * @param {*} inputArg The object for which a class string represntation
     *                     is required.
     * @returns {string} A string value of the form "[object type]".
     * @see http://www.ecma-international.org/ecma-262/5.1/#sec-15.2.4.2
     */
    function $toStringTag(inputArg) {
        var val;
        if (inputArg === null) {
            val = '[object Null]';
        } else if ($isUndefined(inputArg)) {
            val = '[object Undefined]';
        } else {
            val = pToString.call(inputArg);
        }

        return val;
    }

    /**
     * The string tag representation of a RegExp object.
     *
     * @private
     * @type {string}
     */
    stringTagRegExp = $toStringTag(getNativeFlags);

    /**
     * Returns true if the operand inputArg is a RegExp.
     *
     * @private
     * @function
     * @param {*} inputArg
     * @returns {boolean}
     */
    function $isRegExp(inputArg) {
        return $toStringTag(inputArg) === stringTagRegExp &&
                pHasOwn.call(inputArg, 'ignoreCase') &&
                typeof inputArg.ignoreCase === 'boolean' &&
                pHasOwn.call(inputArg, 'global') &&
                typeof inputArg.global === 'boolean' &&
                pHasOwn.call(inputArg, 'multiline') &&
                typeof inputArg.multiline === 'boolean' &&
                pHasOwn.call(inputArg, 'source') &&
                typeof inputArg.source === 'string';
    }

    /**
     * The abstract operation throws an error if its argument is a value that
     * cannot be converted to an Object, otherwise returns the argument.
     *
     * @private
     * @function
     * @param {*} inputArg The object to be tested.
     * @throws {TypeError} If inputArg is null or undefined.
     * @returns {*} The inputArg if coercible.
     * @see https://goo.gl/5GcmVq
     */
    function $requireObjectCoercible(inputArg) {
        var errStr;

        if (inputArg === null || $isUndefined(inputArg)) {
            errStr = 'Cannot convert argument to object: ' + inputArg;
            throw new TypeError(errStr);
        }

        return inputArg;
    }

    /**
     * The abstract operation converts its argument to a value of type string
     *
     * @private
     * @function
     * @param {*} inputArg
     * @returns {string}
     * @see https://people.mozilla.org/~jorendorff/es6-draft.html#sec-tostring
     */
    function $toString(inputArg) {
        var type,
            val;

        if (inputArg === null) {
            val = 'null';
        } else {
            type = typeof inputArg;
            if (type === 'string') {
                val = inputArg;
            } else if (type === 'undefined') {
                val = type;
            } else {
                if (type === 'symbol') {
                    throw new TypeError('Cannot convert symbol to string');
                }

                val = String(inputArg);
            }
        }

        return val;
    }

    /**
     * Returns a string only if the arguments is coercible otherwise throws an
     * error.
     *
     * @private
     * @function
     * @param {*} inputArg
     * @throws {TypeError} If inputArg is null or undefined.
     * @returns {string}
     */
    function $onlyCoercibleToString(inputArg) {
        return $toString($requireObjectCoercible(inputArg));
    }

    /**
     * The function evaluates the passed value and converts it to an integer.
     *
     * @private
     * @function
     * @param {*} inputArg The object to be converted to an integer.
     * @returns {number} If the target value is NaN, null or undefined, 0 is
     *                   returned. If the target value is false, 0 is returned
     *                   and if true, 1 is returned.
     * @see http://www.ecma-international.org/ecma-262/5.1/#sec-9.4
     */
    function $toInteger(inputArg) {
        var number = +inputArg,
            val = 0;

        if ($strictEqual(number, number)) {
            if (!number || number === Infinity || number === -Infinity) {
                val = number;
            } else {
                val = (number > 0 || -1) * Math.floor(Math.abs(number));
            }
        }

        return val;
    }

    /**
     * Copies a regex object. Allows adding and removing native flags while
     * copying the regex.
     *
     * @private
     * @function
     * @param {RegExp} regex Regex to copy.
     * @param {Object} [options] Allows specifying native flags to add or
     *                           remove while copying the regex.
     * @returns {RegExp} Copy of the provided regex, possibly with modified
     *                   flags.
     */
    function $copyRegExp(regex, options) {
        var flags,
            opts,
            rx;

        if (options !== null && typeof options === 'object') {
            opts = options;
        } else {
            opts = {};
        }

        // Get native flags in use
        flags = getNativeFlags.exec($toString(regex))[1];
        flags = $onlyCoercibleToString(flags);
        if (opts.add) {
            flags += opts.add;
            flags = flags.replace(clipDups, '');
        }

        if (opts.remove) {
            // Would need to escape `options.remove` if this was public
            rx = new RegExp('[' + opts.remove + ']+', 'g');
            flags = flags.replace(rx, '');
        }

        return new RegExp(regex.source, flags);
    }

    /**
     * The abstract operation ToLength converts its argument to an integer
     * suitable for use as the length of an array-like object.
     *
     * @private
     * @function
     * @param {*} inputArg The object to be converted to a length.
     * @returns {number} If len <= +0 then +0 else if len is +INFINITY then
     *                   2^53-1 else min(len, 2^53-1).
     * @see https://people.mozilla.org/~jorendorff/es6-draft.html#sec-tolength
     */
    function $toLength(inputArg) {
        return Math.min(Math.max($toInteger(inputArg), 0), MAX_SAFE_INTEGER);
    }

    /**
     * Copies a regex object so that it is suitable for use with searchOf and
     * searchLastOf methods.
     *
     * @private
     * @function
     * @param {RegExp} regex Regex to copy.
     * @returns {RegExp}
     */
    function $toSearchRegExp(regex) {
        return $copyRegExp(regex, {
            add: 'g',
            remove: 'y'
        });
    }

    /**
     * Returns true if the operand inputArg is a member of one of the types
     * Undefined, Null, Boolean, Number, Symbol, or String.
     *
     * @private
     * @function
     * @param {*} inputArg
     * @returns {boolean}
     * @see https://goo.gl/W68ywJ
     * @see https://goo.gl/ev7881
     */
    function $isPrimitive(inputArg) {
        var type = typeof inputArg;

        return type === 'undefined' ||
                inputArg === null ||
                type === 'boolean' ||
                type === 'string' ||
                type === 'number' ||
                type === 'symbol';
    }

    /**
     * The abstract operation converts its argument to a value of type Object
     * but fixes some environment bugs.
     *
     * @private
     * @function
     * @param {*} inputArg The argument to be converted to an object.
     * @throws {TypeError} If inputArg is not coercible to an object.
     * @returns {Object} Value of inputArg as type Object.
     * @see http://www.ecma-international.org/ecma-262/5.1/#sec-9.9
     */
    function $toObject(inputArg) {
        var object;

        if ($isPrimitive($requireObjectCoercible(inputArg))) {
            object = Object(inputArg);
        } else {
            object = inputArg;
        }

        return object;
    }

    /**
     * Converts a single argument that is an array-like object or list (eg.
     * arguments, NodeList, DOMTokenList (used by classList), NamedNodeMap
     * (used by attributes property)) into a new Array() and returns it.
     * This is a partial implementation of the ES6 Array.from
     *
     * @private
     * @function
     * @param {Object} arrayLike
     * @returns {Array}
     */
    function $toArray(arrayLike) {
        var object = $toObject(arrayLike),
            length = $toLength(object.length),
            array = [],
            index = 0;

        array.length = length;
        while (index < length) {
            array[index] = object[index];
            index += 1;
        }

        return array;
    }

    if (!String.prototype.searchOf) {
        /**
         * This method returns the index within the calling String object of
         * the first occurrence of the specified value, starting the search at
         * fromIndex. Returns -1 if the value is not found.
         *
         * @function
         * @this {string}
         * @param {RegExp|string} regex A regular expression object or a String.
         *                              Anything else is implicitly converted to
         *                              a String.
         * @param {Number} [fromIndex] The location within the calling string
         *                             to start the search from. It can be any
         *                             integer. The default value is 0. If
         *                             fromIndex < 0 the entire string is
         *                             searched (same as passing 0). If
         *                             fromIndex >= str.length, the method will
         *                             return -1 unless searchValue is an empty
         *                             string in which case str.length is
         *                             returned.
         * @returns {Number} If successful, returns the index of the first
         *                   match of the regular expression inside the
         *                   string. Otherwise, it returns -1.
         */
        $defineProperty(String.prototype, 'searchOf', {
            enumerable: false,
            configurable: true,
            writable: true,
            value: function (regex) {
                var str = $onlyCoercibleToString(this),
                    args = $toArray(arguments),
                    result = -1,
                    fromIndex,
                    match,
                    rx;

                if (!$isRegExp(regex)) {
                    return String.prototype.indexOf.apply(str, args);
                }

                if ($toLength(args.length) > 1) {
                    fromIndex = +args[1];
                    if (fromIndex < 0) {
                        fromIndex = 0;
                    }
                } else {
                    fromIndex = 0;
                }

                if (fromIndex >= $toLength(str.length)) {
                    return result;
                }

                rx = $toSearchRegExp(regex);
                rx.lastIndex = fromIndex;
                match = rx.exec(str);
                if (match) {
                    result = +match.index;
                }

                return result;
            }
        });
    }

    if (!String.prototype.searchLastOf) {
        /**
         * This method returns the index within the calling String object of
         * the last occurrence of the specified value, or -1 if not found.
         * The calling string is searched backward, starting at fromIndex.
         *
         * @function
         * @this {string}
         * @param {RegExp|string} regex A regular expression object or a String.
         *                              Anything else is implicitly converted to
         *                              a String.
         * @param {Number} [fromIndex] Optional. The location within the
         *                             calling string to start the search at,
         *                             indexed from left to right. It can be
         *                             any integer. The default value is
         *                             str.length. If it is negative, it is
         *                             treated as 0. If fromIndex > str.length,
         *                             fromIndex is treated as str.length.
         * @returns {Number} If successful, returns the index of the first
         *                   match of the regular expression inside the
         *                   string. Otherwise, it returns -1.
         */
        $defineProperty(String.prototype, 'searchLastOf', {
            enumerable: false,
            configurable: true,
            writable: true,
            value: function (regex) {
                var str = $onlyCoercibleToString(this),
                    args = $toArray(arguments),
                    result = -1,
                    fromIndex,
                    length,
                    match,
                    pos,
                    rx;

                if (!$isRegExp(regex)) {
                    return String.prototype.lastIndexOf.apply(str, args);
                }

                length = $toLength(str.length);
                if (!$strictEqual(args[1], args[1])) {
                    fromIndex = length;
                } else {
                    if ($toLength(args.length) > 1) {
                        fromIndex = $toInteger(args[1]);
                    } else {
                        fromIndex = length - 1;
                    }
                }

                if (fromIndex >= 0) {
                    fromIndex = Math.min(fromIndex, length - 1);
                } else {
                    fromIndex = length - Math.abs(fromIndex);
                }

                pos = 0;
                rx = $toSearchRegExp(regex);
                while (pos <= fromIndex) {
                    rx.lastIndex = pos;
                    match = rx.exec(str);
                    if (!match) {
                        break;
                    }

                    pos = +match.index;
                    if (pos <= fromIndex) {
                        result = pos;
                    }

                    pos += 1;
                }

                return result;
            }
        });
    }
}());

(function () {
    'use strict';

    /*
     * testing as follow to make sure that at least for one character regexp,
     * the result is the same as if we used indexOf
     */

    var pre = document.getElementById('out');

    function log(result) {
        pre.appendChild(document.createTextNode(result + '\n'));
    }

    function test(str) {
        var i = str.length + 2,
            r,
            a,
            b;

        while (i) {
            a = str.indexOf('a', i);
            b = str.searchOf(/a/, i);
            r = ['Failed', 'searchOf', str, i, a, b];
            if (a === b) {
                r[0] = 'Passed';
            }

            log(r);
            a = str.lastIndexOf('a', i);
            b = str.searchLastOf(/a/, i);
            r = ['Failed', 'searchLastOf', str, i, a, b];
            if (a === b) {
                r[0] = 'Passed';
            }

            log(r);
            i -= 1;
        }
    }

    /*
     * Look for the a among the xes
     */

    test('xxx');
    test('axx');
    test('xax');
    test('xxa');
    test('axa');
    test('xaa');
    test('aax');
    test('aaa');
}());
<pre id="out"></pre>




对于稀疏匹配的数据,使用string.search是跨浏览器最快的。 它将每个迭代的字符串重新切片为:

function lastIndexOfSearch(string, regex, index) {
  if(index === 0 || index)
     string = string.slice(0, Math.max(0,index));
  var idx;
  var offset = -1;
  while ((idx = string.search(regex)) !== -1) {
    offset += idx + 1;
    string = string.slice(idx + 1);
  }
  return offset;
}

对于密集的数据,我做了这个。 与执行方法相比,它非常复杂,但对于密集数据,它比我尝试的其他方法快2-10倍,比接受的解决方案快大约100倍。 要点是:

  1. 它调用一次传入的正则表达式来验证是否匹配或提前退出。 我这样做(?=在类似的方法中使用,但使用exec检查IE的速度要快得多。
  2. 它构造并缓存格式为'(r)的修改后的正则表达式。 (?! ?r)'
  3. 新的正则表达式被执行,并且该exec或第一个exec的结果被返回;

    function lastIndexOfGroupSimple(string, regex, index) {
        if (index === 0 || index) string = string.slice(0, Math.max(0, index + 1));
        regex.lastIndex = 0;
        var lastRegex, index
        flags = 'g' + (regex.multiline ? 'm' : '') + (regex.ignoreCase ? 'i' : ''),
        key = regex.source + '$' + flags,
        match = regex.exec(string);
        if (!match) return -1;
        if (lastIndexOfGroupSimple.cache === undefined) lastIndexOfGroupSimple.cache = {};
        lastRegex = lastIndexOfGroupSimple.cache[key];
        if (!lastRegex)
            lastIndexOfGroupSimple.cache[key] = lastRegex = new RegExp('.*(' + regex.source + ')(?!.*?' + regex.source + ')', flags);
        index = match.index;
        lastRegex.lastIndex = match.index;
        return (match = lastRegex.exec(string)) ? lastRegex.lastIndex - match[1].length : index;
    };
    

方法的jsPerf

我不明白上面测试的目的。 需要正则表达式的情况不可能与indexOf的调用进行比较,我认为这是首先制定方法的要点。 为了让测试通过,使用'xxx +(?! x)'比调整regex迭代的方式更有意义。




我还需要一个regexIndexOf函数,所以我自己regexIndexOf了一个函数。 不过,我怀疑,它是优化的,但我想它应该正常工作。

Array.prototype.regexIndexOf = function (regex, startpos = 0) {
    len = this.length;
    for(x = startpos; x < len; x++){
        if(typeof this[x] != 'undefined' && (''+this[x]).match(regex)){
            return x;
        }
    }
    return -1;
}

arr = [];
arr.push(null);
arr.push(NaN);
arr[3] = 7;
arr.push('asdf');
arr.push('qwer');
arr.push(9);
arr.push('...');
console.log(arr);
arr.regexIndexOf(/\d/, 4);



根据BaileyP的回答。 主要区别在于如果模式不匹配,这些方法返回-1

编辑:谢谢杰森Bunting的答案我有一个想法。 为什么不修改正则表达式的.lastIndex属性? 虽然这只适用于带有全局标志( /g )的模式。

编辑:更新以通过测试用例。

String.prototype.regexIndexOf = function(re, startPos) {
    startPos = startPos || 0;

    if (!re.global) {
        var flags = "g" + (re.multiline?"m":"") + (re.ignoreCase?"i":"");
        re = new RegExp(re.source, flags);
    }

    re.lastIndex = startPos;
    var match = re.exec(this);

    if (match) return match.index;
    else return -1;
}

String.prototype.regexLastIndexOf = function(re, startPos) {
    startPos = startPos === undefined ? this.length : startPos;

    if (!re.global) {
        var flags = "g" + (re.multiline?"m":"") + (re.ignoreCase?"i":"");
        re = new RegExp(re.source, flags);
    }

    var lastSuccess = -1;
    for (var pos = 0; pos <= startPos; pos++) {
        re.lastIndex = pos;

        var match = re.exec(this);
        if (!match) break;

        pos = match.index;
        if (pos <= startPos) lastSuccess = pos;
    }

    return lastSuccess;
}



RexExp实例已经有了一个lastIndex属性(如果它们是全局的),所以我在做的是复制正则表达式,稍微修改它以符合我们的目的,在字符串上exec它并查看lastIndex 。 这将不可避免地比在字符串上循环更快。 (你有足够的例子来说明如何将它放到字符串原型上,对吧?)

function reIndexOf(reIn, str, startIndex) {
    var re = new RegExp(reIn.source, 'g' + (reIn.ignoreCase ? 'i' : '') + (reIn.multiLine ? 'm' : ''));
    re.lastIndex = startIndex || 0;
    var res = re.exec(str);
    if(!res) return -1;
    return re.lastIndex - res[0].length;
};

function reLastIndexOf(reIn, str, startIndex) {
    var src = /\$$/.test(reIn.source) && !/\\\$$/.test(reIn.source) ? reIn.source : reIn.source + '(?![\\S\\s]*' + reIn.source + ')';
    var re = new RegExp(src, 'g' + (reIn.ignoreCase ? 'i' : '') + (reIn.multiLine ? 'm' : ''));
    re.lastIndex = startIndex || 0;
    var res = re.exec(str);
    if(!res) return -1;
    return re.lastIndex - res[0].length;
};

reIndexOf(/[abc]/, "tommy can eat");  // Returns 6
reIndexOf(/[abc]/, "tommy can eat", 8);  // Returns 11
reLastIndexOf(/[abc]/, "tommy can eat"); // Returns 11

您也可以将函数原型化到RegExp对象上:

RegExp.prototype.indexOf = function(str, startIndex) {
    var re = new RegExp(this.source, 'g' + (this.ignoreCase ? 'i' : '') + (this.multiLine ? 'm' : ''));
    re.lastIndex = startIndex || 0;
    var res = re.exec(str);
    if(!res) return -1;
    return re.lastIndex - res[0].length;
};

RegExp.prototype.lastIndexOf = function(str, startIndex) {
    var src = /\$$/.test(this.source) && !/\\\$$/.test(this.source) ? this.source : this.source + '(?![\\S\\s]*' + this.source + ')';
    var re = new RegExp(src, 'g' + (this.ignoreCase ? 'i' : '') + (this.multiLine ? 'm' : ''));
    re.lastIndex = startIndex || 0;
    var res = re.exec(str);
    if(!res) return -1;
    return re.lastIndex - res[0].length;
};


/[abc]/.indexOf("tommy can eat");  // Returns 6
/[abc]/.indexOf("tommy can eat", 8);  // Returns 11
/[abc]/.lastIndexOf("tommy can eat"); // Returns 11

我如何修改RegExp的快速说明:对于indexOf我只需确保设置了全局标志。 对于lastIndexOf ,我使用负面预读来查找最后一次出现,除非RegExp在字符串末尾已经匹配。




使用:

str.search(regex)

请参阅here.的文档here.






Links