times - javascript regex tester




How do you access the matched groups in a JavaScript regular expression? (9)

Terminology used in this answer:

  • Match indicates the result of running your RegEx pattern against your string like so: someString.match(regexPattern).
  • Matched patterns indicate all matched portions of the input string, which all reside inside the match array. These are all instances of your pattern inside the input string.
  • Matched groups indicate all groups to catch, defined in the RegEx pattern. (The patterns inside parentheses, like so: /format_(.*?)/g, where (.*?) would be a matched group.) These reside within matched patterns.

Description

To get access to the matched groups, in each of the matched patterns, you need a function or something similar to iterate over the match. There are a number of ways you can do this, as many of the other answers show. Most other answers use a while loop to iterate over all matched patterns, but I think we all know the potential dangers with that approach. It is necessary to match against a new RegExp() instead of just the pattern itself, which only got mentioned in a comment. This is because the .exec() method behaves similar to a generator functionit stops every time there is a match, but keeps its .lastIndex to continue from there on the next .exec() call.

Code examples

Below is an example of a function searchString which returns an Array of all matched patterns, where each match is an Array with all the containing matched groups. Instead of using a while loop, I have provided examples using both the Array.prototype.map() function as well as a more performant way – using a plain for-loop.

Concise versions (less code, more syntactic sugar)

These are less performant since they basically implement a forEach-loop instead of the faster for-loop.

// Concise ES6/ES2015 syntax
const searchString = 
    (string, pattern) => 
        string
        .match(new RegExp(pattern.source, pattern.flags))
        .map(match => 
            new RegExp(pattern.source, pattern.flags)
            .exec(match));

// Or if you will, with ES5 syntax
function searchString(string, pattern) {
    return string
        .match(new RegExp(pattern.source, pattern.flags))
        .map(match =>
            new RegExp(pattern.source, pattern.flags)
            .exec(match));
}

let string = "something format_abc",
    pattern = /(?:^|\s)format_(.*?)(?:\s|$)/;

let result = searchString(string, pattern);
// [[" format_abc", "abc"], null]
// The trailing `null` disappears if you add the `global` flag

Performant versions (more code, less syntactic sugar)

// Performant ES6/ES2015 syntax
const searchString = (string, pattern) => {
    let result = [];

    const matches = string.match(new RegExp(pattern.source, pattern.flags));

    for (let i = 0; i < matches.length; i++) {
        result.push(new RegExp(pattern.source, pattern.flags).exec(matches[i]));
    }

    return result;
};

// Same thing, but with ES5 syntax
function searchString(string, pattern) {
    var result = [];

    var matches = string.match(new RegExp(pattern.source, pattern.flags));

    for (var i = 0; i < matches.length; i++) {
        result.push(new RegExp(pattern.source, pattern.flags).exec(matches[i]));
    }

    return result;
}

let string = "something format_abc",
    pattern = /(?:^|\s)format_(.*?)(?:\s|$)/;

let result = searchString(string, pattern);
// [[" format_abc", "abc"], null]
// The trailing `null` disappears if you add the `global` flag

I have yet to compare these alternatives to the ones previously mentioned in the other answers, but I doubt this approach is less performant and less fail-safe than the others.

I want to match a portion of a string using a regular expression and then access that parenthesized substring:

var myString = "something format_abc"; // I want "abc"

var arr = /(?:^|\s)format_(.*?)(?:\s|$)/.exec(myString);

console.log(arr);     // Prints: [" format_abc", "abc"] .. so far so good.
console.log(arr[1]);  // Prints: undefined  (???)
console.log(arr[0]);  // Prints: format_undefined (!!!)

What am I doing wrong?


I've discovered that there was nothing wrong with the regular expression code above: the actual string which I was testing against was this:

"date format_%A"

Reporting that "%A" is undefined seems a very strange behaviour, but it is not directly related to this question, so I've opened a new one, Why is a matched substring returning "undefined" in JavaScript?.


The issue was that console.log takes its parameters like a printf statement, and since the string I was logging ("%A") had a special value, it was trying to find the value of the next parameter.


function getMatches(string, regex, index) {
  index || (index = 1); // default to the first capturing group
  var matches = [];
  var match;
  while (match = regex.exec(string)) {
    matches.push(match[index]);
  }
  return matches;
}


// Example :
var myString = 'Rs.200 is Debited to A/c ...2031 on 02-12-14 20:05:49 (Clear Bal Rs.66248.77) AT ATM. TollFree 1800223344 18001024455 (6am-10pm)';
var myRegEx = /clear bal.+?(\d+\.?\d{2})/gi;

// Get an array containing the first capturing group for every match
var matches = getMatches(myString, myRegEx, 1);

// Log results
document.write(matches.length + ' matches found: ' + JSON.stringify(matches))
console.log(matches);

function getMatches(string, regex, index) {
  index || (index = 1); // default to the first capturing group
  var matches = [];
  var match;
  while (match = regex.exec(string)) {
    matches.push(match[index]);
  }
  return matches;
}


// Example :
var myString = 'something format_abc something format_def something format_ghi';
var myRegEx = /(?:^|\s)format_(.*?)(?:\s|$)/g;

// Get an array containing the first capturing group for every match
var matches = getMatches(myString, myRegEx, 1);

// Log results
document.write(matches.length + ' matches found: ' + JSON.stringify(matches))
console.log(matches);

A one liner that is practical only if you have a single pair of parenthesis:

while ( ( match = myRegex.exec( myStr ) ) && matches.push( match[1] ) ) {};

Here’s a method you can use to get the n​th capturing group for each match:

function getMatches(string, regex, index) {
  index || (index = 1); // default to the first capturing group
  var matches = [];
  var match;
  while (match = regex.exec(string)) {
    matches.push(match[index]);
  }
  return matches;
}


// Example :
var myString = 'something format_abc something format_def something format_ghi';
var myRegEx = /(?:^|\s)format_(.*?)(?:\s|$)/g;

// Get an array containing the first capturing group for every match
var matches = getMatches(myString, myRegEx, 1);

// Log results
document.write(matches.length + ' matches found: ' + JSON.stringify(matches))
console.log(matches);

Last but not least, i found that one line code that worked fine for me (JS ES6) :

var reg = /#([\S]+)/igm; //get hashtags
var string = 'mi alegría es total! βœŒπŸ™Œ\n#fiestasdefindeaño #PadreHijo #buenosmomentos #france #paris';

var matches = (string.match(reg) || []).map(e => e.replace(reg, '$1'));
console.log(matches);

this will return : [fiestasdefindeaño, PadreHijo, buenosmomentos, france, paris]


There is no need to invoke the exec method! You can use "match" method directly on the string. Just don't forget the parentheses.

var str = "This is cool";
var matches = str.match(/(This is)( cool)$/);
console.log( JSON.stringify(matches) ); // will print ["This is cool","This is"," cool"] or something like that...

Position 0 has a string with all the results. Position 1 has the first match represented by parentheses, and position 2 has the second match isolated in your parentheses. Nested parentheses are tricky, so beware!


You can access capturing groups like this:

var myString = "something format_abc";
var myRegexp = /(?:^|\s)format_(.*?)(?:\s|$)/g;
var match = myRegexp.exec(myString);
console.log(match[1]); // abc

And if there are multiple matches you can iterate over them:

var myString = "something format_abc";
var myRegexp = /(?:^|\s)format_(.*?)(?:\s|$)/g;
match = myRegexp.exec(myString);
while (match != null) {
  // matched text: match[0]
  // match start: match.index
  // capturing group n: match[n]
  console.log(match[0])
  match = myRegexp.exec(myString);
}

Your code works for me (FF3 on Mac) even if I agree with PhiLo that the regex should probably be:

/\bformat_(.*?)\b/

(But, of course, I'm not sure because I don't know the context of the regex.)


/*Regex function for extracting object from "window.location.search" string.
 */

var search = "?a=3&b=4&c=7"; // Example search string

var getSearchObj = function (searchString) {

    var match, key, value, obj = {};
    var pattern = /(\w+)=(\w+)/g;
    var search = searchString.substr(1); // Remove '?'

    while (match = pattern.exec(search)) {
        obj[match[0].split('=')[0]] = match[0].split('=')[1];
    }

    return obj;

};

console.log(getSearchObj(search));




regex