[Javascript] How to avoid long nesting of asynchronous functions in Node.js


Answers

Kay, simply use one of these modules.

It will turn this:

dbGet('userIdOf:bobvance', function(userId) {
    dbSet('user:' + userId + ':email', 'bobvance@potato.egg', function() {
        dbSet('user:' + userId + ':firstName', 'Bob', function() {
            dbSet('user:' + userId + ':lastName', 'Vance', function() {
                okWeAreDone();
            });
        });
    });
});

Into this:

flow.exec(
    function() {
        dbGet('userIdOf:bobvance', this);

    },function(userId) {
        dbSet('user:' + userId + ':email', 'bobvance@potato.egg', this.MULTI());
        dbSet('user:' + userId + ':firstName', 'Bob', this.MULTI());
        dbSet('user:' + userId + ':lastName', 'Vance', this.MULTI());

    },function() {
        okWeAreDone()
    }
);
Question

I want to make a page that displays some data from a DB, so I have created some functions that get that data from my DB. I'm just a newbie in Node.js, so as far as I understand, if I want to use all of them in a single page (HTTP response) I'd have to nest them all:

http.createServer(function (req, res) {
  res.writeHead(200, {'Content-Type': 'text/html'});
  var html = "<h1>Demo page</h1>";
  getSomeDate(client, function(someData) {
    html += "<p>"+ someData +"</p>";
    getSomeOtherDate(client, function(someOtherData) {
      html += "<p>"+ someOtherData +"</p>";
      getMoreData(client, function(moreData) {
        html += "<p>"+ moreData +"</p>";
        res.write(html);
        res.end();
      });
    });
  });

If there are many functions like that, then the nesting becomes a problem.

Is there a way to avoid this? I guess it has to do with how you combine multiple asynchronous functions, which seems to be something fundamental.




What you need is a bit of syntactic sugar. Chek this out:

http.createServer(function (req, res) {
  res.writeHead(200, {'Content-Type': 'text/html'});
  var html = ["<h1>Demo page</h1>"];
  var pushHTML = html.push.bind(html);

  Queue.push( getSomeData.partial(client, pushHTML) );
  Queue.push( getSomeOtherData.partial(client, pushHTML) );
  Queue.push( getMoreData.partial(client, pushHTML) );
  Queue.push( function() {
    res.write(html.join(''));
    res.end();
  });
  Queue.execute();
}); 

Pretty neat, isn't it? You may notice that html became an array. That's partly because strings are immutable, so you better off with buffering your output in an array, than discarding larger and larger strings. The other reason is because of another nice syntax with bind.

Queue in the example is really just an example and along with partial can be implemented as follows

// Functional programming for the rescue
Function.prototype.partial = function() {
  var fun = this,
      preArgs = Array.prototype.slice.call(arguments);
  return function() {
    fun.apply(null, preArgs.concat.apply(preArgs, arguments));
  };
};

Queue = [];
Queue.execute = function () {
  if (Queue.length) {
    Queue.shift()(Queue.execute);
  }
};



You could use this trick with an array rather than nested functions or a module.

Far easier on the eyes.

var fs = require("fs");
var chain = [
    function() { 
        console.log("step1");
        fs.stat("f1.js",chain.shift());
    },
    function(err, stats) {
        console.log("step2");
        fs.stat("f2.js",chain.shift());
    },
    function(err, stats) {
        console.log("step3");
        fs.stat("f2.js",chain.shift());
    },
    function(err, stats) {
        console.log("step4");
        fs.stat("f2.js",chain.shift());
    },
    function(err, stats) {
        console.log("step5");
        fs.stat("f2.js",chain.shift());
    },
    function(err, stats) {
        console.log("done");
    },
];
chain.shift()();

You can extend the idiom for parallel processes or even parallel chains of processes:

var fs = require("fs");
var fork1 = 2, fork2 = 2, chain = [
    function() { 
        console.log("step1");
        fs.stat("f1.js",chain.shift());
    },
    function(err, stats) {
        console.log("step2");
        var next = chain.shift();
        fs.stat("f2a.js",next);
        fs.stat("f2b.js",next);
    },
    function(err, stats) {
        if ( --fork1 )
            return;
        console.log("step3");
        var next = chain.shift();

        var chain1 = [
            function() { 
                console.log("step4aa");
                fs.stat("f1.js",chain1.shift());
            },
            function(err, stats) { 
                console.log("step4ab");
                fs.stat("f1ab.js",next);
            },
        ];
        chain1.shift()();

        var chain2 = [
            function() { 
                console.log("step4ba");
                fs.stat("f1.js",chain2.shift());
            },
            function(err, stats) { 
                console.log("step4bb");
                fs.stat("f1ab.js",next);
            },
        ];
        chain2.shift()();
    },
    function(err, stats) {
        if ( --fork2 )
            return;
        console.log("done");
    },
];
chain.shift()();



I had the same problem. I've seen the major libs to node run async functions, and they presents so non-natural chaining (you need to use three or more methods confs etc) to build your code.

I spent some weeks developing a solution to be simple and easing to read. Please, give a try to EnqJS. All opinions will be appreciated.

Instead of:

http.createServer(function (req, res) {
  res.writeHead(200, {'Content-Type': 'text/html'});
  var html = "<h1>Demo page</h1>";
  getSomeDate(client, function(someData) {
    html += "<p>"+ someData +"</p>";
    getSomeOtherDate(client, function(someOtherData) {
      html += "<p>"+ someOtherData +"</p>";
      getMoreData(client, function(moreData) {
        html += "<p>"+ moreData +"</p>";
        res.write(html);
        res.end();
      });
    });
  });

with EnqJS:

http.createServer(function (req, res) {
  res.writeHead(200, {'Content-Type': 'text/html'});
  var html = "<h1>Demo page</h1>";

  enq(function(){
    var self=this;
    getSomeDate(client, function(someData){
      html += "<p>"+ someData +"</p>";
      self.return();
    })
  })(function(){
    var self=this;
    getSomeOtherDate(client, function(someOtherData){ 
      html += "<p>"+ someOtherData +"</p>";
      self.return();
    })
  })(function(){
    var self=this;
    getMoreData(client, function(moreData) {
      html += "<p>"+ moreData +"</p>";
      self.return();
      res.write(html);
      res.end();
    });
  });
});

Observe that the code appears to be bigger than before. But it isn't nested as before. To appear more natural, the chains are called imediately:

enq(fn1)(fn2)(fn3)(fn4)(fn4)(...)

And to say that it returned, inside the function we call:

this.return(response)



The most simple syntactical sugar i have seen is node-promise.

npm install node-promise || git clone https://github.com/kriszyp/node-promise

Using this you can chain async methods as:

firstMethod().then(secondMethod).then(thirdMethod);

The return value of each is available as argument in the next.







Using wire your code would look like this:

http.createServer(function (req, res) {
    res.writeHead(200, {'Content-Type': 'text/html'});

    var l = new Wire();

    getSomeDate(client, l.branch('someData'));
    getSomeOtherDate(client, l.branch('someOtherData'));
    getMoreData(client, l.branch('moreData'));

    l.success(function(r) {
        res.write("<h1>Demo page</h1>"+
            "<p>"+ r['someData'] +"</p>"+
            "<p>"+ r['someOtherData'] +"</p>"+
            "<p>"+ r['moreData'] +"</p>");
        res.end();
    });
});



Suppose you could do this:

http.createServer(function (req, res) {
    res.writeHead(200, {'Content-Type': 'text/html'});
    var html = "<h1>Demo page</h1>";
    chain([
        function (next) {
            getSomeDate(client, next);
        },
        function (next, someData) {
            html += "<p>"+ someData +"</p>";
            getSomeOtherDate(client, next);
        },
        function (next, someOtherData) {
            html += "<p>"+ someOtherData +"</p>";
            getMoreData(client, next);
        },
        function (next, moreData) {
            html += "<p>"+ moreData +"</p>";
            res.write(html);
            res.end();
        }
    ]);
});

You only need to implement chain() so that it partially applies each function to the next one, and immediately invokes only the first function:

function chain(fs) {
    var f = function () {};
    for (var i = fs.length - 1; i >= 0; i--) {
        f = fs[i].partial(f);
    }
    f();
}



After the others responded, you stated that your problem were local variables. It seems an easy way to do this is to write one outer function to contain those local variables, then use a bunch of named inner functions and access them by name. This way, you'll only ever nest two deep, regardless of how many functions you need to chain together.

Here is my newbie's attempt at using the mysql Node.js module with nesting:

function with_connection(sql, bindings, cb) {
    pool.getConnection(function(err, conn) {
        if (err) {
            console.log("Error in with_connection (getConnection): " + JSON.stringify(err));
            cb(true);
            return;
        }
        conn.query(sql, bindings, function(err, results) {
            if (err) {
                console.log("Error in with_connection (query): " + JSON.stringify(err));
                cb(true);
                return;
            }
            console.log("with_connection results: " + JSON.stringify(results));
            cb(false, results);
        });
    });
}

The following is a rewrite using named inner functions. The outer function with_connection can be used as a holder for local variables, too. (Here, I've got the parameters sql, bindings, cb that act in a similar way, but you can just define some additional local variables in with_connection.)

function with_connection(sql, bindings, cb) {

    function getConnectionCb(err, conn) {
        if (err) {
            console.log("Error in with_connection/getConnectionCb: " + JSON.stringify(err));
            cb(true);
            return;
        }
        conn.query(sql, bindings, queryCb);
    }

    function queryCb(err, results) {
        if (err) {
            console.log("Error in with_connection/queryCb: " + JSON.stringify(err));
            cb(true);
            return;
        }
        cb(false, results);
    }

    pool.getConnection(getConnectionCb);
}

I had been thinking that perhaps it would be possible to make an object with instance variables, and to use these instance variables as a replacement for the local variables. But now I find that the above approach using nested functions and local variables is simpler and easier to understand. It takes some time to unlearn OO, it seems :-)

So here is my previous version with an object and instance variables.

function DbConnection(sql, bindings, cb) {
    this.sql = sql;
    this.bindings = bindings;
    this.cb = cb;
}
DbConnection.prototype.getConnection = function(err, conn) {
    var self = this;
    if (err) {
        console.log("Error in DbConnection.getConnection: " + JSON.stringify(err));
        this.cb(true);
        return;
    }
    conn.query(this.sql, this.bindings, function(err, results) { self.query(err, results); });
}
DbConnection.prototype.query = function(err, results) {
    var self = this;
    if (err) {
        console.log("Error in DbConnection.query: " + JSON.stringify(err));
        self.cb(true);
        return;
    }
    console.log("DbConnection results: " + JSON.stringify(results));
    self.cb(false, results);
}

function with_connection(sql, bindings, cb) {
    var dbc = new DbConnection(sql, bindings, cb);
    pool.getConnection(function (err, conn) { dbc.getConnection(err, conn); });
}

It turns out that bind can be used to some advantage. It allows me to get rid of the somewhat ugly anonymous functions I've created that didn't do anything much, except to forward themselves to a method call. I couldn't pass the method directly because it would have been involved with the wrong value of this. But with bind, I can specify the value of this that I want.

function DbConnection(sql, bindings, cb) {
    this.sql = sql;
    this.bindings = bindings;
    this.cb = cb;
}
DbConnection.prototype.getConnection = function(err, conn) {
    var f = this.query.bind(this);
    if (err) {
        console.log("Error in DbConnection.getConnection: " + JSON.stringify(err));
        this.cb(true);
        return;
    }
    conn.query(this.sql, this.bindings, f);
}
DbConnection.prototype.query = function(err, results) {
    if (err) {
        console.log("Error in DbConnection.query: " + JSON.stringify(err));
        this.cb(true);
        return;
    }
    console.log("DbConnection results: " + JSON.stringify(results));
    this.cb(false, results);
}

// Get a connection from the pool, execute `sql` in it
// with the given `bindings`.  Invoke `cb(true)` on error,
// invoke `cb(false, results)` on success.  Here,
// `results` is an array of results from the query.
function with_connection(sql, bindings, cb) {
    var dbc = new DbConnection(sql, bindings, cb);
    var f = dbc.getConnection.bind(dbc);
    pool.getConnection(f);
}

Of course, none of this is proper JS with Node.js coding -- I just spent a couple of hours on it. But maybe with a little polishing this technique can help?




Use Fibers https://github.com/laverdet/node-fibers it makes asynchronous code looks like synchronous (without blocking)

I personally use this little wrapper http://alexeypetrushin.github.com/synchronize Sample of code from my project (every method is actually asynchronous, working with async file IO) I even afraid of imagine what a mess it would be with callback or async-control-flow helper libraries.

_update: (version, changesBasePath, changes, oldSite) ->
  @log 'updating...'
  @_updateIndex version, changes
  @_updateFiles version, changesBasePath, changes
  @_updateFilesIndexes version, changes
  configChanged = @_updateConfig version, changes
  @_updateModules version, changes, oldSite, configChanged
  @_saveIndex version
  @log "updated to #{version} version"



I've recently created simpler abstraction called wait.for to call async functions in sync mode (based on Fibers). It's at an early stage but works. It is at:

https://github.com/luciotato/waitfor

Using wait.for, you can call any standard nodejs async function, as if it were a sync function.

using wait.for your code could be:

var http=require('http');
var wait=require('wait.for');

http.createServer(function(req, res) {
  wait.launchFiber(handleRequest,req, res); //run in a Fiber, keep node spinning
}).listen(8080);


//in a fiber
function handleRequest(req, res) {
  res.writeHead(200, {'Content-Type': 'text/html'});
  var html = "<h1>Demo page</h1>";
  var someData = wait.for(getSomeDate,client);
  html += "<p>"+ someData +"</p>";
  var someOtherData = wait.for(getSomeOtherDate,client);
  html += "<p>"+ someOtherData +"</p>";
  var moreData = wait.for(getMoreData,client);
  html += "<p>"+ moreData +"</p>";
  res.write(html);
  res.end();
};

...or if you want to be less verbose (and also add error catching)

//in a fiber
function handleRequest(req, res) {
  try {
    res.writeHead(200, {'Content-Type': 'text/html'});
    res.write(
    "<h1>Demo page</h1>" 
    + "<p>"+ wait.for(getSomeDate,client) +"</p>"
    + "<p>"+ wait.for(getSomeOtherDate,client) +"</p>"
    + "<p>"+ wait.for(getMoreData,client) +"</p>"
    );
    res.end();
  }
  catch(err) {
   res.end('error '+e.message); 
  }

};

In all the cases, getSomeDate, getSomeOtherDate and getMoreData should be standard async functions with the last parameter a function callback(err,data)

as in:

function getMoreData(client, callback){
  db.execute('select moredata from thedata where client_id=?',[client.id],
       ,function(err,data){
          if (err) callback(err);
          callback (null,data);
        });
}