Alphabits combines the Cheerio and Request libraries into a Node.js scraper that doesn't have the same memory leak issues as jsdom-based scrapers like node-scraper. It also allows for rate limiting; the rate limit is specified in milliseconds.
// Basic usage: fetch one page and print the HTML of its top-story headline link.
var alphabits = require('/Users/mike/code/alphabits/lib/alphabits.js');
alphabits('http://america.aljazeera.com', function(err, $){
  // Error-first callback: stop here on failure instead of calling `$`,
  // which presumably is not usable when the request failed (TODO confirm).
  if (err) {
    console.error(err);
    return;
  }
  // `$` is used as a selector function; presumably a Cheerio handle for the
  // fetched page (TODO confirm against the module).
  var headline = $('h1.topStories-headline a').html();
  console.log(headline);
});
// Rate-limited usage: rateLimit() returns the scraper function used below.
// The argument is in milliseconds (5000 = 5 seconds); presumably it is the
// minimum spacing between requests (TODO confirm against the module).
var alphabits = require('/Users/mike/code/alphabits/lib/alphabits.js').rateLimit(5000);
// Issue five requests for the same page; each callback prints the headline.
for (var i = 0; i < 5; i++){
  alphabits('http://america.aljazeera.com', function(err, $){
    // Error-first callback: report the failure and skip parsing, since `$`
    // presumably is not usable when the request failed (TODO confirm).
    if (err) {
      console.error(err);
      return;
    }
    var headline = $('h1.topStories-headline a').html();
    console.log(headline);
  });
}