terminal-scrapearange
v0.0.2 terminal scrape arange
terminal-scrapearange
Terminal interface implementation for ranged web scraping.
const scrapeArange = require('terminal-scrapearange');
// scrapeArange.logSill(<message>): for less important logs (printed only if verbose is enabled)
// scrapeArange.logVerb(<message>): for important logs (printed only if verbose is enabled)
// scrapeArange.logErr(<message>): for error logs (printed only if verbose is enabled)
// scrapeArange.request(<options>): make an HTTP/HTTPS request (follows redirects)
// scrapeArange.main(<options>): terminal interface
scrapeArange.logSill('> GET somewebsite.org')
// > GET somewebsite.org (in grey, if verbose enabled)
scrapeArange.logVerb('Scraping post 12...')
// > Scraping post 12... (in bright yellow, if verbose enabled)
scrapeArange.logErr('ERR: 12 failed')
// > ERR: 12 failed (in bright red, if verbose enabled)
scrapeArange.request({
protocol: 'https:',
hostname: 'somewebsite.org',
port: 443,
path: '/',
method: 'GET'
}).then((html) => console.log(html));
// <html><head><title>somewebsite</title>...
if(require.main===module) scrapeArange.main({
output: null, // target writestream, like fs.createWriteStream('output.txt')
retries: 4, // times to retry failed requests
connections: 4, // maximum number of parallel connections
timegap: 250, // minimum time gap between requests in milliseconds (doubles if a request fails)
verbose: false, // get detailed output?
method: () => {}, // function that scrapes the HTML and returns a JSON object
});
// { /* object returned from method() */ }
// { /* another object returned from method() */ }
// ... (redirected to output file if specified with -o|--output)
// STDERR: [ /* array of failed ids, if any */ ]
npm i terminal-scrapearange@0.0.2
Metadata
- MIT
- Whatever
- [email protected]
- released 1/3/2018