node.js

回调函数

  1. 同步执行(阻塞)

     // Blocking variant: readFileSync returns only after the whole file has
     // been read, so the closing message is printed after the file contents.
     const fs = require("fs");
     const contents = fs.readFileSync('input.txt');
     console.log(String(contents));
     console.log("程序执行结束!");
    
  2. 异步执行(非阻塞)

     const fs = require("fs");

     // Printed by the timer below; where it lands relative to the file
     // contents shows which of the two asynchronous operations finished first.
     const tellEnd = () => {
         console.log("程序执行结束!");
     };

     // Non-blocking read: readFile returns immediately and the callback runs
     // later, once the read has finished or failed.
     fs.readFile('./test.txt', (err, data) => {
         if (err) {
             console.error(err);
             return;
         }
         console.log(data.toString());
     });

     // With a 0 ms delay tellEnd typically fires before the contents of
     // test.txt are printed.
     setTimeout(tellEnd, 0);
     // With a 1 s delay (setTimeout(tellEnd, 1000) instead) the contents are
     // printed first and tellEnd runs afterwards.
     // Either way the script is not blocked while the file is being read.
    

Node爬虫

  1. 开始前的准备

     # Install the system libraries and fonts listed below in one
     # non-interactive yum transaction (-y answers every prompt with yes).
     # presumably these are what the browser started by puppeteer needs — TODO confirm
     yum install -y \
     pango.x86_64 \
     libXcomposite.x86_64 \
     libXcursor.x86_64 \
     libXdamage.x86_64 \
     libXext.x86_64 \
     libXi.x86_64 \
     libXtst.x86_64 \
     cups-libs.x86_64 \
     libXScrnSaver.x86_64 \
     libXrandr.x86_64 \
     GConf2.x86_64 \
     alsa-lib.x86_64 \
     atk.x86_64 \
     gtk3.x86_64 \
     ipa-gothic-fonts \
     xorg-x11-fonts-100dpi \
     xorg-x11-fonts-75dpi \
     xorg-x11-utils \
     xorg-x11-fonts-cyrillic \
     xorg-x11-fonts-Type1 \
     xorg-x11-fonts-misc
    
  2. 安装 Node.js(8.x.x 或兼容的更高版本)

  3. 创建一个目录

     mkdir /pa_node
     cd /pa_node
    
  4. 创建文件 .npmrc(用来指定国内镜像源,加快依赖下载)

     vim .npmrc
    
     # Use the Taobao mirror as the npm package registry.
     registry=https://registry.npm.taobao.org
     # Mirror URLs for the chromedriver, phantomjs, electron, node-sass and
     # puppeteer downloads — presumably read by those packages' install scripts.
     # NOTE(review): confirm these Taobao hosts are still being served.
     chromedriver_cdnurl=http://npm.taobao.org/mirrors/chromedriver
     phantomjs_cdnurl=https://npm.taobao.org/dist/phantomjs
     electron_mirror=https://npm.taobao.org/mirrors/electron/
     sass_binary_site=https://npm.taobao.org/mirrors/node-sass/
     puppeteer_download_host=https://cdn.npm.taobao.org/dist
    
     ESC
     :wq
    
  5. 创建文件package.json

     vim package.json
    
     {
       "name": "pa_node",
       "version": "1.0.0",
       "main": "index.js",
       "dependencies": {
         "redis": "^2.8.0",
         "hiredis": "^0.5.0",
         "request": "^2.83.0",
         "express": "^4.16.2",
         "puppeteer": "^0.13.0"
       }
     }
    
     ESC
     :wq
    
  6. 创建文件 index.js(我们把爬取到的数据存入 Redis)

     vim index.js
    
     // puppeteer launches and drives the browser used for scraping.
     const puppeteer = require('puppeteer');
     // Redis client library; scraped results are written and read through it.
     const redis = require("redis");
     // One shared connection, created with port '6379' and host '127.0.0.1'.
     const client = redis.createClient('6379', '127.0.0.1');
    
     /**
      * Scrapes one page with puppeteer and stores the result in Redis.
      *
      * Relies on names defined elsewhere in this script: `puppeteer`, the
      * Redis `client`, the target URL `page` and the Redis `key`.
      */
     class Spider {
         /**
          * Runs the whole job: start the browser, then scrape and store.
          * @returns {Promise<void>}
          */
         async main() {
             await this.init();
             await this.getDomData();
         }

         /**
          * Launches the browser, opens a tab and configures it.
          * Sets `this.browser` and `this.page`.
          * @returns {Promise<void>}
          */
         async init() {
             this.browser = await puppeteer.launch({'args': ['--no-sandbox', '--disable-setuid-sandbox']});
             try {
                 this.page = await this.browser.newPage();
                 await Promise.all([
                     this.page.setUserAgent('Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.19 Safari/537.36'),
                     this.page.setViewport({width: 1100, height: 1080}),
                     this.page.setJavaScriptEnabled(true)
                 ]);
             } catch (err) {
                 // Do not leave a browser process behind when tab setup fails.
                 await this.browser.close();
                 throw err;
             }
         }

         /**
          * Opens `page`, extracts the data, stores it under `key`, reads it
          * back, then releases the Redis connection and the browser.
          *
          * Both Redis calls are awaited so that clean-up only starts after
          * Redis has answered, and clean-up also runs when scraping throws.
          * @returns {Promise<void>}
          */
         async getDomData() {
             try {
                 await this.page.goto(page);
                 // Selector format: #id > .class > div > li:nth-child(3)
                 // li:nth-child(3) matches an <li> that is the 3rd child of
                 // its parent element (e.g. a <ul>).
                 const name = await this.page.$eval('.container > div > div > h4 > strong', el => el.innerText);
                 // evaluate() runs the callback inside the page. Both the outer
                 // callback and the inner mapping callback must return their
                 // value, otherwise nothing comes back to this script.
                 const datas = await this.page.evaluate(() => {
                     const rows = document.querySelectorAll('body > div > div:nth-child(9) > div');
                     return Array.from(rows, (item) => {
                         const data = item.querySelector('span > span').innerText;
                         return {data};
                     });
                 });
                 const result = JSON.stringify({name, datas});
                 await this.saveData(key, result);
                 await this.getData(key);
             } finally {
                 // Quit once, here, instead of from inside each Redis callback.
                 client.quit();
                 await this.browser.close();
             }
         }

         /**
          * Stores `data` under `key` in Redis and logs the reply.
          * @param {string} key - Redis key to write.
          * @param {string} data - Value to store.
          * @returns {Promise<*>} Settles once Redis has answered: resolves
          *     with the reply, or with undefined after logging a command error.
          */
         saveData(key, data) {
             return new Promise((resolve) => {
                 client.set(key, data, (err, res) => {
                     if (err) {
                         console.log("Error:", err);
                         resolve();
                         return;
                     }
                     console.log(res);
                     resolve(res);
                 });
             });
         }

         /**
          * Reads the value stored under `key` from Redis and logs it.
          * @param {string} key - Redis key to read.
          * @returns {Promise<*>} Settles once Redis has answered: resolves
          *     with the value, or with undefined after logging a command error.
          */
         getData(key) {
             return new Promise((resolve) => {
                 client.get(key, (err, res) => {
                     if (err) {
                         console.log("Error:", err);
                         resolve();
                         return;
                     }
                     console.log(res);
                     resolve(res);
                 });
             });
         }
     }
    
     // Command-line arguments after `node index.js`. slice() copies them, so
     // process.argv itself is left untouched. Not used further in this script.
     const parameter = process.argv.slice(2);
     // URL to scrape and Redis key to store under; both are read by
     // Spider.getDomData.
     const page = 'http://www.test.com/';
     const key = 'pa_node';
     const spider = new Spider();
     // main() returns a promise: report a failure and exit non-zero instead
     // of leaving the rejection unhandled.
     spider.main().catch((err) => {
         console.error(err);
         process.exitCode = 1;
     });
    
     ESC
     :wq
    
  7. 运行

     npm install
     node index.js
    

Node其它

  1. 模板引擎art-template参考链接
@耿志环 2012-∞ 冀ICP备17033181号, powered by Gitbook修订: 2018-05-23 16:25:59

results matching ""

    No results matching ""