node.js - nodejs 用superagent抓取网页gbk编码乱码的问题
巴扎黑
巴扎黑 2017-04-17 14:01:04
0
3
763

网上找了太多了。什么比如iconv-lite什么的,都试过了。就是不行。

// Snippet from the question: fetch the page with superagent, then try to
// re-decode the response text as GBK with iconv-lite.
// NOTE(review): `href` is assigned without a declaration in this fragment;
// presumably it is declared in surrounding code that is not shown.
href = 'http://www.qq.com/';
            console.log(href);
            superagent.get(href).end(function (err, res) {
                // NOTE(review): `err` is never checked, so `res` may be undefined here.
                // res.text is used as a string below, not as the raw response bytes;
                // presumably superagent has already decoded the body — confirm
                // against the superagent documentation.
                var str = res.text;
                // Building a Buffer from a string re-encodes it (UTF-8 by default),
                // so `buf` does not contain the original GBK bytes of the page.
                var buf = new Buffer(str);
                // Decoding those re-encoded bytes as GBK therefore yields garbled text.
                str = iconv.decode(buf, 'GBK');
                console.log(str);


            });  
        
        折腾了一夜了,可有解决方案?
        
        
        
        
巴扎黑
巴扎黑

reply all(3)
刘奇

http://web-engineer.cn/article/29

阿神
var charset = require('superagent-charset');
var superagent = charset(require('superagent'));

var href = 'http://www.qq.com/';
// Let superagent-charset decode the response body as GBK, so res.text is
// already a correctly decoded string and no manual iconv step is needed.
superagent.get(href).charset('gbk').end(function (err, res) {
  // On a failed request `res` may be undefined; report the error instead of
  // crashing on `res.text`.
  if (err) {
    console.error(err);
    return;
  }
  console.log(res.text);
});

Use the superagent-charset module.


I just took the time to modify this module: https://github.com/52cik/superagent-charset
I don't know when the author will merge my code,
so in the meantime you can install my fork directly with `npm i 52cik/superagent-charset` and use it.

How to use:

var charset = require('superagent-charset');
var superagent = charset(require('superagent'));

var href = 'http://www.qq.com/';
superagent
  .get(href)
  .charset() // with no argument, the encoding is detected automatically
  .end(function (err, res) {
    // On a failed request `res` may be undefined; report the error instead of
    // crashing on `res.text`.
    if (err) {
      console.error(err);
      return;
    }
    console.log(res.text);
  });
伊谢尔伦

Don't use superagent for this; use the http module provided by Node.js instead.
The final content after decoding is UTF-8.

new Buffer(string[, encoding]) performs a transcoding step of its own, and its default encoding is UTF-8. In other words,
in the snippet below,
the text is first encoded as GBK, then converted with toString(), then wrapped in a new Buffer, and finally decoded as GBK again, so the result comes out garbled:

// Demonstration of the lossy round trip (intentionally broken):
// 1. encode the text to GBK bytes;
var buffer2=iconv.encode("不是superagent,使用nodejs提供的http模块","GBK");
// 2. toString() with no argument interprets those GBK bytes as UTF-8, and
//    new Buffer(string) re-encodes the resulting string as UTF-8, so the
//    bytes handed to iconv.decode are no longer the original GBK bytes.
var str=iconv.decode(new Buffer(buffer2.toString()), "GBK");
// 3. the printed text is therefore garbled rather than the original sentence.
console.log(str);

Here is an example that uses the native http module:

var http=require('http');
var iconv = require('iconv-lite');
var zlib=require('zlib');

var clientRequest = http.request('http://www.qq.com', function(response) {
    var dataReceived=false,responseBuffer, responseLength, responseLengthReceived = 0;

    responseLength = parseInt(response.headers["content-length"], 10);
    responseLength = !isNaN(responseLength) ? responseLength : 0;

    responseBuffer = new Buffer(responseLength);

    response.on("data", receiveData);
    response.on("end", processReceivedData);

    function receiveData(chunk){
        if (!chunk.length || dataReceived) {
            return;
        }

        if (responseLengthReceived + chunk.length > responseBuffer.length) {
            if (responseLengthReceived + chunk.length <= 1024 * 1024 * 16) {
                var tmpNewBuffer = new Buffer(responseLengthReceived + chunk.length);
                responseBuffer.copy(tmpNewBuffer, 0, 0, responseBuffer.length);
                chunk.copy(tmpNewBuffer, responseBuffer.length, 0, chunk.length);
                responseBuffer = tmpNewBuffer;
            }
        } else {
            chunk.copy(responseBuffer, responseLengthReceived, 0, chunk.length);
        }

        responseLengthReceived += chunk.length;
    }

    function processReceivedData() {
        responseBuffer = responseBuffer.slice(0, responseLengthReceived);
        var decodeAndReturnResponse = function (error, responseBuffer) {
            var responseBody =iconv.decode(responseBuffer, "GBK");
            console.log(responseBody);
        };
        
        if (contentEncoding && /(gzip|deflate)/.test(contentEncoding)) {
            console.log("zlib.unzip");
            zlib.unzip(responseBuffer, decodeAndReturnResponse);
        } else {
            decodeAndReturnResponse(undefined, responseBuffer);
        }
    }
});

clientRequest.end();
Latest Downloads
More>
Web Effects
Website Source Code
Website Materials
Front End Template