在web worker中,如何从字符串中查找html属性?

时间:2022-01-24 18:19:43

Inside a web worker, I have an html string like:

在web worker中,我有一个html字符串,如:

"<div id='foo'>  <img src='bar'></img>  <ul id='baz'></ul>  </div>"

Is there any library I can import to easily access id and src attributes of the different tags ? Is regex the only way inside a worker ?

有没有可以导入的库,能方便地访问各个标签的 id 和 src 属性?在 worker 内部,正则表达式是唯一的办法吗?

1 个解决方案

#1


2  

There are two ways to solve this problem efficiently:

有两种方法可以有效地解决这个问题:

Regex

With the risk of getting false positives, you can use something like:

如果可以接受出现误报(错误匹配)的风险,可以使用类似下面的正则表达式:

// Extracts the src attribute of the first <img> tag found in `string`.
// Capture groups: 1 = opening quote, 2 = attribute value, 3 = closing quote.
//  - `\s` after "<img" accepts any whitespace (tab, newline), not only a space.
//  - `(?:[^>]*?\s)?` requires whitespace directly before "src", so attributes
//    such as data-src= are not mistaken for src=.
//  - The value is consumed one character per iteration; nesting a `+` inside
//    the repeated group would backtrack exponentially when the closing quote
//    is missing.
// Only quoted values are recognized; this remains a heuristic, not a parser.
var pattern = /<img\s(?:[^>]*?\s)?src=(["'])((?:[^"']|(?!\1)["'])*)(\1)/i;
var match = string.match(pattern);
// Fall back to an empty string when no quoted src attribute was found.
var src = match ? match[2] : '';

Built-in parser & messaging

If getting the HTML right is a critical requirement, just let the browser parse the HTML, by passing the string to the caller. Here's a full example:

如果正确解析 HTML 是硬性要求,只需把字符串传给调用方,让浏览器来解析 HTML。下面是一个完整的例子:

Caller:

调用方:

// Main-thread side: start the worker and serve the requests it posts back.
var worker = new Worker('worker.js');
worker.addEventListener('message', function handleWorkerRequest(event) {
    var request = event.data;
    // Nothing to do for empty messages.
    if (!request) {
        return;
    }

    // 'debug' requests are simply written to the console.
    if (request.method === 'debug') {
        console.log(request.data);
        return;
    }

    // Every other method except 'getsrc' is ignored.
    if (request.method !== 'getsrc') {
        return;
    }

    // Parse the HTML in a separate document: unlike document.createElement,
    // etc, this does not load the images when the HTML is parsed.
    var sandbox = document.implementation.createHTMLDocument('');
    sandbox.body.innerHTML = request.data;

    // Collect the raw src attribute of every <img> element, in document order.
    var sources = Array.prototype.map.call(
        sandbox.getElementsByTagName('img'),
        function (image) {
            return image.getAttribute('src');
        }
    );

    // Echo the messageID so the worker can match this reply to its request.
    worker.postMessage({
        messageID: request.messageID,
        result: sources
    });
});

worker.js

worker.js

// A simple generic messaging API
// `callbacks` maps a request's messageID to the function awaiting its reply;
// `lastMessageID` is the most recently issued ID.
var callbacks = {};
var lastMessageID = 0;
// Routes each incoming message to the callback registered under its messageID
// and passes it the message's `result`. Messages without a payload, or whose
// messageID has no registered callback, are ignored.
addEventListener('message', function(e) {
    var reply = e.data;
    // Ignore empty messages instead of throwing on the property access below.
    if (!reply) return;

    var id = reply.messageID;
    // Own-property check: an ID such as "constructor" must not resolve to a
    // member inherited from Object.prototype.
    if (!Object.prototype.hasOwnProperty.call(callbacks, id)) return;

    var callback = callbacks[id];
    // Each request expects a single reply: release the entry before invoking
    // the callback so the table does not grow without bound and the entry is
    // removed even if the callback throws.
    delete callbacks[id];
    callback(reply.result);
});
// Posts a request to the other side of the worker channel.
//   method   - name of the operation being requested (e.g. 'getsrc', 'debug')
//   data     - payload forwarded unchanged with the request
//   callback - optional; when given, it is stored under the new messageID
// Every call takes the next messageID from the shared counter.
function sendRequest(method, data, callback) {
    lastMessageID++;
    var id = lastMessageID;

    if (callback) {
        callbacks[id] = callback;
    }

    var request = {
        method: method,
        data: data,
        messageID: id
    };
    postMessage(request);
}

// Example: request the image sources of a small HTML sample, then report the
// reply through a 'debug' request.
sendRequest(
    'getsrc',
    [
        '<img src="foo.png">',
        "<img src='bar.png'>",
        // This <img> is written inside a <textarea>; judging by its src value
        // it is expected to be absent from the reply.
        '<textarea><img src="should.not.be.visible"></textarea>'
    ].join(''),
    function reportSources(sources) {
        var summary = 'Received: ' + sources.join(', ');
        sendRequest('debug', summary);
    }
);

#1


2  

There are two ways to solve this problem efficiently:

有两种方法可以有效地解决这个问题:

Regex

With the risk of getting false positives, you can use something like:

如果可以接受出现误报(错误匹配)的风险,可以使用类似下面的正则表达式:

// Extracts the src attribute of the first <img> tag found in `string`.
// Capture groups: 1 = opening quote, 2 = attribute value, 3 = closing quote.
//  - `\s` after "<img" accepts any whitespace (tab, newline), not only a space.
//  - `(?:[^>]*?\s)?` requires whitespace directly before "src", so attributes
//    such as data-src= are not mistaken for src=.
//  - The value is consumed one character per iteration; nesting a `+` inside
//    the repeated group would backtrack exponentially when the closing quote
//    is missing.
// Only quoted values are recognized; this remains a heuristic, not a parser.
var pattern = /<img\s(?:[^>]*?\s)?src=(["'])((?:[^"']|(?!\1)["'])*)(\1)/i;
var match = string.match(pattern);
// Fall back to an empty string when no quoted src attribute was found.
var src = match ? match[2] : '';

Built-in parser & messaging

If getting the HTML right is a critical requirement, just let the browser parse the HTML, by passing the string to the caller. Here's a full example:

如果正确解析 HTML 是硬性要求,只需把字符串传给调用方,让浏览器来解析 HTML。下面是一个完整的例子:

Caller:

调用方:

// Main-thread side: start the worker and serve the requests it posts back.
var worker = new Worker('worker.js');
worker.addEventListener('message', function handleWorkerRequest(event) {
    var request = event.data;
    // Nothing to do for empty messages.
    if (!request) {
        return;
    }

    // 'debug' requests are simply written to the console.
    if (request.method === 'debug') {
        console.log(request.data);
        return;
    }

    // Every other method except 'getsrc' is ignored.
    if (request.method !== 'getsrc') {
        return;
    }

    // Parse the HTML in a separate document: unlike document.createElement,
    // etc, this does not load the images when the HTML is parsed.
    var sandbox = document.implementation.createHTMLDocument('');
    sandbox.body.innerHTML = request.data;

    // Collect the raw src attribute of every <img> element, in document order.
    var sources = Array.prototype.map.call(
        sandbox.getElementsByTagName('img'),
        function (image) {
            return image.getAttribute('src');
        }
    );

    // Echo the messageID so the worker can match this reply to its request.
    worker.postMessage({
        messageID: request.messageID,
        result: sources
    });
});

worker.js

worker.js

// A simple generic messaging API
// `callbacks` maps a request's messageID to the function awaiting its reply;
// `lastMessageID` is the most recently issued ID.
var callbacks = {};
var lastMessageID = 0;
// Routes each incoming message to the callback registered under its messageID
// and passes it the message's `result`. Messages without a payload, or whose
// messageID has no registered callback, are ignored.
addEventListener('message', function(e) {
    var reply = e.data;
    // Ignore empty messages instead of throwing on the property access below.
    if (!reply) return;

    var id = reply.messageID;
    // Own-property check: an ID such as "constructor" must not resolve to a
    // member inherited from Object.prototype.
    if (!Object.prototype.hasOwnProperty.call(callbacks, id)) return;

    var callback = callbacks[id];
    // Each request expects a single reply: release the entry before invoking
    // the callback so the table does not grow without bound and the entry is
    // removed even if the callback throws.
    delete callbacks[id];
    callback(reply.result);
});
// Posts a request to the other side of the worker channel.
//   method   - name of the operation being requested (e.g. 'getsrc', 'debug')
//   data     - payload forwarded unchanged with the request
//   callback - optional; when given, it is stored under the new messageID
// Every call takes the next messageID from the shared counter.
function sendRequest(method, data, callback) {
    lastMessageID++;
    var id = lastMessageID;

    if (callback) {
        callbacks[id] = callback;
    }

    var request = {
        method: method,
        data: data,
        messageID: id
    };
    postMessage(request);
}

// Example: request the image sources of a small HTML sample, then report the
// reply through a 'debug' request.
sendRequest(
    'getsrc',
    [
        '<img src="foo.png">',
        "<img src='bar.png'>",
        // This <img> is written inside a <textarea>; judging by its src value
        // it is expected to be absent from the reply.
        '<textarea><img src="should.not.be.visible"></textarea>'
    ].join(''),
    function reportSources(sources) {
        var summary = 'Received: ' + sources.join(', ');
        sendRequest('debug', summary);
    }
);