Search and Highlight the word in HTML

Hi Guys,

I have a small add-on which, after some processing on the webpage, searches for and highlights a few words. Until now it was working fine. However, I am now having a compatibility issue due to the following line.

vrs_getWin.document.getElementsByTagName("body")[0].innerHTML = bodyContents;

Error: Markup should not be passed to innerHTML dynamically.

So the question is,

  • How should I search for and highlight the text? I can’t find the text by its node ID or class; the only thing I know is the text itself. I need to find all occurrences of this text on the webpage and highlight them.

  • Is there any built-in API for that? I mean, Firefox's own search highlights the matching word.

I guess you are currently doing something like this:

document.body.innerHTML = document.body.innerHTML
.replace(searchTerm, s => `<span class="search-result">${ s }</span>`);

That’s pretty bad for performance (you are stringifying the entire page and then rebuilding it from scratch) and breaks many pages (all elements are re-created, so any references the page’s scripts had to elements, e.g. event listeners, become useless).

Have you tried window.find?

I checked the function's documentation; however, I didn’t find anything which could highlight the word in a given color.

In my case, it could be more than few words, which I need to find and highlight.

window.find() selects the match on the page; you can then use document.getSelection() to get the elements and the exact ranges that are selected.
If you need to find more than one thing, repeat the find()/getSelection().

If that is not flexible enough for you, you will have to walk the DOM and do the matching of your (partial) search terms against all Text-nodes manually. Using innerHTML for this really isn’t an option.

I could find the node I am interested in through traversing all the text nodes; the problem is how to replace a particular word in the node with another word (the word plus HTML to highlight it) without changing its innerHTML.

Changing the innerHTML of individual nodes can be Ok. This should do what you want:

/**
 * Marks the first occurrence of a substring inside a Text node.
 * The Text node is taken out of the document and a new SPAN takes its place;
 * the SPAN holds the HTML-escaped text with `prefix`/`suffix` around the match.
 * @param  {Text}     text     The text node in which to highlight.
 * @param  {string}   snippet  The substring within `text` to highlight.
 * @param  {html}     prefix   HTML string of the opening tag to insert for the highlighting.
 * @param  {html}     suffix   HTML string of the closing tag to insert for the highlighting.
 * @return {Element}           SPAN-Element that replaces `text` now.
 * @throws {Error}    If `snippet` is not present in `text`.
 */
function highlight(text, snippet, prefix, suffix) {
	const host = text.parentNode;
	const content = text.textContent;
	const wrapper = host.ownerDocument.createElement('span');

	const start = content.indexOf(snippet);
	if (start < 0) {
		throw new Error('snippet not in text');
	}
	const end = start + snippet.length;

	// Only `prefix` and `suffix` are inserted as markup; the three text pieces are escaped.
	const before = encodeHtml(content.slice(0, start));
	const matched = encodeHtml(content.slice(start, end));
	const after = encodeHtml(content.slice(end));
	wrapper.innerHTML = before + prefix + matched + suffix + after;

	// Put the SPAN where the Text node was, then detach the Text node.
	host.insertBefore(wrapper, text);
	text.remove();
	return wrapper;
}

/**
 * Undoes `highlight` by putting the original Text node back in place of the SPAN.
 * @param  {Element}  replacement   The Element returned by `highlight`.
 * @param  {Text}     originalText  The original Text node that was passed to `highlight`.
 */
function unhighlight(replacement, originalText) {
	const host = replacement.parentNode;
	// Re-insert the original node at the SPAN's position before dropping the SPAN.
	host.insertBefore(originalText, replacement);
	replacement.remove();
}

/**
 * Escapes the characters that are significant in HTML so the result can be
 * embedded in markup and parsed back as plain text.
 * @param  {string}  html  A string possibly containing control characters (non-strings are coerced via `+ ''`).
 * @return {string}        The input with every `& < > ' " /` replaced by its entity.
 */
function encodeHtml(html) {
	const input = html + '';
	return input.replace(htmlEscapeRegExp, (character) => htmlEscapeObject[character]);
}

// Lookup table: HTML control character -> escape entity.
const htmlEscapeObject = {
	'&': '&amp;',
	'<': '&lt;',
	'>': '&gt;',
	"'": '&#39;',
	'"': '&quot;',
	'/': '&#47;',
};

// Character class matching every key of the lookup table, built once at load time.
const htmlEscapeRegExp = new RegExp(`[${ Object.keys(htmlEscapeObject).join('') }]`, 'g');

Even changing the innerHTML of a single node is not allowed, or at least it seems that way. I am using the following code and it shows me an error.

 _findAndReplace: function(searchText, replacement, searchNode) {
        console.log("inside highlight function");

        if (!searchText || typeof replacement === 'undefined') {
            // Throw error here if you want...
            return;
        }
        var vrs_getWin = document.commandDispatcher.focusedWindow;
		//var bodyContents = vrs_getWin.document.getElementsByTagName("body")[0].innerHTML;

        var regex = typeof searchText === 'string' ?
                    new RegExp(searchText, 'g') : searchText,
            childNodes = (searchNode || vrs_getWin.document.body).childNodes,
            cnLength = childNodes.length,
            excludes = 'html,head,style,title,link,meta,script,object,iframe';
        while (cnLength--) {
            var currentNode = childNodes[cnLength];
            if (currentNode.nodeType === 1 &&
                (excludes + ',').indexOf(currentNode.nodeName.toLowerCase() + ',') === -1) {
                arguments.callee(searchText, replacement, currentNode);
            }
            if (currentNode.nodeType !== 3 || !regex.test(currentNode.data) ) {
                continue;
            }
            var parent = currentNode.parentNode,
                frag = (function(){
                    var html = currentNode.data.replace(regex, replacement),
                        wrap = document.createElement('div'),
                        frag = document.createDocumentFragment();
                    wrap.innerHTML = html;
                    while (wrap.firstChild) {
                        frag.appendChild(wrap.firstChild);
                    }
                    return frag;
                })();
            parent.insertBefore(frag, currentNode);
            parent.removeChild(currentNode);
        }

        console.log(searchText+" Word hightlighted");
    },

I am getting following error while uploading new version.

Markup should not be passed to innerHTML dynamically.
Warning: Due to both security and performance concerns, innerHTML may not be set using dynamic values which have not been adequately sanitized. This can lead to security issues or fairly serious performance degradation.

chrome/content/highlight.js
frag = document.createDocumentFragment();
wrap.innerHTML = html;
while (wrap.firstChild) {

This is the part that matters. The code I provided does that. I’m not sure if yours does.