Hello,
I’ve developed an add-on to preview AsciiDoc as HTML inside Firefox: https://github.com/asciidoctor/asciidoctor-firefox-addon
We have an issue when the character encoding of the plain text document is not declared.
The Firefox console shows a warning like this:
The character encoding of the plain text document was not declared. The document will render with garbled text in some browser configurations if the document contains characters from outside the US-ASCII range. The character encoding of the file needs to be declared in the transfer protocol or file needs to use a byte order mark as an encoding signature.
We need to obtain the plain text in UTF-8 (regardless of the browser’s default encoding). Is there a way to achieve this?
The following hack works but was rejected by AMO (because I’m using XMLHttpRequest):
// Render the AsciiDoc source of the current document as HTML.
// If the browser did not decode the document as UTF-8, first try to coerce
// the text to UTF-8 (likely only needed for local files).
if (document.characterSet.toUpperCase() !== 'UTF-8') {
  try {
    // escape() emits %XX for code units below 256 and %uXXXX for anything
    // above; decodeURIComponent() then reads the %XX bytes back as UTF-8.
    // It throws a URIError on %uXXXX or on a byte sequence that is not valid
    // UTF-8, which sends us to the XMLHttpRequest fallback below.
    // see: http://www.ascii-code.com
    sanitizeAndShowHTML(convertToHTML(decodeURIComponent(escape(document.firstChild.textContent))));
  } catch (decodeError) {
    // NOTE(review): this catch also covers anything thrown by convertToHTML
    // or sanitizeAndShowHTML, not just the decode step - confirm intended.
    // XMLHttpRequest responseText is UTF-8 encoded by default
    var xhr = new XMLHttpRequest();
    xhr.open('GET', window.location.href, true);
    // The load event only fires once the request is complete, so there is no
    // need to check readyState here.
    xhr.onload = function () {
      // NOTE status is 0 for local files (i.e., file:// URIs)
      if (xhr.status === 200 || xhr.status === 0) {
        sanitizeAndShowHTML(convertToHTML(xhr.responseText));
      } else {
        console.error('Could not read AsciiDoc source. Reason: [' + xhr.status + '] ' + xhr.statusText);
      }
    };
    // statusText is empty when the request itself fails, so log a message
    // that says what went wrong instead of an empty string.
    xhr.onerror = function () {
      console.error('Could not read AsciiDoc source. Reason: request for ' + window.location.href + ' failed');
    };
    xhr.send();
  }
} else {
  // Already UTF-8: convert the document text as-is.
  sanitizeAndShowHTML(convertToHTML(document.firstChild.textContent));
}
Without the UTF-8 text, we cannot properly support non-English languages: https://github.com/asciidoctor/asciidoctor-firefox-addon/issues/43
Thanks,
Guillaume