diff options
author | hackademix <giorgio@maone.net> | 2019-03-11 22:35:46 +0100 |
---|---|---|
committer | hackademix <giorgio@maone.net> | 2019-03-11 22:35:46 +0100 |
commit | 2a272e97c076c8a0f13a1f0f5140ad5d72cbd890 (patch) | |
tree | 68f21afb0693f6464ba12cad3d08621590c01a30 | |
parent | 42a2d31b272a2934824e038e8d9c73f19bab006c (diff) |
Correctly decode using the original document charset and force re-encoding via UTF-8 BOM only when needed.
-rw-r--r-- | bg/ResponseMetaData.js | 18 | ||||
-rw-r--r-- | bg/ResponseProcessor.js | 20 |
2 files changed, 10 insertions, 28 deletions
diff --git a/bg/ResponseMetaData.js b/bg/ResponseMetaData.js index 41d1fe9..5a75a3c 100644 --- a/bg/ResponseMetaData.js +++ b/bg/ResponseMetaData.js @@ -37,7 +37,6 @@ class ResponseMetaData { this.headers[propertyName] = h; } } - this.forcedUTF8 = false; } get charset() { @@ -52,21 +51,6 @@ class ResponseMetaData { return charset; } - get isUTF8() { - return /^utf-?8$/i.test(this.charset); - } - - forceUTF8() { - if (!(this.forcedUTF8 || this.isUTF8)) { - let h = this.headers.contentType; - if (h) { - h.value = h.value.replace(/;\s*charset\s*=.*|$/, "; charset=utf8"); - this.forcedUTF8 = true; - } // if the header doesn't exist the browser should default to UTF-8 anyway - } - return this.forcedUTF8; - } - createDecoder() { if (this.charset) { try { @@ -75,7 +59,7 @@ class ResponseMetaData { console.error(e); } } - return new TextDecoder("utf-8"); + return new TextDecoder("latin1"); } }; diff --git a/bg/ResponseProcessor.js b/bg/ResponseProcessor.js index 4443d90..d10d46d 100644 --- a/bg/ResponseProcessor.js +++ b/bg/ResponseProcessor.js @@ -90,8 +90,6 @@ class ResponseTextFilter { }; filter.onstop = async event => { - - let params = {stream: true}; // concatenate chunks let size = buffer.reduce((sum, chunk, n) => sum + chunk.byteLength, 0) let allBytes = new Uint8Array(size); @@ -119,19 +117,19 @@ class ResponseTextFilter { } catch(e) { console.error(e); } - if (editedText !== null && - (metaData.forcedUTF8 && request.type !== "script" || - response.text !== editedText)) { - // if we changed the charset, the text or both, let's re-encode - filter.write(new TextEncoder().encode(editedText)); - } else { - // ... otherwise pass all the raw bytes through - filter.write(allBytes); + if (editedText !== null) { + // we changed the content, let's re-encode + let encoded = new TextEncoder().encode(editedText); + // pre-pending the UTF-8 BOM will force the charset per HTML 5 specs + allBytes = new Uint8Array(encoded.byteLength + 3); + allBytes.set(new Uint8Array([0xEF, 0xBB, 0xBF]), 0); // UTF-8 BOM + allBytes.set(encoded, 3); } + filter.write(allBytes); filter.close(); } - return metaData.forceUTF8() ? {responseHeaders} : ResponseProcessor.ACCEPT;; + return ResponseProcessor.ACCEPT; } } |