diff options
| -rw-r--r-- | bg/ResponseMetaData.js | 82 | ||||
| -rw-r--r-- | bg/ResponseProcessor.js | 110 | ||||
| -rw-r--r-- | main_background.js | 142 | 
3 files changed, 234 insertions, 100 deletions
| diff --git a/bg/ResponseMetaData.js b/bg/ResponseMetaData.js new file mode 100644 index 0000000..40ca3f3 --- /dev/null +++ b/bg/ResponseMetaData.js @@ -0,0 +1,82 @@ +/** +* GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript. +* +* Copyright (C) 2018 Giorgio Maone <giorgio@maone.net> +* +* This file is part of GNU LibreJS. +* +* GNU LibreJS is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* GNU LibreJS is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with GNU LibreJS.  If not, see <http://www.gnu.org/licenses/>. +*/ + +/** +  This class parses HTTP response headers to extract both the +  MIME Content-type and the character set to be used, if specified, +  to parse textual data through a decoder. 
+*/ + +class ResponseMetaData { +  constructor(request) { +    let {responseHeaders} = request; +    this.headers = {}; +    for (let h of responseHeaders) { +      if (/^\s*Content-(Type|Disposition)\s*$/i.test(h.name)) { +        let propertyName =  h.name.split("-")[1].trim(); +        propertyName = `content${propertyName.charAt(0).toUpperCase()}${propertyName.substring(1).toLowerCase()}`; +        this[propertyName] = h.value; +        this.headers[propertyName] = h; +      } +    } +    this.forcedUTF8 = false; +  } + +  get charset() { +    let charset = ""; +    if (this.contentType) { +      let m = this.contentType.match(/;\s*charset\s*=\s*(\S+)/); +      if (m) { +        charset = m[1]; +      } +    } +    Object.defineProperty(this, "charset", { value: charset, writable: false, configurable: true }); +    return charset; +  } + +  get isUTF8() { +    return /^utf-8$/i.test(this.charset); +  } + +  forceUTF8() { +    if (!(this.forcedUTF8 || this.isUTF8)) { +      let h = this.headers.contentType; +      if (h) { +        h.value = h.value.replace(/;\s*charset\s*=.*|$/, "; charset=utf8"); +        this.forcedUTF8 = true; +      } // if the header doesn't exist the browser should default to UTF-8 anyway +    } +    return this.forcedUTF8; +  } + +  createDecoder() { +    if (this.charset) { +      try { +        return new TextDecoder(this.charset); +      } catch (e) { +        console.error(e); +      } +    } +    return new TextDecoder("utf-8"); +  } +}; + +module.exports = { ResponseMetaData }; diff --git a/bg/ResponseProcessor.js b/bg/ResponseProcessor.js new file mode 100644 index 0000000..3f3151b --- /dev/null +++ b/bg/ResponseProcessor.js @@ -0,0 +1,110 @@ +/** +* GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript. +* +* Copyright (C) 2018 Giorgio Maone <giorgio@maone.net> +* +* This file is part of GNU LibreJS. 
+* +* GNU LibreJS is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* GNU LibreJS is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with GNU LibreJS.  If not, see <http://www.gnu.org/licenses/>. +*/ + +/** +  An abstraction layer over the StreamFilter API, allowing its clients to process +  only the "interesting" HTML and script requests and leaving the others alone +*/ + +let {ResponseMetaData} = require("./ResponseMetaData.js"); + +let listeners = new WeakMap(); +let webRequestEvent = browser.webRequest.onHeadersReceived; + +class ResponseProcessor { + +  static install(handler, types = ["main_frame", "sub_frame", "script"]) { +    if (listeners.has(handler)) return false; +    let listener = +      request =>  new ResponseTextFilter(request).process(handler); +    listeners.set(handler, listener); +    webRequestEvent.addListener( +  		listener, +  		{urls: ["<all_urls>"], types}, +  		["blocking", "responseHeaders"] +  	); +    return true; +  } + +  static uninstall(handler) { +    let listener = listeners.get(handler); +    if (listener) { +      webRequestEvent.removeListener(listener); +    } +  } +} + +class ResponseTextFilter { +  constructor(request) { +    this.request = request; +    let {type, statusCode} = request; +    let md = this.metaData = new ResponseMetaData(request); +    this.canProcess = // we want to process html documents and scripts only +      (statusCode < 300 || statusCode >= 400) && // skip redirections +      !md.disposition && // skip forced downloads +      (type === "script" || 
/\bhtml\b/i.test(md.contentType)); +  } + +  process(handler) { +    if (!this.canProcess) return {}; +    let metaData = this.metaData; +    let {requestId, responseHeaders} = this.request; +    let filter = browser.webRequest.filterResponseData(requestId); +    let buffer = []; + +    filter.ondata = event => { +      buffer.push(event.data); +    }; + +    filter.onstop = async event => { +      let decoder = metaData.createDecoder(); +      let params = {stream: true}; +      let text = this.text = buffer.map( +        chunk => decoder.decode(chunk, params)) +        .join(''); +      let editedText = null; +      try { +        let response = { +          request: this.request, +          metaData, +          text, +        }; +        editedText = await handler(response); +      } catch(e) { +        console.error(e); +      } +      if (metaData.forcedUTF8 || +        editedText !== null && text !== editedText) { +        // if we changed the charset, the text or both, let's re-encode +        filter.write(new TextEncoder().encode(editedText)); +      } else { +        // ... otherwise pass all the raw bytes through +        for (let chunk of buffer) filter.write(chunk); +      } + +      filter.disconnect(); +    } + +    return metaData.forceUTF8() ? 
{responseHeaders} : {}; +  } +} + +module.exports = { ResponseProcessor }; diff --git a/main_background.js b/main_background.js index debe0c2..ff68479 100644 --- a/main_background.js +++ b/main_background.js @@ -25,6 +25,7 @@ var acorn = require('acorn/dist/acorn_loose');  var jssha = require('jssha');  var walk = require("acorn/dist/walk");  var legacy_license_lib = require("./legacy_license_check.js"); +var {ResponseProcessor} = require("./bg/ResponseProcessor");  console.log("main_background.js");  /** @@ -853,7 +854,7 @@ function license_valid(matches){  *		reason text		  *	]  */ -function license_read(script_src, name){ +function license_read(script_src, name, external = false){  	var reason_text = ""; @@ -970,7 +971,7 @@ function get_script(response,url,tabid,wl,index=-1){  			}  		edited = [true,response,"Page is whitelisted in preferences"];  		}else{ -			edited = license_read(response,scriptname); +			edited = license_read(response,scriptname,index == -2);  		}  		var src_hash = hash(response);  		var verdict = edited[0]; @@ -1066,35 +1067,28 @@ function block_ga(a){  	else return {};  } + +  /** -*	This is a callback trigged from requests caused by script tags with the src="" attribute. 
+*	This listener gets called as soon as we've got all the HTTP headers, can guess +* content type and encoding, and therefore correctly parse HTML documents +* and external script inclusion in search of non-free JavaScript  */ -function read_script(a){ -	var GA = test_GA(a); -	if(GA !== false){ -		return GA; -	} -	var filter = webex.webRequest.filterResponseData(a.requestId); -	var decoder = new TextDecoder("utf-8"); -	var encoder = new TextEncoder(); -	var str = ""; - -	filter.onstop = event => { -		dbg_print("read_script "+a.url); -		var res = test_url_whitelisted(a.url); -		res.then(function(whitelisted){ -			var edit_script = get_script(str,a.url,a["tabId"],whitelisted,-1); -			edit_script.then(function(edited){ -				filter.write(encoder.encode(edited)); -				filter.disconnect(); -			}); -		}); -	} -        filter.ondata = event => { -                str += decoder.decode(event.data, {stream: true}); -        } -	return {}; +async function responseHandler(response) { +	let {url, type} = response.request; +	let whitelisted = await test_url_whitelisted(url); +	let handle_it = type === "script" ? 
handle_script : handle_html; +	return await handle_it(response, whitelisted); +} + +/** +* Here we handle external script requests +*/ +async function handle_script(response, whitelisted){ +	let {text, request} = response; +	let {url, tabId} = request; +  return await get_script(text, url, tabId, whitelisted, -2);  }  /** @@ -1260,61 +1254,21 @@ function edit_html(html,url,tabid,wl){  }  /** -* Callback for main frame requests -*  +* Here we handle html document responses  */ -function read_document(a){ -	var GA = test_GA(a); -	if(GA != false){ -		return GA; -	} -	var str = ""; -	var filter = webex.webRequest.filterResponseData(a.requestId); -	var decoder = new TextDecoder("utf-8"); -	var encoder = new TextEncoder(); -	filter.onerror = event => { -		dbg_print("%c Error in getting document","color:red"); -	} -	filter.onstop = event => { -		time = Date.now(); -		delete unused_data[a["tabId"]]; -		webex.browserAction.setBadgeText({ -			text: "✓", -			tabId: a["tabId"] -		}); -		webex.browserAction.setBadgeBackgroundColor({ -			color: "green", -			tabId: a["tabId"] -		}); -		var test = new ArrayBuffer(); -		var res = test_url_whitelisted(a.url); -		res.then(function(whitelisted){ -			var edit_page; -			// TODO Fix this ugly HACK! -			if(! 
str.includes("<html")){ -				dbg_print("not html"); -				filter.write(encoder.encode(str)); -				filter.disconnect(); -				return {}; -			} -			if(whitelisted == true){ -				dbg_print("WHITELISTED"); -				// Doesn't matter if this is accepted or blocked, it will still be whitelisted -				filter.write(encoder.encode(str)); -				filter.disconnect(); -			} else{ -				edit_page = edit_html(str,a.url,a["tabId"],false); -				edit_page.then(function(edited){ -					filter.write(encoder.encode(edited)); -					filter.disconnect(); -				}); -			} -		}); -	} -	filter.ondata = event => { -		str += decoder.decode(event.data, {stream: true}); -	} -	return {}; +async function handle_html(response, whitelisted) { +	let {text, request} = response; +	let {url, tabId} = request; +	delete unused_data[tabId]; +	browser.browserAction.setBadgeText({ +		text: "✓", +		tabId +	}); +	browser.browserAction.setBadgeBackgroundColor({ +		color: "green", +		tabId +	}); +	return await edit_html(text, url, tabId, false);  }  /** @@ -1329,32 +1283,20 @@ function init_addon(){  	webex.tabs.onRemoved.addListener(delete_removed_tab_info);  	// Prevents Google Analytics from being loaded from Google servers -	var all_types = [ +	let all_types = [  		"beacon", "csp_report", "font", "image", "imageset", "main_frame", "media",  		"object", "object_subrequest", "ping", "script", "stylesheet", "sub_frame",  		"web_manifest", "websocket", "xbl", "xml_dtd", "xmlhttprequest", "xslt",   		"other" -	] -	// Analyzes remote scripts +	];  	webex.webRequest.onBeforeRequest.addListener(  		block_ga, -		{urls:["<all_urls>"], types:all_types}, -		["blocking"] -	); - -	// Analyzes remote scripts -	webex.webRequest.onBeforeRequest.addListener( -		read_script, -		{urls:["<all_urls>"], types:["script"]}, -		["blocking"] -	); - -	// Analyzes the scripts inside of HTML -	webex.webRequest.onBeforeRequest.addListener( -		read_document, -		{urls:["<all_urls>"], types:["main_frame"]}, +		{urls: ["<all_urls>"], types: all_types},  	
	["blocking"]  	); +	 +	// Analyzes all the html documents and external scripts as they're loaded +	ResponseProcessor.install(responseHandler);  	legacy_license_lib.init();  } | 
