/**
 * This file is the "skeleton" of the final system to determine
 * if a script is accepted or blocked.
 *
 * Some assets taken from script_detector.js
 *
 */

// The list of all available event attributes
var intrinsicEvents = [
	"onload",
	"onunload",
	"onclick",
	"ondblclick",
	"onmousedown",
	"onmouseup",
	"onmouseover",
	"onmousemove",
	"onmouseout",
	"onfocus",
	"onblur",
	"onkeypress",
	"onkeydown",
	"onkeyup",
	"onsubmit",
	"onreset",
	"onselect",
	"onchange"
];

/*
	NONTRIVIAL THINGS:
	- Fetch
	- XMLhttpRequest
	- eval()
	- ?

	JAVASCRIPT CAN BE FOUND IN:
	- Event handlers (onclick, onload, onsubmit, etc.)
	- Inline script elements
	- Remote script elements (script elements with a src attribute)

	WAYS TO DETERMINE PASS/FAIL:
	- "// @license [magnet link] [identifier]" then "// @license-end" (may also use /* comments)
	- Automatic whitelist: (http://bzr.savannah.gnu.org/lh/librejs/dev/annotate/head:/data/script_libraries/script-libraries.json_
	- A web labels table (element with id "jslicense-labels1"),
	  which may be linked to by a link tag identified by rel="jslicense" or data-jslicense="1"
	- In the first script tag, declare the license with @licstart/@licend
*/

// Known free licenses, keyed by the identifier expected in an @license tag.
// Each entry holds the license URL and the magnet link that must accompany
// the identifier.
var licenses = {
	'Apache-2.0':{
		'URL': 'http://www.apache.org/licenses/LICENSE-2.0',
		'Magnet link': 'magnet:?xt=urn:btih:8e4f440f4c65981c5bf93c76d35135ba5064d8b7&dn=apache-2.0.txt'
	},
	// No identifier was present in documentation
	'Artistic-2.0':{
		'URL': 'http://www.perlfoundation.org/artistic_license_2_0',
		'Magnet link': 'magnet:?xt=urn:btih:54fd2283f9dbdf29466d2df1a98bf8f65cafe314&dn=artistic-2.0.txt'
	},
	// No identifier was present in documentation
	'Boost':{
		'URL': 'http://www.boost.org/LICENSE_1_0.txt',
		'Magnet link': 'magnet:?xt=urn:btih:89a97c535628232f2f3888c2b7b8ffd4c078cec0&dn=Boost-1.0.txt'
	},
	// No identifier was present in documentation
	'BSD-3-Clause':{
		'URL': 'http://opensource.org/licenses/BSD-3-Clause',
		'Magnet link': 'magnet:?xt=urn:btih:c80d50af7d3db9be66a4d0a86db0286e4fd33292&dn=bsd-3-clause.txt',
	},
	'CPAL-1.0':{
		'URL': 'http://opensource.org/licenses/cpal_1.0',
		'Magnet link': 'magnet:?xt=urn:btih:84143bc45939fc8fa42921d619a95462c2031c5c&dn=cpal-1.0.txt'
	},
	'CC0-1.0':{
		'URL': 'http://creativecommons.org/publicdomain/zero/1.0/legalcode',
		'Magnet link': 'magnet:?xt=urn:btih:90dc5c0be029de84e523b9b3922520e79e0e6f08&dn=cc0.txt'
	},
	'EPL-1.0':{
		'URL': 'http://www.eclipse.org/legal/epl-v10.html',
		'Magnet link': 'magnet:?xt=urn:btih:4c6a2ad0018cd461e9b0fc44e1b340d2c1828b22&dn=epl-1.0.txt'
	},
	'Expat':{
		'URL': 'http://www.jclark.com/xml/copying.txt',
		'Magnet link': 'magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&dn=expat.txt'
	},
	'FreeBSD':{
		'URL': 'http://www.freebsd.org/copyright/freebsd-license.html',
		'Magnet link': 'magnet:?xt=urn:btih:87f119ba0b429ba17a44b4bffcab33165ebdacc0&dn=freebsd.txt'
	},
	'GPL-2.0':{
		'URL': 'http://www.gnu.org/licenses/gpl-2.0.html',
		'Magnet link': 'magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&dn=gpl-2.0.txt'
	},
	'GPL-3.0':{
		'URL': 'http://www.gnu.org/licenses/gpl-3.0.html',
		'Magnet link': 'magnet:?xt=urn:btih:1f739d935676111cfff4b4693e3816e664797050&dn=gpl-3.0.txt'
	},
	'LGPL-2.1':{
		'URL': 'http://www.gnu.org/licenses/lgpl-2.1.html',
		'Magnet link': 'magnet:?xt=urn:btih:5de60da917303dbfad4f93fb1b985ced5a89eac2&dn=lgpl-2.1.txt'
	},
	'LGPL-3.0':{
		'URL': 'http://www.gnu.org/licenses/lgpl-3.0.html',
		'Magnet link': 'magnet:?xt=urn:btih:0ef1b8170b3b615170ff270def6427c317705f85&dn=lgpl-3.0.txt'
	},
	'AGPL-3.0':{
		'URL': 'http://www.gnu.org/licenses/agpl-3.0.html',
		'Magnet link': 'magnet:?xt=urn:btih:0b31508aeb0634b347b8270c7bee4d411b5d4109&dn=agpl-3.0.txt'
	},
	'ISC':{
		'URL': 'https://www.isc.org/downloads/software-support-policy/isc-license/',
		'Magnet link': 'magnet:?xt=urn:btih:b8999bbaf509c08d127678643c515b9ab0836bae&dn=ISC.txt'
	},
	'MPL-2.0':{
		'URL': 'http://www.mozilla.org/MPL/2.0',
		'Magnet link': 'magnet:?xt=urn:btih:3877d6d54b3accd4bc32f8a48bf32ebc0901502a&dn=mpl-2.0.txt'
	},
	// "Public domain is not a license"
	// Replace with CC0?
	'Public-Domain':{
		'URL': 'https://www.gnu.org/licenses/license-list.html#PublicDomain',
		'Magnet link': 'magnet:?xt=urn:btih:e95b018ef3580986a04669f1b5879592219e2a7a&dn=public-domain.txt'
	},
	'UPL-1.0': {
		'URL': 'https://oss.oracle.com/licenses/upl/',
		'Magnet link': 'magnet:?xt=urn:btih:478974f4d41c3fa84c4befba25f283527fad107d&dn=upl-1.0.txt'
	},
	'WTFPL': {
		'URL': 'http://www.wtfpl.net/txt/copying/',
		'Magnet link': 'magnet:?xt=urn:btih:723febf9f6185544f57f0660a41489c7d6b4931b&dn=wtfpl.txt'
	},
	'Unlicense':{
		'URL': 'http://unlicense.org/UNLICENSE',
		'Magnet link': 'magnet:?xt=urn:btih:5ac446d35272cc2e4e85e4325b146d0b7ca8f50c&dn=unlicense.txt'
	},
	// No identifier was present in documentation
	'X11':{
		'URL': 'http://www.xfree86.org/3.3.6/COPYRIGHT2.html#3',
		'Magnet link': 'magnet:?xt=urn:btih:5305d91886084f776adcf57509a648432709a7c7&dn=x11.txt'
	},
	// Picked one of the two links that were there
	'Modified-BSD':{
		'URL': 'http://www.xfree86.org/current/LICENSE4.html',
		'Magnet link': 'magnet:?xt=urn:btih:12f2ec9e8de2a3b0002a33d518d6010cc8ab2ae9&dn=xfree86.txt'
	}
};

// Objects which could be used to do nontrivial things
// Bracket suffix notation could still be exploited to get some of these objects
var reserved_objects = [
	"fetch",
	"XMLHttpRequest",
	"chrome", // only on chrome
	"browser", // only on firefox
	"eval"
];

/**
 * Determines if a block of javascript is trivial or not.
 *
 * The raw source (strings and comments included) is scanned for every name
 * in reserved_objects, so a reserved name that only appears inside a string
 * or comment is still reported.
 *
 * @param {string} script - The javascript source to inspect.
 * @param {string} name - Label for the script, used only in log output.
 * @returns {boolean} true = trivial, false = nontrivial
 */
function evaluate(script,name){

	// Matches use of "object" as a variable: the name must be preceded by a
	// non-word character (or the start of the input) and followed, after
	// optional whitespace, by one of ; , . ( [
	// A name followed by anything else (e.g. "}" or ")") is not matched.
	function reserved_object_regex(object){
		return new RegExp("(?:[^\\w\\d]|^)"+object+'(?:\\s*?(?:[\\;\\,\\.\\(\\[])\\s*?)',"g");
	}

	console.log("------evaluation results for "+ name +"------");
	console.log("Script accesses reserved objects?");
	var flag = true;
	for(var i = 0; i < reserved_objects.length; i++){
		var res = reserved_object_regex(reserved_objects[i]).exec(script);
		if(res != null){
			console.log("%c fail","color:red;");
			// Log the match with up to 15 characters of context on each side
			var context_start = Math.max(0, res.index - 15);
			var context_end = res.index + res[0].length + 15;
			console.log(script.substring(context_start, context_end));
			flag = false;
		}
	}
	if(flag){
		console.log("%c pass","color:green;");
	}
	return flag;
}

/**
 * Looks at the output of the @license regex and determines if the
 * license is good.
 *
 * @param {Array} matches - [whole match, "@license", magnet link, identifier]
 * @returns {boolean} true when the identifier is a key of "licenses" and the
 *     magnet link is the one recorded for that identifier.
 */
function license_valid(matches){
	// Being overly careful with safety checks
	if(matches.length != 4){
		return false;
	}
	if(matches[1] != "@license"){
		return false;
	}
	// Own-property check so inherited keys such as "constructor" are rejected
	if(!Object.prototype.hasOwnProperty.call(licenses, matches[3])){
		return false;
	}
	if(licenses[matches[3]]["Magnet link"] != matches[2]){
		return false;
	}
	return true;
}

/**
 *
 * Runs regexes to search for explicit declarations of script
 * licenses on the argument.
 *
 * It detects "@license [magnet link] [identifier]" ... "@license-end"
 * sections. A tag may stand at the start of a line or follow a comment
 * opener ("//" or "/*").
 *
 * Source outside of licensed sections is kept only if evaluate() judges it
 * trivial. Licensed sections with a valid license are kept verbatim.
 *
 * We are assuming that the "stack depth" of @license tags can not exceed 1.
 * If this isn't correct, we can make it recursive.
 *
 * @param {string} script_src - The javascript source to judge.
 * @param {string} name - Label for the script, used only in log output.
 * @returns {string|boolean} The accepted portion of the source (possibly
 *     the empty string), or false when the input is not a string, a tag is
 *     unterminated, a license is invalid or the watchdog trips.
 */
// TODO: Known bug: extra \n chars thrown in at some splices
function license_read(script_src,name){
	if(typeof(script_src) != "string"){
		// Must not be a string: callers treat any string as accepted source
		return false;
	}

	// No "g" flag: each search must start at the beginning of the remaining source
	var start_tag = /(?:^|\/[\/*])[ \t]*(@license)\s([\S]+)\s([\S]+)$/m;
	// Swallow a block comment terminator that directly follows the end tag
	var end_tag = /(?:^|\/[\/*])[ \t]*(@license-end)(?:[ \t]*\*\/)?/m;

	// Contains only good Javascript
	var edited_src = "";
	// Once Javascript has been "judged", remove it from here
	var unedited_src = script_src;
	var watchdog = 0;
	while(true){
		var matches = start_tag.exec(unedited_src);
		if(matches == null){
			// No more tags: judge whatever is left and finish
			if(evaluate(unedited_src,name)){
				edited_src += unedited_src;
			}
			return edited_src;
		}
		// operate on everything before the next match.
		var before = unedited_src.slice(0,matches.index);
		if(evaluate(before,name)){
			edited_src += before;
		}
		// This removes the substring "before"
		unedited_src = unedited_src.slice(matches.index);

		// find the end tag and check if it is valid
		var matches_end = end_tag.exec(unedited_src);
		if(matches_end == null){
			console.log("ERROR: @license with no @license-end");
			return false;
		}
		var endtag_end_index = matches_end.index+matches_end[0].length;

		// accept next tag if its license is good.
		if(license_valid(matches)){
			edited_src = edited_src + unedited_src.slice(0,endtag_end_index);
		} else{
			console.log("Error: invalid license tag.");
			return false;
		}
		// Remove the next tag (it will be in edited_src if it was accepted)
		unedited_src = unedited_src.slice(endtag_end_index);

		// TODO: this is here to prevent infinite loops, should be removed eventually
		watchdog++;
		if(watchdog > 20){
			console.log("%c !!!!!WARNING!!!!! Watchdog > 20.","color:red");
			return false;
		}
	}
}

/**
 * Analyzes a page and hands the result to "callback".
 *
 * @param {string} html_string - HTML source of the page to analyze.
 * @param {function} callback - Called exactly once: with true when the whole
 *     page is trusted because of an @licstart/@licend comment, otherwise with
 *     the parsed document after its scripts and event handlers were edited.
 */
function get_final_page(html_string, callback){

	/**
	 *
	 *	Checks the whitelist in storage
	 *	(Not the comma separated whitelist from settings)
	 *
	 */
	function is_whitelisted(){
		// TODO: implement when this is a background script
		return false;
	}

	/**
	 * Returns the text after the last "/" of a URL.
	 */
	function last_url_token(url){
		var tokens = url.split("/");
		return tokens[tokens.length-1];
	}

	/**
	 *	Parses the weblabels table from a DOM object
	 *
	 *	Each row is read as: first cell links to the script, second cell
	 *	links to its license. Rows without both links are skipped.
	 *
	 *	@param {Element} weblabel - The web labels table element.
	 *	@returns {Object} Map from script file name (last URL token of the
	 *		script link) to "free" or "unknown".
	 */
	function read_weblabels_table(weblabel){
		var data = {};
		var rows = weblabel.getElementsByTagName("tr");
		for(var i = 0; i < rows.length; i++){
			var cells = rows[i].getElementsByTagName("td");
			if(cells.length < 2){
				continue;
			}
			var script_link = cells[0].getElementsByTagName("a")[0];
			var license_link = cells[1].getElementsByTagName("a")[0];
			if(script_link === undefined || license_link === undefined){
				continue;
			}
			var scriptname = last_url_token(script_link.href);
			if(scriptname == ""){
				// An empty key must never be recorded
				continue;
			}
			var license_name = license_link.innerText.trim();
			if(Object.prototype.hasOwnProperty.call(licenses, license_name)){
				console.log("%cFree: " + scriptname + " (" + license_name + ")","color:green;");
				data[scriptname] = "free";
			} else{
				console.log("%cUnknown: " + scriptname + " (" + license_name + ")","color:red;");
				data[scriptname] = "unknown";
			}
		}
		console.log("web labels table data:");
		console.log(data);
		return data;
	}

	/**
	 *	Reads the weblabels table from a link, then continues the analysis
	 *	with read_w_table(). If the table can't be fetched or found, the
	 *	analysis continues without table data so the callback still fires.
	 */
	function get_table(html_doc, callback, url){
		var xml = new XMLHttpRequest();
		xml.open("get",url);
		xml.onload = function(){
			var doc = new DOMParser().parseFromString(this.responseText,"text/html");
			var web_label = doc.getElementById("jslicense-labels1");
			if(web_label != null){
				read_w_table(html_doc, callback, read_weblabels_table(web_label));
			} else{
				read_w_table(html_doc, callback);
			}
		};
		xml.onerror = function(){
			read_w_table(html_doc, callback);
		};
		xml.send();
	}

	/**
	 * 	Basically an extension of "analyze"
	 *
	 *	Calls license_read() on all the document's inline scripts and
	 *	intrinsic event handlers. license_read() then returns an edited
	 *	version according to license status and trivial/nontrivial status.
	 *
	 *	Added because I was having async issues
	 *
	 *	@param {Document} html_doc - The parsed page; edited in place.
	 *	@param {function} callback - Receives html_doc once everything is judged.
	 *	@param {Object|boolean} table_data - Output of read_weblabels_table(),
	 *		or false when there is no web labels table.
	 */
	function read_w_table(html_doc, callback, table_data=false){
		// Work on snapshots: html_doc.scripts and html_doc.all are live, so
		// removing a script while indexing into them would skip elements.
		var scripts = Array.prototype.slice.call(html_doc.scripts);

		// [element, attribute name, index in html_doc.all] of every intrinsic event
		var has_intrinsic_events = [];
		for(var i = 0; i < html_doc.all.length; i++){
			for(var j = 0; j < intrinsicEvents.length; j++){
				if(html_doc.all[i].hasAttribute(intrinsicEvents[j])){
					has_intrinsic_events.push([html_doc.all[i], intrinsicEvents[j], i]);
				}
			}
		}

		var done = false;
		var amt_done = 0;
		var amt_remote_scripts = 0;
		var amt_todo = scripts.length + has_intrinsic_events.length;

		// Fires the callback once every item that is judged here is done.
		// Remote scripts are not judged yet, so they are excluded from the total.
		function check_done(){
			console.log(amt_done + "/" + (amt_todo - amt_remote_scripts) );
			if(amt_done > amt_todo){
				console.warn("Not supposed to happen");
			}
			if(done == false && amt_done >= (amt_todo - amt_remote_scripts) ){
				console.log("%c DONE.","color:red;");
				callback(html_doc);
				done = true;
				// TODO: Convert this to async
				// TODO: Call update_popup() here with reasons
			}
		}

		// Judges an inline script: accepted source replaces the script's
		// content, a rejected script is removed from the document.
		function edit_src(src, script, name){
			var edited = license_read(src,name);
			if(typeof edited === "string"){
				script.textContent = edited;
			} else if(script.parentNode != null){
				script.parentNode.removeChild(script);
			}
			amt_done++;
		}

		// Judges the value of an intrinsic event attribute.
		function edit_event(src, element, attribute, name){
			var edited = license_read(src,name);
			if(typeof edited === "string"){
				element.setAttribute(attribute, edited);
			} else{
				element.setAttribute(attribute, "//Denied by LibreJS");
			}
			amt_done++;
		}

		for(var s = 0; s < scripts.length; s++){
			var script = scripts[s];
			if(script.src != ""){
				// this is a remote script (it has a src attribute)
				// convert between relative link and file name (table_data indexes by file name)
				var scriptname = last_url_token(script.src);
				if(table_data !== false && scriptname != "" && Object.prototype.hasOwnProperty.call(table_data, scriptname)){
					console.log("script contained in weblabel data.");
					if(table_data[scriptname] == "free"){
						console.log("script is free");
						// Count it, otherwise check_done() would never fire
						amt_done++;
						continue;
					}
					console.log("script is unknown");
				}
				console.log("%c Will evaluate script '" + script.src + "' when it arrives. Document.scripts index: "+s,"color:blue;");
				amt_remote_scripts++;
			} else{
				// it is an inline script (no src attribute)
				console.log("%c Evaluating inline script. Document.scripts index: "+s,"color:blue;");
				edit_src(script.innerText, script, "src: inline (index "+s+")");
			}
		}

		// Judge all the document's elements with intrinsic events
		for(var e = 0; e < has_intrinsic_events.length; e++){
			var element = has_intrinsic_events[e][0];
			var attribute = has_intrinsic_events[e][1];
			var s_name = "html_doc.all["+has_intrinsic_events[e][2]+"]";
			edit_event(element.getAttribute(attribute), element, attribute, s_name);
		}
		check_done();
	}

	// "main" for the script analyzer
	// called when invoked by the button
	function analyze(html_source,callback){
		// TODO: Call get_whitelisted_status on this page's URL
		var parser = new DOMParser();
		var html_doc = parser.parseFromString(html_source, "text/html");

		// Reports the page as trusted if "text" holds an @licstart/@licend
		// comment. Returns true when the callback was fired.
		// TODO: Is this supposed to test if the license is free or just assume that it is?
		function trust_if_license_comment(text){
			var matches = text.match(/@licstart[\s\S]+@licend/g);
			if(matches == null){
				return false;
			}
			console.log("License comment found:");
			console.log(matches[0]);
			console.log("Trusting that the entire page is freely licensed.");
			callback(true);
			return true;
		}

		// Judges the page script by script, using a web labels table if one
		// is linked to or present. Exactly one path below is taken so that
		// the callback fires only once.
		function analyze_scripts(){
			// Test for the link that has rel="jslicense", data-jslicense="1"
			for(var i = 0; i < html_doc.links.length; i++){
				var link = html_doc.links[i];
				if(link.rel == "jslicense" || link.getAttribute("data-jslicense") == "1"){
					console.log("Found HTML table link:");
					get_table(html_doc, callback, link.href);
					return;
				}
			}
			// Test for the JavaScript Web Labels table on this page
			var weblabel = html_doc.getElementById("jslicense-labels1");
			if(weblabel != null){
				console.log("Found web labels table");
				read_w_table(html_doc, callback, read_weblabels_table(weblabel));
				return;
			}
			read_w_table(html_doc, callback);
		}

		// Test "the first piece of Javascript available to the page" for the license comment
		var first_script = html_doc.scripts[0];
		if(first_script === undefined){
			analyze_scripts();
			return;
		}
		if(first_script.src != ""){
			// The first script is remote: the rest of the analysis has to
			// wait for it, otherwise the callback could fire twice.
			var xml = new XMLHttpRequest();
			xml.open("get", first_script.src);
			xml.onload = function(){
				if(!trust_if_license_comment(this.responseText)){
					analyze_scripts();
				}
			};
			xml.onerror = function(){
				analyze_scripts();
			};
			xml.send();
			return;
		}
		console.log("%c Script 0: (src: inline)","color:red;");
		if(!trust_if_license_comment(first_script.innerText)){
			analyze_scripts();
		}
	}

	analyze(html_string,callback);
}

// Only run when a page is available, so the file can be loaded elsewhere.
if(typeof document !== "undefined"){
	get_final_page(document.documentElement.outerHTML,function(a){
		console.log("returned");
		if(typeof(a) == "boolean"){
			// The whole page is trusted: leave it untouched
			return;
		}
		document.documentElement.innerHTML = a.documentElement.innerHTML;
	});
}