diff options
-rw-r--r-- | main_background.js | 269 | ||||
-rw-r--r-- | test.js | 235 |
2 files changed, 233 insertions, 271 deletions
diff --git a/main_background.js b/main_background.js index aafd873..60f0b98 100644 --- a/main_background.js +++ b/main_background.js @@ -1,5 +1,5 @@ -var acornbase = require("acorn"); +var acorn_base = require("acorn"); var acorn = require('acorn/dist/acorn_loose'); var jssha = require('jssha'); var walk = require("acorn/dist/walk"); @@ -1553,160 +1553,162 @@ var fname_data = { "NodeList": true, "StopIteration": true }; +//************************Comes from HTML file index.html's script test.js**************************** -// "The number of conditionals and loops must be at most 5" -var loops = ["ForInStatement","ForStatement","DoWhileStatement","WhileStatement","IfStatement","SwitchStatement"]; /** -* Loop over a function and get a list of things being called. +* If this is true, it evaluates entire scripts instead of returning as soon as it encounters a violation. * -* Tests to see if the function calls itself. -* -* Note: If it is an anonmyous function, recursion isn't possible. +* Also, it controls whether or not this part of the code logs to the console. * */ -function get_function_names(input_node_src,scope){ - var func_name = ""; - var flag = true; +var DEBUG = true; - // The name of the function can't appear anywhere. - // No bracket suffix notation either. - console.log("Searching for identifier '"+scope+"' in this code:"); - //console.log(input_node_src); - - var tokens = acorn_base.tokenizer(input_node_src); - var toke = tokens.getToken(); - while(toke.type != acorn_base.tokTypes.eof){ - if(toke.type.label == "name" && scope == toke.value){ - return true; - } - toke = tokens.getToken(); +function dbg_print(a){ + if(DEBUG == true){ + console.log(a) } - - return false; } -/** -* Reads fname_data to determine if a function call is trivial -* -*/ -function test_function_name(name){ - var res = fname_data[name]; - if(res == false){ - console.log("'"+name+"'"+" is trivial."); - return false; - } - if(res == true){ - console.log("%cNONTRIVIAL:'"+name+"'"+" is non-trivial.","color:red"); - return true; - } - console.log("%cNONTRIVIAL:'"+name+"'"+" is probably user defined.","color:red"); - return false; -} -/** -* Rigorously determines if code is trivial or not based on the official triviality criterion -*/ function full_evaluate(script){ - var res = true; - - if(script === undefined || script == ""){ - return true; - } - var ast = acorn.parse_dammit(script).body[0]; - - var flag = false; - var amtloops = 0; - - // COUNTS LOOPS AND CONDITIONALS - walk.simple(ast, { - ForInStatement(node){ - if(amtloops > 3){return;} - amtloops++; - }, - ForStatement(node){ - if(amtloops > 3){return;} - amtloops++; - }, - DoWhileStatement(node){ - if(amtloops > 3){return;} - amtloops++; - }, - WhileStatement(node){ - if(amtloops > 3){return;} - amtloops++; - }, - IfStatement(node){ - if(amtloops > 3){return;} - amtloops++; - }, - SwitchStatement(node){ - if(amtloops > 3){return;} - amtloops++; + var res = true; + if(script === undefined || script == ""){ + return true; } - }); - if(amtloops > 3){ - console.log("%c NONTRIVIAL: Too many loops/conditionals.","color:red"); - return false; - } - // Detect which objects are referenced and which functions are called - // Only cares about top level objects. Window is special because we will test its methods. - var nontrivial = false; + var ast = acorn.parse_dammit(script).body[0]; - walk.simple(ast, { - ExpressionStatement(node){ - if(nontrivial == true){ - return; + var flag = false; + var amtloops = 0; + + var loopkeys = {"for":true,"if":true,"while":true,"switch":true}; + try{ + var tokens = acorn_base.tokenizer(script); + }catch(e){ + console.warn("Tokenizer could not be initiated (probably invalid code)"); + return false; + } + try{ + var toke = tokens.getToken(); + }catch(e){ + console.warn("couldn't get first token (probably invalid code)"); + console.warn("Continuing evaluation"); + } + var toke = tokens.getToken(); + + /** + * Given the end of an identifer token, it tests for bracket suffix notation + */ + function being_called(end){ + var i = 0; + while(script.charAt(end+i).match(/\s/g) !== null){ + i++; + if(i >= script.length-1){ + return false; + } } - // Get the first thing in the expression - if(node === undefined){ - return; - } - var lnode = node.expression; - var last_name = ""; - while(true){ - // window.test() - if(lnode.type == "CallExpression"){ - if(lnode.property !== undefined){ - last_name = lnode.property.name; + if(script.charAt(end+i) == "("){ + return true; + }else{ + return false; + } + } + /** + * Given the end of an identifer token, it tests for parentheses + */ + function is_bsn(end){ + var i = 0; + while(script.charAt(end+i).match(/\s/g) !== null){ + i++; + if(i >= script.length-1){ + return false; + } + } + if(script.charAt(end+i) == "["){ + return true; + }else{ + return false; + } + } + while(toke.type != acorn_base.tokTypes.eof){ + if(toke.type.keyword !== undefined){ + // This type of loop detection ignores functional loop alternatives and ternary operators + dbg_print("Keyword:"+toke.type.keyword); + + if(toke.type.keyword == "function"){ + dbg_print("%c NONTRIVIAL: Function declaration.","color:red"); + if(DEBUG == false){ + return false; + } + } + + if(loopkeys[toke.type.keyword] !== undefined){ + amtloops++; + if(amtloops > 3){ + dbg_print("%c NONTRIVIAL: Too many loops/conditionals.","color:red"); + if(DEBUG == false){ + return false; + } } - lnode = lnode.callee; - // window.test - }else if(lnode.type == "MemberExpression"){ - last_name = lnode.property.name; - // This may be bracket suffix notation - lnode = lnode.object; - // We should be at the first in the chain. - }else if(lnode.type == "Identifier"){ - // Since window is the global object, it is special - if(lnode.name == "window"){ - nontrivial = test_function_name(last_name); - break; - } else{ - nontrivial = test_function_name(lnode.name); - break; + } + }else if(toke.value !== undefined){ + var status = fname_data[toke.value]; + if(status === true){ // is the identifier banned? + dbg_print("%c NONTRIVIAL: nontrivial token: '"+toke.value+"'","color:red"); + if(DEBUG == false){ + return false; + } + }else if(status === false){// is the identifier not banned? + // Is there bracket suffix notation? + if(is_bsn(toke.end)){ + dbg_print("%c NONTRIVIAL: Bracket suffix notation on variable '"+toke.value+"'","color:red"); + if(DEBUG == false){ + return false; + } } - }else if(lnode.type == "BinaryExpression"){ - // This actually might not be valid. It can't be anything nontrivial. - console.log("%c Warn: syntax not valid","color:Red;") - break; - } else{ - console.log("Unrecognized: "+lnode.type); - break; + }else if(status === undefined){// is the identifier user defined? + // Are arguments being passed to a user defined variable? + if(being_called(toke.end)){ + dbg_print("%c NONTRIVIAL: User defined variable '"+toke.value+"' called as function","color:red"); + if(DEBUG == false){ + return false; + } + } + // Is there bracket suffix notation? + if(is_bsn(toke.end)){ + dbg_print("%c NONTRIVIAL: Bracket suffix notation on variable '"+toke.value+"'","color:red"); + if(DEBUG == false){ + return false; + } + } + }else{ + dbg_print("trivial token:"+toke.value); } - //console.log(last_name+":"+lnode.name); + } + // If not a keyword or an identifier it's some kind of operator, field parenthesis, brackets + try{ + toke = tokens.getToken(); + }catch(e){ + console.warn("Tokenizer error (probably invalid code)"); + console.warn("Continuing evaluation"); } } - }); - if(nontrivial == true){ - return false; - } - return res; + dbg_print("%cAppears to be trivial.","color:green;"); + return true; } + +//**************************************************************************************************** /** +* This is the entry point for full code evaluation. +* * Performs the initial pass on code to see if it needs to be completely parsed +* +* This can only determine if a script is bad, not if it's good +* +* If it passes the intitial pass, it runs the full pass and returns the result +* */ function evaluate(script,name){ function reserved_object_regex(object){ @@ -1743,8 +1745,9 @@ function evaluate(script,name){ return [flag,reason+"<br>"]; } - flag = full_evaluate(temp); - + var temp = full_evaluate(temp); + flag = temp[0]; + reason = flag[1]; // If flag is set true at this point, the script is trivial if(flag){ @@ -1975,7 +1978,7 @@ function read_script(a){ function remove_noscripts(html_doc){ for(var i = 0; i < html_doc.getElementsByName("librejs-path").length; i++){ if(html_doc.getElementsByName("librejs-path")[i].tagName == "NOSCRIPT"){ - debugger; + html_doc.getElementsByName("librejs-path")[i].outerHTML = html_doc.getElementsByName("librejs-path")[i].innerHTML; } } @@ -834,50 +834,22 @@ var fname_data = { }; -var DEBUG = true; +//************************Comes from HTML file index.html's script test.js**************************** + /** -* Loop over a function and get a list of things being called. +* If this is true, it evaluates entire scripts instead of returning as soon as it encounters a violation. * -* Tests to see if the function calls itself. -* -* Note: If it is an anonmyous function, recursion isn't possible. +* Also, it controls whether or not this part of the code logs to the console. * */ -function get_function_names(input_node_src,scope){ - var func_name = ""; - var flag = true; - - // The name of the function can't appear anywhere. - // No bracket suffix notation either. - console.log("Searching for identifier '"+scope+"'"); - - var tokens = acorn_base.tokenizer(input_node_src); - var toke = tokens.getToken(); - while(toke.type != acorn_base.tokTypes.eof){ - if(toke.type.label == "name" && scope == toke.value){ - return true; - } - toke = tokens.getToken(); - } - - return false; -} +var DEBUG = true; +console.log("DEBUG:"+DEBUG); -function test_function_name(name){ - var res = fname_data[name]; - if(res == false){ - console.log("'"+name+"'"+" is trivial."); - return false; - } - if(res == true){ - console.log("%cNONTRIVIAL:'"+name+"'"+" is non-trivial.","color:red"); - return true; +function dbg_print(a,b){ + if(DEBUG == true){ + console.log(a,b) } - console.log("%cNONTRIVIAL:'"+name+"'"+" is probably user defined.","color:red"); - return false; - } -//746 function full_evaluate(script){ var res = true; @@ -889,121 +861,109 @@ function full_evaluate(script){ var flag = false; var amtloops = 0; - // COUNTS LOOPS AND CONDITIONALS - walk.simple(ast, { - ForInStatement(node){ - if(amtloops > 3){return;} - amtloops++; - }, - ForStatement(node){ - if(amtloops > 3){return;} - amtloops++; - }, - DoWhileStatement(node){ - if(amtloops > 3){return;} - amtloops++; - }, - WhileStatement(node){ - if(amtloops > 3){return;} - amtloops++; - }, - IfStatement(node){ - if(amtloops > 3){return;} - amtloops++; - }, - SwitchStatement(node){ - if(amtloops > 3){return;} - amtloops++; - } - }); + var loopkeys = {"for":true,"if":true,"while":true,"switch":true}; + var tokens = acorn_base.tokenizer(script); + var toke = tokens.getToken(); - if(amtloops > 3){ - console.log("%c NONTRIVIAL: Too many loops/conditionals.","color:red"); - if(DEBUG == false){ + /** + * Given the end of an identifer token, it tests for bracket suffix notation + */ + function being_called(end){ + var i = 0; + while(script.charAt(end+i).match(/\s/g) !== null){ + i++; + if(i >= script.length-1){ + return false; + } + } + if(script.charAt(end+i) == "("){ + return true; + }else{ + return false; + } + } + /** + * Given the end of an identifer token, it tests for parentheses + */ + function is_bsn(end){ + var i = 0; + while(script.charAt(end+i).match(/\s/g) !== null){ + i++; + if(i >= script.length-1){ + return false; + } + } + if(script.charAt(end+i) == "["){ + return true; + }else{ return false; - } + } } - // Detect which objects are referenced and which functions are called - // Only cares about top level objects. Window is special because we will test its methods. - var nontrivial = false; + while(toke.type != acorn_base.tokTypes.eof){ + console.log(toke); + if(toke.type.keyword !== undefined){ + // This type of loop detection ignores functional loop alternatives and ternary operators + dbg_print("Keyword:"+toke.type.keyword); - // Has a while loop to cut down on recursion when not needed - function read(lnode){ - while(true){ - // window.test() - if(lnode.type == "CallExpression"){ - if(lnode.property !== undefined){ - last_name = lnode.property.name; + if(toke.type.keyword == "function"){ + dbg_print("%c NONTRIVIAL: Function declaration.","color:red"); + if(DEBUG == false){ + return false; + } + } + + if(loopkeys[toke.type.keyword] !== undefined){ + amtloops++; + if(amtloops > 3){ + dbg_print("%c NONTRIVIAL: Too many loops/conditionals.","color:red"); + if(DEBUG == false){ + return false; + } } - lnode = lnode.callee; - // window.test - }else if(lnode.type == "MemberExpression"){ - last_name = lnode.property.name; - // This may be bracket suffix notation - lnode = lnode.object; - // We should be at the first in the chain. - }else if(lnode.type == "Identifier"){ - // Since window is the global object, it is special - if(lnode.name == "window"){ - return test_function_name(last_name); - } else{ - return test_function_name(lnode.name); + } + }else if(toke.value !== undefined){ + var status = fname_data[toke.value]; + if(status === true){ // is the identifier banned? + dbg_print("%c NONTRIVIAL: nontrivial token: '"+toke.value+"'","color:red"); + if(DEBUG == false){ + return false; + } + }else if(status === false){// is the identifier not banned? + // Is there bracket suffix notation? + if(is_bsn(toke.end)){ + dbg_print("%c NONTRIVIAL: Bracket suffix notation on variable '"+toke.value+"'","color:red"); + if(DEBUG == false){ + return false; + } } - }else if(lnode.type == "BinaryExpression"){ - // recurse on left side - if(read(lnode.left) == true){ - return true; + }else if(status === undefined){// is the identifier user defined? + // Are arguments being passed to a user defined variable? + if(being_called(toke.end)){ + dbg_print("%c NONTRIVIAL: User defined variable '"+toke.value+"' called as function","color:red"); + if(DEBUG == false){ + return false; + } } - // continue on right side - last_name = lnode.operator; - // This may be bracket suffix notation - lnode = lnode.right; - }else if(lnode.type == "AssignmentExpression"){ - // recurse on left side - if(read(lnode.left) == true){ - return true; + // Is there bracket suffix notation? + if(is_bsn(toke.end)){ + dbg_print("%c NONTRIVIAL: Bracket suffix notation on variable '"+toke.value+"'","color:red"); + if(DEBUG == false){ + return false; + } } - // continue on right side - last_name = lnode.operator; - // This may be bracket suffix notation - lnode = lnode.right; - }else if(lnode.type == "FunctionExpression"){ - console.log("%cDefines a function.","color:red"); - return true; }else{ - console.log("Unrecognized:"); - console.log(lnode); - return; - } - //console.log(last_name+":"+lnode.name); - } - - } - - walk.simple(ast, { - ExpressionStatement(node){ - if(nontrivial == true && DEBUG == false){ - return; + dbg_print("trivial token:"+toke.value); } - // Get the first thing in the expression - if(node === undefined){ - return; - } - var lnode = node.expression; - var last_name = ""; - console.log("%cReading ExpressionStatement","color:green"); - read(lnode,last_name); - }, FunctionExpression(node){ - // } - }); - - if(nontrivial == true){ - return false; + // If not a keyword or an identifier it's some kind of operator, field parenthesis, brackets + toke = tokens.getToken(); } + dbg_print("%cAppears to be trivial.","color:green;"); return true; } +//**************************************************************************************************** + window.onload = function () { document.getElementById("parse").addEventListener("click",function(){ var script = document.getElementById("input").value; @@ -1011,6 +971,5 @@ window.onload = function () { document.getElementById("output").innerHTML = JSON.stringify(ast, null, '\t'); // Indented with tab document.getElementById("output").innerHTML = full_evaluate(script) + "\n\n" + document.getElementById("output").innerHTML; }); - } |