aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--main_background.js269
-rw-r--r--test.js235
2 files changed, 233 insertions, 271 deletions
diff --git a/main_background.js b/main_background.js
index aafd873..60f0b98 100644
--- a/main_background.js
+++ b/main_background.js
@@ -1,5 +1,5 @@
-var acornbase = require("acorn");
+var acorn_base = require("acorn");
var acorn = require('acorn/dist/acorn_loose');
var jssha = require('jssha');
var walk = require("acorn/dist/walk");
@@ -1553,160 +1553,162 @@ var fname_data = {
"NodeList": true,
"StopIteration": true
};
+//************************Comes from HTML file index.html's script test.js****************************
-// "The number of conditionals and loops must be at most 5"
-var loops = ["ForInStatement","ForStatement","DoWhileStatement","WhileStatement","IfStatement","SwitchStatement"];
/**
-* Loop over a function and get a list of things being called.
+* If this is true, it evaluates entire scripts instead of returning as soon as it encounters a violation.
*
-* Tests to see if the function calls itself.
-*
-* Note: If it is an anonmyous function, recursion isn't possible.
+* Also, it controls whether or not this part of the code logs to the console.
*
*/
-function get_function_names(input_node_src,scope){
- var func_name = "";
- var flag = true;
+var DEBUG = true;
- // The name of the function can't appear anywhere.
- // No bracket suffix notation either.
- console.log("Searching for identifier '"+scope+"' in this code:");
- //console.log(input_node_src);
-
- var tokens = acorn_base.tokenizer(input_node_src);
- var toke = tokens.getToken();
- while(toke.type != acorn_base.tokTypes.eof){
- if(toke.type.label == "name" && scope == toke.value){
- return true;
- }
- toke = tokens.getToken();
+function dbg_print(a){
+ if(DEBUG == true){
+ console.log(a)
}
-
- return false;
}
-/**
-* Reads fname_data to determine if a function call is trivial
-*
-*/
-function test_function_name(name){
- var res = fname_data[name];
- if(res == false){
- console.log("'"+name+"'"+" is trivial.");
- return false;
- }
- if(res == true){
- console.log("%cNONTRIVIAL:'"+name+"'"+" is non-trivial.","color:red");
- return true;
- }
- console.log("%cNONTRIVIAL:'"+name+"'"+" is probably user defined.","color:red");
- return false;
-}
-/**
-* Rigorously determines if code is trivial or not based on the official triviality criterion
-*/
function full_evaluate(script){
- var res = true;
-
- if(script === undefined || script == ""){
- return true;
- }
- var ast = acorn.parse_dammit(script).body[0];
-
- var flag = false;
- var amtloops = 0;
-
- // COUNTS LOOPS AND CONDITIONALS
- walk.simple(ast, {
- ForInStatement(node){
- if(amtloops > 3){return;}
- amtloops++;
- },
- ForStatement(node){
- if(amtloops > 3){return;}
- amtloops++;
- },
- DoWhileStatement(node){
- if(amtloops > 3){return;}
- amtloops++;
- },
- WhileStatement(node){
- if(amtloops > 3){return;}
- amtloops++;
- },
- IfStatement(node){
- if(amtloops > 3){return;}
- amtloops++;
- },
- SwitchStatement(node){
- if(amtloops > 3){return;}
- amtloops++;
+ var res = true;
+ if(script === undefined || script == ""){
+ return true;
}
- });
- if(amtloops > 3){
- console.log("%c NONTRIVIAL: Too many loops/conditionals.","color:red");
- return false;
- }
- // Detect which objects are referenced and which functions are called
- // Only cares about top level objects. Window is special because we will test its methods.
- var nontrivial = false;
+ var ast = acorn.parse_dammit(script).body[0];
- walk.simple(ast, {
- ExpressionStatement(node){
- if(nontrivial == true){
- return;
+ var flag = false;
+ var amtloops = 0;
+
+ var loopkeys = {"for":true,"if":true,"while":true,"switch":true};
+ try{
+ var tokens = acorn_base.tokenizer(script);
+ }catch(e){
+ console.warn("Tokenizer could not be initiated (probably invalid code)");
+ return false;
+ }
+ try{
+ var toke = tokens.getToken();
+ }catch(e){
+ console.warn("couldn't get first token (probably invalid code)");
+ console.warn("Continuing evaluation");
+ }
+ var toke = tokens.getToken();
+
+ /**
+ * Given the end of an identifer token, it tests for bracket suffix notation
+ */
+ function being_called(end){
+ var i = 0;
+ while(script.charAt(end+i).match(/\s/g) !== null){
+ i++;
+ if(i >= script.length-1){
+ return false;
+ }
}
- // Get the first thing in the expression
- if(node === undefined){
- return;
- }
- var lnode = node.expression;
- var last_name = "";
- while(true){
- // window.test()
- if(lnode.type == "CallExpression"){
- if(lnode.property !== undefined){
- last_name = lnode.property.name;
+ if(script.charAt(end+i) == "("){
+ return true;
+ }else{
+ return false;
+ }
+ }
+ /**
+ * Given the end of an identifer token, it tests for parentheses
+ */
+ function is_bsn(end){
+ var i = 0;
+ while(script.charAt(end+i).match(/\s/g) !== null){
+ i++;
+ if(i >= script.length-1){
+ return false;
+ }
+ }
+ if(script.charAt(end+i) == "["){
+ return true;
+ }else{
+ return false;
+ }
+ }
+ while(toke.type != acorn_base.tokTypes.eof){
+ if(toke.type.keyword !== undefined){
+ // This type of loop detection ignores functional loop alternatives and ternary operators
+ dbg_print("Keyword:"+toke.type.keyword);
+
+ if(toke.type.keyword == "function"){
+ dbg_print("%c NONTRIVIAL: Function declaration.","color:red");
+ if(DEBUG == false){
+ return false;
+ }
+ }
+
+ if(loopkeys[toke.type.keyword] !== undefined){
+ amtloops++;
+ if(amtloops > 3){
+ dbg_print("%c NONTRIVIAL: Too many loops/conditionals.","color:red");
+ if(DEBUG == false){
+ return false;
+ }
}
- lnode = lnode.callee;
- // window.test
- }else if(lnode.type == "MemberExpression"){
- last_name = lnode.property.name;
- // This may be bracket suffix notation
- lnode = lnode.object;
- // We should be at the first in the chain.
- }else if(lnode.type == "Identifier"){
- // Since window is the global object, it is special
- if(lnode.name == "window"){
- nontrivial = test_function_name(last_name);
- break;
- } else{
- nontrivial = test_function_name(lnode.name);
- break;
+ }
+ }else if(toke.value !== undefined){
+ var status = fname_data[toke.value];
+ if(status === true){ // is the identifier banned?
+ dbg_print("%c NONTRIVIAL: nontrivial token: '"+toke.value+"'","color:red");
+ if(DEBUG == false){
+ return false;
+ }
+ }else if(status === false){// is the identifier not banned?
+ // Is there bracket suffix notation?
+ if(is_bsn(toke.end)){
+ dbg_print("%c NONTRIVIAL: Bracket suffix notation on variable '"+toke.value+"'","color:red");
+ if(DEBUG == false){
+ return false;
+ }
}
- }else if(lnode.type == "BinaryExpression"){
- // This actually might not be valid. It can't be anything nontrivial.
- console.log("%c Warn: syntax not valid","color:Red;")
- break;
- } else{
- console.log("Unrecognized: "+lnode.type);
- break;
+ }else if(status === undefined){// is the identifier user defined?
+ // Are arguments being passed to a user defined variable?
+ if(being_called(toke.end)){
+ dbg_print("%c NONTRIVIAL: User defined variable '"+toke.value+"' called as function","color:red");
+ if(DEBUG == false){
+ return false;
+ }
+ }
+ // Is there bracket suffix notation?
+ if(is_bsn(toke.end)){
+ dbg_print("%c NONTRIVIAL: Bracket suffix notation on variable '"+toke.value+"'","color:red");
+ if(DEBUG == false){
+ return false;
+ }
+ }
+ }else{
+ dbg_print("trivial token:"+toke.value);
}
- //console.log(last_name+":"+lnode.name);
+ }
+ // If not a keyword or an identifier it's some kind of operator, field parenthesis, brackets
+ try{
+ toke = tokens.getToken();
+ }catch(e){
+ console.warn("Tokenizer error (probably invalid code)");
+ console.warn("Continuing evaluation");
}
}
- });
- if(nontrivial == true){
- return false;
- }
- return res;
+ dbg_print("%cAppears to be trivial.","color:green;");
+ return true;
}
+
+//****************************************************************************************************
/**
+* This is the entry point for full code evaluation.
+*
* Performs the initial pass on code to see if it needs to be completely parsed
+*
+* This can only determine if a script is bad, not if it's good
+*
+* If it passes the intitial pass, it runs the full pass and returns the result
+*
*/
function evaluate(script,name){
function reserved_object_regex(object){
@@ -1743,8 +1745,9 @@ function evaluate(script,name){
return [flag,reason+"<br>"];
}
- flag = full_evaluate(temp);
-
+ var temp = full_evaluate(temp);
+ flag = temp[0];
+ reason = flag[1];
// If flag is set true at this point, the script is trivial
if(flag){
@@ -1975,7 +1978,7 @@ function read_script(a){
function remove_noscripts(html_doc){
for(var i = 0; i < html_doc.getElementsByName("librejs-path").length; i++){
if(html_doc.getElementsByName("librejs-path")[i].tagName == "NOSCRIPT"){
- debugger;
+ html_doc.getElementsByName("librejs-path")[i].outerHTML = html_doc.getElementsByName("librejs-path")[i].innerHTML;
}
}
diff --git a/test.js b/test.js
index d752237..1302d90 100644
--- a/test.js
+++ b/test.js
@@ -834,50 +834,22 @@ var fname_data = {
};
-var DEBUG = true;
+//************************Comes from HTML file index.html's script test.js****************************
+
/**
-* Loop over a function and get a list of things being called.
+* If this is true, it evaluates entire scripts instead of returning as soon as it encounters a violation.
*
-* Tests to see if the function calls itself.
-*
-* Note: If it is an anonmyous function, recursion isn't possible.
+* Also, it controls whether or not this part of the code logs to the console.
*
*/
-function get_function_names(input_node_src,scope){
- var func_name = "";
- var flag = true;
-
- // The name of the function can't appear anywhere.
- // No bracket suffix notation either.
- console.log("Searching for identifier '"+scope+"'");
-
- var tokens = acorn_base.tokenizer(input_node_src);
- var toke = tokens.getToken();
- while(toke.type != acorn_base.tokTypes.eof){
- if(toke.type.label == "name" && scope == toke.value){
- return true;
- }
- toke = tokens.getToken();
- }
-
- return false;
-}
+var DEBUG = true;
+console.log("DEBUG:"+DEBUG);
-function test_function_name(name){
- var res = fname_data[name];
- if(res == false){
- console.log("'"+name+"'"+" is trivial.");
- return false;
- }
- if(res == true){
- console.log("%cNONTRIVIAL:'"+name+"'"+" is non-trivial.","color:red");
- return true;
+function dbg_print(a,b){
+ if(DEBUG == true){
+ console.log(a,b)
}
- console.log("%cNONTRIVIAL:'"+name+"'"+" is probably user defined.","color:red");
- return false;
-
}
-//746
function full_evaluate(script){
var res = true;
@@ -889,121 +861,109 @@ function full_evaluate(script){
var flag = false;
var amtloops = 0;
- // COUNTS LOOPS AND CONDITIONALS
- walk.simple(ast, {
- ForInStatement(node){
- if(amtloops > 3){return;}
- amtloops++;
- },
- ForStatement(node){
- if(amtloops > 3){return;}
- amtloops++;
- },
- DoWhileStatement(node){
- if(amtloops > 3){return;}
- amtloops++;
- },
- WhileStatement(node){
- if(amtloops > 3){return;}
- amtloops++;
- },
- IfStatement(node){
- if(amtloops > 3){return;}
- amtloops++;
- },
- SwitchStatement(node){
- if(amtloops > 3){return;}
- amtloops++;
- }
- });
+ var loopkeys = {"for":true,"if":true,"while":true,"switch":true};
+ var tokens = acorn_base.tokenizer(script);
+ var toke = tokens.getToken();
- if(amtloops > 3){
- console.log("%c NONTRIVIAL: Too many loops/conditionals.","color:red");
- if(DEBUG == false){
+ /**
+ * Given the end of an identifer token, it tests for bracket suffix notation
+ */
+ function being_called(end){
+ var i = 0;
+ while(script.charAt(end+i).match(/\s/g) !== null){
+ i++;
+ if(i >= script.length-1){
+ return false;
+ }
+ }
+ if(script.charAt(end+i) == "("){
+ return true;
+ }else{
+ return false;
+ }
+ }
+ /**
+ * Given the end of an identifer token, it tests for parentheses
+ */
+ function is_bsn(end){
+ var i = 0;
+ while(script.charAt(end+i).match(/\s/g) !== null){
+ i++;
+ if(i >= script.length-1){
+ return false;
+ }
+ }
+ if(script.charAt(end+i) == "["){
+ return true;
+ }else{
return false;
- }
+ }
}
- // Detect which objects are referenced and which functions are called
- // Only cares about top level objects. Window is special because we will test its methods.
- var nontrivial = false;
+ while(toke.type != acorn_base.tokTypes.eof){
+ console.log(toke);
+ if(toke.type.keyword !== undefined){
+ // This type of loop detection ignores functional loop alternatives and ternary operators
+ dbg_print("Keyword:"+toke.type.keyword);
- // Has a while loop to cut down on recursion when not needed
- function read(lnode){
- while(true){
- // window.test()
- if(lnode.type == "CallExpression"){
- if(lnode.property !== undefined){
- last_name = lnode.property.name;
+ if(toke.type.keyword == "function"){
+ dbg_print("%c NONTRIVIAL: Function declaration.","color:red");
+ if(DEBUG == false){
+ return false;
+ }
+ }
+
+ if(loopkeys[toke.type.keyword] !== undefined){
+ amtloops++;
+ if(amtloops > 3){
+ dbg_print("%c NONTRIVIAL: Too many loops/conditionals.","color:red");
+ if(DEBUG == false){
+ return false;
+ }
}
- lnode = lnode.callee;
- // window.test
- }else if(lnode.type == "MemberExpression"){
- last_name = lnode.property.name;
- // This may be bracket suffix notation
- lnode = lnode.object;
- // We should be at the first in the chain.
- }else if(lnode.type == "Identifier"){
- // Since window is the global object, it is special
- if(lnode.name == "window"){
- return test_function_name(last_name);
- } else{
- return test_function_name(lnode.name);
+ }
+ }else if(toke.value !== undefined){
+ var status = fname_data[toke.value];
+ if(status === true){ // is the identifier banned?
+ dbg_print("%c NONTRIVIAL: nontrivial token: '"+toke.value+"'","color:red");
+ if(DEBUG == false){
+ return false;
+ }
+ }else if(status === false){// is the identifier not banned?
+ // Is there bracket suffix notation?
+ if(is_bsn(toke.end)){
+ dbg_print("%c NONTRIVIAL: Bracket suffix notation on variable '"+toke.value+"'","color:red");
+ if(DEBUG == false){
+ return false;
+ }
}
- }else if(lnode.type == "BinaryExpression"){
- // recurse on left side
- if(read(lnode.left) == true){
- return true;
+ }else if(status === undefined){// is the identifier user defined?
+ // Are arguments being passed to a user defined variable?
+ if(being_called(toke.end)){
+ dbg_print("%c NONTRIVIAL: User defined variable '"+toke.value+"' called as function","color:red");
+ if(DEBUG == false){
+ return false;
+ }
}
- // continue on right side
- last_name = lnode.operator;
- // This may be bracket suffix notation
- lnode = lnode.right;
- }else if(lnode.type == "AssignmentExpression"){
- // recurse on left side
- if(read(lnode.left) == true){
- return true;
+ // Is there bracket suffix notation?
+ if(is_bsn(toke.end)){
+ dbg_print("%c NONTRIVIAL: Bracket suffix notation on variable '"+toke.value+"'","color:red");
+ if(DEBUG == false){
+ return false;
+ }
}
- // continue on right side
- last_name = lnode.operator;
- // This may be bracket suffix notation
- lnode = lnode.right;
- }else if(lnode.type == "FunctionExpression"){
- console.log("%cDefines a function.","color:red");
- return true;
}else{
- console.log("Unrecognized:");
- console.log(lnode);
- return;
- }
- //console.log(last_name+":"+lnode.name);
- }
-
- }
-
- walk.simple(ast, {
- ExpressionStatement(node){
- if(nontrivial == true && DEBUG == false){
- return;
+ dbg_print("trivial token:"+toke.value);
}
- // Get the first thing in the expression
- if(node === undefined){
- return;
- }
- var lnode = node.expression;
- var last_name = "";
- console.log("%cReading ExpressionStatement","color:green");
- read(lnode,last_name);
- }, FunctionExpression(node){
- //
}
- });
-
- if(nontrivial == true){
- return false;
+ // If not a keyword or an identifier it's some kind of operator, field parenthesis, brackets
+ toke = tokens.getToken();
}
+ dbg_print("%cAppears to be trivial.","color:green;");
return true;
}
+//****************************************************************************************************
+
window.onload = function () {
document.getElementById("parse").addEventListener("click",function(){
var script = document.getElementById("input").value;
@@ -1011,6 +971,5 @@ window.onload = function () {
document.getElementById("output").innerHTML = JSON.stringify(ast, null, '\t'); // Indented with tab
document.getElementById("output").innerHTML = full_evaluate(script) + "\n\n" + document.getElementById("output").innerHTML;
});
-
}