diff options
| author | Yuchen Pei <hi@ypei.me> | 2022-07-27 11:48:07 +1000 | 
|---|---|---|
| committer | Yuchen Pei <hi@ypei.me> | 2022-07-27 11:48:07 +1000 | 
| commit | 428a42de4119f0e33a49f02c00b9dee49224b771 (patch) | |
| tree | fb36031a618082f6b2f3250c663752f741d0af3d /content | |
| parent | 076cae05a3b190782cb76a4073b596353d739116 (diff) | |
further linting and fixes of contact finder
Diffstat (limited to 'content')
| -rw-r--r-- | content/contactFinder.js | 80 | 
1 files changed, 40 insertions, 40 deletions
| diff --git a/content/contactFinder.js b/content/contactFinder.js index b7d95a0..fb0ec27 100644 --- a/content/contactFinder.js +++ b/content/contactFinder.js @@ -3,6 +3,7 @@  * *  * Copyright (C) 2017 Nathan Nichols, Loic J. Duros, Nik Nyby  * Copyright (C) 2018 Giorgio Maone +* Copyright (C) 2022 Yuchen Pei  *  * This file is part of GNU LibreJS.  * @@ -35,10 +36,6 @@  //********************************************************************************************* -//Regexes taken from "contact_regex.js" in the current LibreJS -//Copyright (C) 2011, 2012, 2014 Loic J. Duros -//Copyright (C) 2014, 2015 Nik Nyby -  function debug(format, ...args) {    console.debug(`LibreJS - ${format}`, ...args);  } @@ -50,9 +47,9 @@ debug("Injecting contact finder in %s", document.URL);   * Contains arrays of strings classified by language   * and by degree of certainty.   */ -const contactFrags = { -  // TODO: remove lang, change things to regexp -  'de': { +const contactFrags = [ +  // de +  {      'certain': [        '^[\\s]*Kontakt os[\\s]*$',        '^[\\s]*Email Os[\\s]*$', @@ -65,7 +62,8 @@ const contactFrags = {        'Hvem vi er'      ]    }, -  'en': { +  // en +  {      'certain': [        '^[\\s]*Contact Us[\\s]*$',        '^[\\s]*Email Us[\\s]*$', @@ -83,7 +81,8 @@ const contactFrags = {        'Customer Service'      ]    }, -  'es': { +  // es +  {      'certain': [        '^[\\s]*contáctenos[\\s]*$',        '^[\\s]*Email[\\s]*$' @@ -93,7 +92,8 @@ const contactFrags = {        'Acerca de nosotros'      ]    }, -  'fr': { +  // fr +  {      'certain': [        '^[\\s]*Contactez nous[\\s]*$',        '^[\\s]*(Nous )?contacter[\\s]*$', @@ -110,44 +110,45 @@ const contactFrags = {        'Service Client(e|è)le'      ]    } -}; +];  // Taken from http://emailregex.com/ -const email_regex = new RegExp(/(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])/g); +const emailRegex = new RegExp(/(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])/g);  //********************************************************************************************* +function findMatch(link, frag, first) { +  const strUnderTest = link.innerText + " " + link.href +  const result = (strUnderTest.match(new RegExp(frag, "g")) || []).filter(x => typeof x == "string"); +  if (result.length) { +    if (first) { +      return { 'final': true, 'matched': true }; +    } else { +      //console.log(link.href + " matched " + frag); +      return { 'final': false, 'matched': true }; +    } +  } +  return { 'final': false, 'matched': false }; +} +  /**  *	Tests all links on the page for regexes under a certain certainty level.  *  *	Will return either the first regex match from the selected certainty level or all regexes that  *	match on that certainty level.  * -*	certainty_lvl can be "certain" > "probable" > "uncertain" +*	certaintyLvl can be "certain" > "probable" > "uncertain"  */  function attempt(certaintyLvl, first = true) {    // There needs to be some kind of max so that people can't troll by for example leaving a comment with a bunch of emails    // to cause LibreJS users to slow down.    const matches = []; -  for (const link of document.links) { -    if (typeof (link.innerText) !== "string" || typeof (link.href) !== "string") { -      continue; -    } -    const strUnderTest = link.innerText + " " + link.href; -    const matched = false; -    for (const byLevel of Object.values(contactFrags)) { +  const links = Array.from(document.links).filter(link => (typeof (link.innerText) === "string" || typeof (link.href) === "string")); +  for (const link of links) { +    for (const byLevel of contactFrags) {        for (const frag of byLevel[certaintyLvl]) { -        if (!matched) { -          const result = (strUnderTest.match(new RegExp(frag, "g")) || []).filter(x => typeof x == "string"); -          if (result.length) { -            if (first) { -              return { "fail": false, "result": link }; -            } else { -              //console.log(link.href + " matched " + contactFrags[j][certainty_lvl][k]); -              matches.push(link); -              matched = true; -            } -          } -        } +        const match = findMatch(link, frag, first); +        if (match.final) return { 'fail': false, 'result': [link] }; +        match.matched && matches.push(link);        }      }    } @@ -158,7 +159,7 @@ function attempt(certaintyLvl, first = true) {  *	"LibreJS detects contact pages, email addresses that are likely to be owned by the  *	maintainer of the site, Twitter and identi.ca links, and phone numbers."  */ -function find_contacts() { +function findContacts() {    for (const type of ["certain", "probable", "uncertain"]) {      const attempted = attempt(type);      if (!attempted["fail"]) { @@ -208,7 +209,7 @@ function main() {    const initFrame = prefs => {      debug("initFrame"); -    const res = find_contacts(); +    const res = findContacts();      const contentDoc = frame.contentWindow.document;      const { body } = contentDoc;      body.id = "_LibreJS_dialog"; @@ -223,22 +224,21 @@ function main() {        addHTML("<div>Could not guess any contact page for this site.</div>");      } else {        addHTML("<h3>Contact info guessed for this site</h3>"); -      if (typeof (res[1]) === "string") { +      for (const link of res[1]) {          const a = contentDoc.createElement("a"); -        a.href = a.textContent = res[1]; +        a.href = link.href; +        a.textContent = link.textContent;          content.appendChild(a); -      } else if (typeof (res[1]) === "object") { -        addHTML(`${res[0]}: ${res[1].outerHTML}`);        }      } -    // TODO: check this change is ok, i.e. if the result of match is null filter still works -    const emails = (document.documentElement.textContent.match(email_regex) || []).filter(e => !!e); +    const emails = (document.documentElement.textContent.match(emailRegex) || []).filter(e => !!e);      if (emails.length) {        addHTML("<h5>Possible email addresses:</h5>");        const list = contentDoc.createElement("ul");        for (const recipient of emails.slice(0, 10)) {          const a = contentDoc.createElement("a"); +        // TODO: fix prefs          a.href = `mailto:${recipient}?subject${encodeURIComponent(prefs["pref_subject"])            }&body=${encodeURIComponent(prefs["pref_body"])            }`; | 
