From dbfe1fb86719a18364f24bff57781aeb3262cb90 Mon Sep 17 00:00:00 2001 From: Yuchen Pei Date: Thu, 13 Oct 2022 15:48:44 +1100 Subject: fixing a few problems with @license / @license-end regexes - trailing whitespace, including newlines, is consumed by the opening regex (added a test case) - added a test case for /* @license ... */ ... /* @license-end */ --- common/checks.js | 35 ++++++++++++++++++++++++----------- test/spec/LibreJSSpec.js | 12 ++++++++++++ 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/common/checks.js b/common/checks.js index 01765e4..bf90130 100644 --- a/common/checks.js +++ b/common/checks.js @@ -56,8 +56,13 @@ const LOOPKEYS = new Set(['for', 'if', 'while', 'switch']); const OPERATORS = new Set(['||', '&&', '=', '==', '++', '--', '+=', '-=', '*']); // @license match, second and third capture groups are canonicalUrl // and license name -const OPENING_LICENSE_RE = /^\s*\/[/*]\s*?(@license)\s+(\S+)\s+(\S+).*$/mi; -const CLOSING_LICENSE_RE = /^\s*\/([*/])\s*@license-end\s*(\*\/)?/mi; +// Caveat: will not work in a commented out star comments: +// '// /* @license */ ... /* @license-end */' will be checked, though +// the whole thing is a comment +const OPENING_LICENSE_RE1 = /^\s*\/\/\s*@license\s+(\S+)\s+(\S+).*$/mi; +const OPENING_LICENSE_RE2 = /\/\*\s*?@license\s+(\S+)\s+([^/*]+).*\*\//mi; +const CLOSING_LICENSE_RE1 = /^\s*\/\/\s*@license-end\s*/mi; +const CLOSING_LICENSE_RE2 = /\/\*\s*@license-end\s*\*\//mi; /** * If this is true, it evaluates entire scripts instead of returning as soon as it encounters a violation. 
* @@ -383,7 +388,13 @@ function checkScriptSource(scriptSrc, name, external = false) { // Consume inSrc by checking licenses in all @license / @license-end // blocks and triviality outside these blocks while (inSrc) { - const openingMatch = OPENING_LICENSE_RE.exec(inSrc); + const openingMatch1 = OPENING_LICENSE_RE1.exec(inSrc); + const openingMatch2 = OPENING_LICENSE_RE2.exec(inSrc); + const openingMatch = + (openingMatch1 && openingMatch2) ? + (openingMatch1.index < openingMatch2.index ? openingMatch1 + : openingMatch2) + : (openingMatch1 || openingMatch2); const openingIndex = openingMatch ? openingMatch.index : inSrc.length; // checks the triviality of the code before the license tag, if any checkTriviality(inSrc.substring(0, openingIndex)); @@ -391,21 +402,23 @@ function checkScriptSource(scriptSrc, name, external = false) { if (!inSrc) break; // checks the remaining part, that starts with an @license - const closureMatch = CLOSING_LICENSE_RE.exec(inSrc); + const closureMatch1 = CLOSING_LICENSE_RE1.exec(inSrc); + const closureMatch2 = CLOSING_LICENSE_RE2.exec(inSrc); + const closureMatch = + (closureMatch1 && closureMatch2) ? + (closureMatch1.index < closureMatch2.index ? closureMatch1 + : closureMatch2) + : (closureMatch1 || closureMatch2); if (!closureMatch) { const msg = 'ERROR: @license with no @license-end'; return [false, `\n/*\n ${msg} \n*/\n`, msg]; } - let closureEndIndex = closureMatch.index + closureMatch[0].length; - const commentEndOffset = inSrc.substring(closureEndIndex).indexOf(closureMatch[1] === '*' ? 
'*/' : '\n'); - if (commentEndOffset !== -1) { - closureEndIndex += commentEndOffset; - } + const closureEndIndex = closureMatch.index + closureMatch[0].length; - if (!(Array.isArray(openingMatch) && openingMatch.length >= 4)) { + if (!(Array.isArray(openingMatch) && openingMatch.length >= 3)) { return [false, 'Malformed or unrecognized license tag.']; } - const licenseName = checkMagnet(openingMatch[2]); + const licenseName = checkMagnet(openingMatch[1]); let message; if (licenseName) { outSrc += inSrc.substr(0, closureEndIndex); diff --git a/test/spec/LibreJSSpec.js b/test/spec/LibreJSSpec.js index 7e21c24..bbd699c 100644 --- a/test/spec/LibreJSSpec.js +++ b/test/spec/LibreJSSpec.js @@ -41,12 +41,16 @@ describe('LibreJS\' components', () => { // code calling anything else is trivial const trivialCall = 'foo();'; const licensed = `// @license ${license.magnet} ${license.id}\n${nontrivial}\n// @license-end`; + const licensedStarCommented = `/* @license ${license.magnet} ${license.id} */${nontrivial}/* @license-end */`; const unknownLicensed = `// @license ${unknownLicense.magnet} ${unknownLicense.id}\n${nontrivial}\n// @license-end`; const commentedOutUnknownLicensed = unknownLicensed.split('\n').map(y => '// ' + y).join('\n'); const malformedLicensed = `// @license\n${nontrivial}`; const commentedOutMalformedLicensed = malformedLicensed.split('\n').map(y => '// ' + y).join('\n'); + const emptyLicensed = `// @license ${license.magnet} ${license.id}\n// @license-end`; + const licensedNontrivialTrivial = `${emptyLicensed}\n${nontrivial}\n${trivial}`; + let tab, documentUrl; const enableContactFinderTests = false; @@ -121,6 +125,8 @@ describe('LibreJS\' components', () => { it('should accept scripts with known free license tags', async () => { const processed = await processScript(licensed); expect(processed || licensed).toContain(nontrivial); + const processed1 = await processScript(licensedStarCommented); + expect(processed1 || licensed).toContain(nontrivial); }); 
it('should block scripts with unknown license tags', async () => { @@ -210,6 +216,12 @@ describe('LibreJS\' components', () => { expect(extractScripts(processed, nontrivial)).not.toContain(nontrivial); }); + it('should block the unlicensed nontrivial part sandwiched between licensed and trivial parts', async () => { + const modifiedHtml = addScript(html, licensedNontrivialTrivial); + const processed = await processHtml(modifiedHtml); + expect(extractScripts(processed, nontrivial)).not.toContain(nontrivial); + }); + it('should accept scripts on globally licensed pages', async () => { const globalLicense = `/* @licstart The following is the entire license notice for the JavaScript code in this page. -- cgit v1.2.3