diff options
author | david <public@beloved.name> | 2018-06-20 14:03:06 +0200 |
---|---|---|
committer | david <public@beloved.name> | 2018-06-20 14:03:06 +0200 |
commit | 222f7c16acc41bc94b3acafbe719442c8131a73d (patch) | |
tree | 0df5ad08296b499d8f1735ca1ef6ad1fce16a542 /addons.mozilla.org-fsd/get-data/license-reporter | |
parent | 5745506c059da79b2c7afb98fab4e59a9080644e (diff) |
Updated addons.mozilla.org-fsd to generate wiki text.
Diffstat (limited to 'addons.mozilla.org-fsd/get-data/license-reporter')
-rwxr-xr-x | addons.mozilla.org-fsd/get-data/license-reporter | 363 |
1 files changed, 312 insertions, 51 deletions
diff --git a/addons.mozilla.org-fsd/get-data/license-reporter b/addons.mozilla.org-fsd/get-data/license-reporter index 5298037..3a73598 100755 --- a/addons.mozilla.org-fsd/get-data/license-reporter +++ b/addons.mozilla.org-fsd/get-data/license-reporter @@ -34,7 +34,7 @@ if [ "$2" == "--debug" ]; then debug=true; else debug=false; fi -minimum_average_daily_users="1000000"; +minimum_average_daily_users="100000"; SCRIPTSRC=$(readlink -f "$0" || echo "$0") RUN_PATH=$(dirname "${SCRIPTSRC}" || echo .) @@ -69,6 +69,20 @@ function check_files { } +function line_status { + + + if [ "$file" == "$previous_file" ]; then + + ((line++)) + + else + + line="0"; # Should not be 1 + + fi + +} case "$1" in @@ -83,14 +97,24 @@ OPTIONS $0 --make-repository-list Downloads add-on data for free add-ons from collections shared by defined users in license-reporter-repositories. Example: https://addons.mozilla.org/en-US/firefox/collections/mozilla/ - $0 --make-collection-list $0 --make-custom-list $0 --make-search-list + $0 --get-licenses $0 --merge-lists + $0 --generate-proposed-list $0 --download-licenses-json $0 --download-free-webextensions-for-gnu-and-linux $0 --verify-license-copy + + --all-recommended + $0 --fresh-build + $0 --make-search-list + $0 --merge-lists + $0 --get-licenses + $0 --merged-free + $0 --make-wiki + " && exit 1 ;; @@ -224,15 +248,18 @@ OPTIONS page="1" - + function foo() { # Do not evaluate the number of extensions seen in https://addons.mozilla.org/en-US/firefox/search/ in a Quantum based browser since that will hide legacy add-ons. - uri_query="page=$page&platform=linux&sort=users&type=extension&tag=firefox57"; + uri_query="page=$page&platform=linux&sort=users&type=extension"; + # Get the most popular WebExtensions wget "https://addons.mozilla.org/api/v3/addons/search/?$uri_query" -O "index.html?$uri_query.json" + ((page++)) + } @@ -244,7 +271,6 @@ OPTIONS while [ "$(jq ".results[].average_daily_users" "index.html?$uri_query.json" | tail -n 1)" -gt "$minimum_average_daily_users" ]; do - ((page++)) foo done @@ -256,14 +282,59 @@ OPTIONS find index*.json -type f -printf "%Tc %p\n" | sort -n | awk '{print $NF}' | while read -r file; do - line="0"; + + + line="0"; # Should not be 1 for average_daily_users in $(jq ".results[].average_daily_users" "$file"); do + + + if [ "$(jq ".results[$line].has_eula" "$file")" == "false" ]; then + + has_eula="eulafree"; + + else + + has_eula="eula"; + + fi + + + # Clean titles + + name=$(jq .results[$line].name $RUN_PATH/build/json/$file | sed "s|en-US|enUS|;" | jq .enUS | sed "s|^\"||; s|\"$||;"); + + if [ "$name" == "null" ]; then + + name=$(jq .results[$line].name $RUN_PATH/build/json/$file | sed "s|en-GB|enGB|;" | jq .enGB | sed "s|^\"||; s|\"$||;"); + + fi + + + # Clean titles. Example of bad titles: "Ghostery – Privacy Ad Blocker" + + # - + # Hard to filter out "-" since many add-ons use it, see for example https://addons.mozilla.org/en-US/firefox/search/?platform=linux&q=1-click + # Flash Video Downloader - YouTube HD Download [4K] + + name=$(echo $name | sed ' +s/ — /\n/g; +s/ – /\n/g; +s/ - /\n/g; # looks identical to the prevoius line but is unique +s/: /\n/g; +s/(/\n/g; +s/,/\n/g; +' | head -n 1 | sed 's/ $//'); + + + + + # Exclude add-ons with to low average daily users from index.html* if [ "$average_daily_users" -ge "$minimum_average_daily_users" ]; then - echo -e "$average_daily_users\t$(jq ".results[$line].id" "$file")\t$file\t$(jq ".results[$line].slug" "$file" | sed "s|^\"||; s|\"$||;")" >> ../merged-search.txt + echo -e "$average_daily_users\t$(jq ".results[$line].id" "$file")\t$file\t$(jq ".results[$line].slug" "$file" | sed "s|^\"||; s|\"$||;")\t$name\t$has_eula" >> ../merged-search.txt fi @@ -290,17 +361,22 @@ OPTIONS cd build/ || exit mkdir -p json/current_versions cd json/current_versions || exit + + line="0"; # Should not be 1 - line="0"; while IFS= read -r table; do - ((line++)) - id=$(echo $table | awk '{print $2}'); file="$RUN_PATH/build/json/"$(echo $table | awk '{print $3}'); + line_status + + # echo "$file $line $id" + wget -nc "https://addons.mozilla.org/api/v3/addons/addon/$(jq ".results[$line].slug" "$file" | sed "s|^\"||; s|\"$||;")/versions/$(jq ".results[$line].current_version.id" "$file")/" -O "$id.json" + previous_file="$file"; + done < $RUN_PATH/build/MERGED-ALL.txt ;; @@ -309,75 +385,254 @@ OPTIONS cd build/ || exit cp -a MERGED-ALL.txt MERGED-ALL-freedom_status.txt - line="0"; + line="1"; # Should not be "0" while IFS= read -r table; do - ((line++)) - id=$(echo $table | awk '{print $2}'); - license=$(jq .license.name $RUN_PATH/build/json/current_versions/$id.json | sed "s|en-US|enUS|;" | jq .enUS | sed "s|^\"||; s|\"$||;"); - + license=$(jq .license.name "$RUN_PATH/build/json/current_versions/$id.json" | sed "s|en-US|enUS|;" | jq .enUS | sed "s|^\"||; s|\"$||;"); + # This is the complete list of pre-defined licenses listed on AMO. Apache License 2.0 is not on the list so its not easy to detect because it has to be released under a "Custom License". + # See https://addons.mozilla.org/en-US/developers/addon/<your add-on>/ownership for the current add-on list. Note that not all licenses are added to the list. + # DuckDuckGo Privacy Essentials is distributed under Apache License 2.0. + # Apache License, version 2.0 is not on the add-on license list - https://github.com/mozilla/addons-server/issues/8545 if - [ "$license" == "GNU General Public License, version 3.0" ] || + [ "$license" == "Apache License, Version 2.0" ] || # Custom license + [ "$license" == "Apache License, version 2.0" ] || # Custom license + [ "$license" == "BSD License" ] || + [ "$license" == "GNU General Public License, version 3.0" ] || [ "$license" == "GNU General Public License, version 2.0" ] || [ "$license" == "GNU Lesser General Public License, version 3.0" ] || [ "$license" == "GNU Lesser General Public License, version 2.1" ] || + [ "$license" == "MIT/X11 License" ] || [ "$license" == "Mozilla Public License, version 2.0" ] || - [ "$license" == "Mozilla Public License Version 1.1" ] || - [ "$license" == "BSD License" ] || - [ "$license" == "MIT/X11 License" ] + [ "$license" == "Mozilla Public License Version 1.1" ] then # The nonfree add-ons should not be removed since the line number must correspond with the entry number in the search result JSON files. sed -i "$line""s/$/\\tfree/" MERGED-ALL-freedom_status.txt - else + elif [ "$license" == "All Rights Reserved" ]; then + sed -i "$line""s/$/\\tnonfree/" MERGED-ALL-freedom_status.txt + else + + sed -i "$line""s/$/\\tunknown/" MERGED-ALL-freedom_status.txt + fi + # Add license for analysis + # don't use "/" as delimeters since some licenses are "MIT/X11 License" + sed -i "$line""s|$|\\t$license|" MERGED-ALL-freedom_status.txt + + ((line++)) + done < $RUN_PATH/build/MERGED-ALL.txt ;; - --generate-proposed-list) + --make-wiki) - line="0"; + cd build/ || exit + if [ ! -d "wiki" ]; then mkdir wiki; fi + + echo "{| class=\"wikitable sortable\" border=\"1\" style=\"font-size:smaller\" +! WebExtension +! Description +! Claimed license +! Users +! Rating +! Updated" > "wiki/IceCat WebExtensions (proposed).wiki" + + + unset file while IFS= read -r table; do - ((line++)) + if [ -z ${file+x} ] || [ "$file" != "$(echo $table | awk '{print $3}')" ]; then + + line="0"; # should not be 1 + + fi + average_daily_users=$(echo $table | awk '{print $1}'); id=$(echo $table | awk '{print $2}'); - source=$(echo $table | awk '{print $3}'); + file=$(echo $table | awk '{print $3}'); + name=$(echo "$table" | cut -f 5); + has_eula=$(echo "$table" | cut -f 6); + freedom_status=$(echo "$table" | cut -f 7); + license=$(jq .license.name $RUN_PATH/build/json/current_versions/$id.json | sed "s|en-US|enUS|;" | jq .enUS | sed "s|^\"||; s|\"$||;"); - freedom_status=$(echo $table | awk '{print $5}'); - name=$(jq .results[$line].name $RUN_PATH/build/json/$source | sed "s|en-US|enUS|;" | jq .enUS | sed "s|^\"||; s|\"$||;"); + firefox_max_version="$(jq .results[$line].current_version.compatibility.firefox.max $RUN_PATH/build/json/$file | sed "s|^\"||; s|\"$||;" | sed "s|.\*||")"; + - if [ "$name" == "null" ]; then + # Values are either an integer or "*" + re='^[0-9]+$' + if [[ $firefox_max_version =~ $re ]] && [ "$firefox_max_version" -ge 60 ] || [ "$firefox_max_version" == '*' ]; then - name=$(jq .results[$line].name $RUN_PATH/build/json/$source | sed "s|en-GB|enGB|;" | jq .enGB | sed "s|^\"||; s|\"$||;"); +# echo "$firefox_max_version" + firefox_max_version__pass=true; + + else + unset firefox_max_version__pass + fi + + if [ "$freedom_status" == "free" ] && [ "$has_eula" == "eulafree" ] && [ "$firefox_max_version__pass" = true ]; then - # Clean titles. Example of bad titles: "Ghostery – Privacy Ad Blocker" - if [[ $name = *"–"* ]]; then + # require min version 60 ddd - name=$(echo $name | sed 's/–/\n/g; s/-/\n/g' | head -n 1); + #################################### + # Build the core wiki pages - fi + average_daily_users=$(echo $table | awk '{print $1}'); + id=$(echo $table | awk '{print $2}'); + file=$(echo $table | awk '{print $3}'); + name=$(echo "$table" | cut -f 5); + has_eula=$(echo "$table" | cut -f 6); + freedom_status=$(echo "$table" | cut -f 7); + + license=$(jq .license.name $RUN_PATH/build/json/current_versions/$id.json | sed "s|en-US|enUS|;" | jq .enUS | sed "s|^\"||; s|\"$||;"); + + + if [ "$license" == "Apache License, Version 2.0" ]; then license="Apache2.0"; + elif [ "$license" == "Apache License, version 2.0" ]; then license="Apache2.0"; + elif [ "$license" == "BSD License" ]; then license="BSD 2Clause"; + elif [ "$license" == "GNU General Public License, version 3.0" ]; then license="GPLv3"; + elif [ "$license" == "GNU General Public License, version 2.0" ]; then license="GPLv2"; + elif [ "$license" == "GNU Lesser General Public License, version 3.0" ]; then license="LGPLv3"; + elif [ "$license" == "GNU Lesser General Public License, version 2.1" ]; then license="LGPLv2.1"; + elif [ "$license" == "MIT/X11 License" ]; then license="X11"; + elif [ "$license" == "Mozilla Public License, version 2.0" ]; then license="MPLv2.0"; + elif [ "$license" == "Mozilla Public License Version 1.1" ]; then license="MPLv1.1"; + fi - if [ "$freedom_status" == "free" ]; then - echo "{{Checkme item -|Section= -|Package=$name -|Package note= -}}" + + + + short_description=$(jq .results[$line].summary $RUN_PATH/build/json/$file | sed "s|en-US|enUS|;" | jq .enUS | sed "s|^\"||; s|\"$||;" | sed "s|\\\n|\n|g; s|\\\||g"); + +# echo "$short_description" + + full_description=$(jq .results[$line].description $RUN_PATH/build/json/$file | sed "s|en-US|enUS|;" | jq .enUS | sed "s|^\"||; s|\"$||;"); + homepage_url=$(jq .results[$line].homepage $RUN_PATH/build/json/$file | sed "s|en-US|enUS|;" | jq .enUS | sed "s|^\"||; s|\"$||;"); + version=$(jq .results[$line].current_version.version $RUN_PATH/build/json/$file | sed "s|^\"||; s|\"$||;"); + version_date=$(jq .results[$line].last_updated $RUN_PATH/build/json/$file | sed "s|^\"||; s|\"$||;" | sed "s|T|\n|" | head -n 1 | sed "s|-|/|g"); + version_id=$(jq .results[$line].current_version.id $RUN_PATH/build/json/$file); + license_copyright=$(jq .results[$line].authors[].name $RUN_PATH/build/json/$file | sed "s|^\"||; s|\"$||;"); + bayesian_average=$(jq .results[$line].ratings.bayesian_average $RUN_PATH/build/json/$file); + bayesian_average__simple=$(printf "%.1f" "$(echo "$bayesian_average" | sed "s|\.|,|")"); + + last_review_date=$(date +"%Y/%m/%d"); + last_review_by="wikisysbot"; + submitted_by="wikisysbot"; + + if [[ "$(jq .results[$line].current_version.compatibility.seamonkey $RUN_PATH/build/json/$file)" != "null" ]]; then + set_seamonkey="Iceape,"; + fi + + if [[ "$(jq .results[$line].current_version.compatibility.firefox $RUN_PATH/build/json/$file)" != "null" ]]; then + set_icecat="IceCat,"; + fi + + # if [[ "$(jq .results[$line].current_version.compatibility.android $RUN_PATH/build/json/$file)" != "null" ]] || [[ "$(jq .results[$line].current_version.compatibility.mobile $RUN_PATH/build/json/$file)" != "null" ]]; then + # set_icecatmobile="IceCatMobile,"; + # fi + + if [[ "$(jq .results[$line].current_version.compatibility.thunderbird $RUN_PATH/build/json/$file)" != "null" ]]; then + set_thunderbird="Icedove,"; + fi + + # Do not list IceCatMobile + extension_of=$(echo "$set_seamonkey$set_icecat$set_thunderbird" | sed "s|,$||;"); + + + + github_true=$(echo "$homepage_url" | grep "github.com"); + + if [[ $github_true != "" ]]; then + + # Always use https, and remove anchors + homepage_url__for__vcs_checkout_command="https://github.com$(echo $github_true | sed "s|https://github.com||; s|http://github.com||; s|#|\n|;" | head -n 1)"; + # Remove trailing slash + homepage_url__for__vcs_checkout_command="${homepage_url__for__vcs_checkout_command%/}" + vcs_checkout_command="git clone $homepage_url__for__vcs_checkout_command.git"; + fi + + if [ "$name" == "GNU LibreJS" ]; then + + is_gnu="Yes"; + + else + + is_gnu="No"; + + fi + + support_url=$(jq .results[$line].support_url $RUN_PATH/build/json/$file | sed "s|en-US|enUS|;" | jq .enUS | sed "s|^\"||; s|\"$||;"); + + # List JSON structure: cat json/$file | js '.' + # XML value "homepage" is not always set in AMO API, but the XML require "Homepage URL" to be set: Therfore we use the AMO page itself in Homepage URL to complete the build of the repo automatically. + echo "{{Entry" > wiki/$id.wiki + + # Dash have to be removed in jq 1.3: https://github.com/stedolan/jq/issues/341 + echo "|Name=$name +|Short description=$short_description +|Full description=$full_description +|Homepage URL=$homepage_url +|Extension of=$extension_of +|VCS checkout command=$vcs_checkout_command +|Version identifier=$version +|Version date=$version_date +|Version download=https://addons.mozilla.org/firefox/downloads/latest/$id/addon-$version_id-latest.xpi +|Last review by=$last_review_by +|Last review date=$last_review_date +|Submitted by=$submitted_by +|Submitted date=2018/06/20 +|Is GNU=$is_gnu +}}" >> wiki/$id.wiki + + + # https://addons.mozilla.org/api/v3/addons/addon/noscript/versions/1910123/ + echo "{{Project license +|License=$license +|License copyright=$license_copyright +}}" >> wiki/$id.wiki + + if [[ "$support_url" != "" ]]; + then + + echo "{{Resource +|Resource audience=Users +|Resource kind=Support +|Resource URL=$support_url +}}" >> "wiki/$id.wiki" + + fi + + + echo "|- +| [[$name]] +| $short_description +| [[License:$license|$license]] +| $average_daily_users +| $bayesian_average__simple +| $version_date" >> "wiki/IceCat WebExtensions (proposed).wiki" + + + + + fi + + ((line++)) + done < $RUN_PATH/build/MERGED-ALL-freedom_status.txt + echo "|}" >> "wiki/IceCat WebExtensions (proposed).wiki" + ;; --download-free-webextensions-for-gnu-and-linux) @@ -393,8 +648,8 @@ OPTIONS ((line++)) - # Add-ons not avalible for GNU/Linux will be ignored (used to be very few dough). - wget -nc "https://addons.mozilla.org/firefox/downloads/latest/$slug/addon-$id-latest.xpi" || wget -nc "https://addons.mozilla.org/firefox/downloads/latest/$slug/platform:2/addon-$id-latest.xpi" + # Add-ons not avalible for GNU/Linux will be ignored (used to be very few dough). + wget -nc "https://addons.mozilla.org/firefox/downloads/latest/$slug/addon-$id-latest.xpi" || wget -nc "https://addons.mozilla.org/firefox/downloads/latest/$slug/platform:2/addon-$id-latest.xpi" done < ../license.name.enUS.txt @@ -516,18 +771,24 @@ $count_reports reports to submit"; ;; - --all) - - $0 --fresh-build "$2" - $0 --make-repository-list "$2" - $0 --make-collection-list "$2" - $0 --make-custom-list "$2" - $0 --make-search-list "$2" - $0 --merge-lists "$2" - # $0 --download-licenses-json "$2" - # $0 --download-free-webextensions-for-gnu-and-linux "$2" - # $0 --verify-license-copy "$2" + --make-xml) + # Deprecated. + # sha1sum file.xml + # However. It's not possible to insert the checsum in <sha1> for the page since it will break the checksum itself. + # Get number of edits: + revid="$(wget -qO- "https://directory.fsf.org/w/api.php?action=query&list=recentchanges&rclimit=1&format=json" | jq .query.recentchanges[0].revid)"; + xml_header="$(wget -qO- "https://directory.fsf.org/wiki/Special:Export/Main_Page" | head -n 42)"; + + ;; + + --all-recommended) + $0 --fresh-build "$2" + $0 --make-search-list "$2" + $0 --merge-lists "$2" + $0 --get-licenses "$2" + $0 --merged-free "$2" + $0 --make-wiki "$2" ;; |