diff options
Diffstat (limited to 'load_changelog.py')
-rw-r--r-- | load_changelog.py | 45 |
1 files changed, 0 insertions, 45 deletions
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Collect Debian ``stable_changelog`` metadata into an HDF5 store.

Searches the ``metadata.ftp-master.debian.org`` directory (relative to the
current working directory) for files named ``stable_changelog``, parses each
one with python-debian and writes one row per successfully parsed file
(source package, version, date, author) to ``cl.h5`` under the key
``cl_versions``.

The code is written to run unchanged on Python 2.7 and Python 3.
"""

import io
import os
import subprocess
import sys

from dateutil import parser
from debian import changelog
import pandas as pd

CHANGELOG_ROOT = 'metadata.ftp-master.debian.org'
CHANGELOG_NAME = 'stable_changelog'


def import_one(pkgname, fh):
    """Parse one changelog and return its metadata as a one-row DataFrame.

    Args:
        pkgname: Package name derived from the file's path. It is only used
            in the warning written when the changelog cannot be parsed.
        fh: Changelog contents in any form ``changelog.Changelog`` accepts
            (here: an open file).

    Returns:
        A DataFrame with the columns ``_srcpkg``, ``version``, ``date``
        (formatted ``YYYY-MM-DD``) and ``author``, taken from the parsed
        Changelog object's ``package``, ``version``, ``date`` and ``author``
        attributes; ``None`` if parsing or date conversion failed.
    """
    try:
        c = changelog.Changelog(fh)
        row = {
            '_srcpkg': c.package,
            # NOTE(review): c.version is stored as returned by python-debian
            # (not converted to str) to keep the stored data unchanged.
            'version': c.version,
            'date': parser.parse(c.date).strftime('%Y-%m-%d'),
            'author': c.author,
        }
    except Exception as exc:
        # Importing is best-effort: a broken changelog must not abort the
        # whole run, but it should be visible. Unlike a bare ``except`` this
        # lets KeyboardInterrupt and SystemExit propagate.
        sys.stderr.write('skipping %s: %s\n' % (pkgname, exc))
        return None
    return pd.DataFrame([row])


def get_pkgname(path):
    """Return the package directory name for a changelog path.

    Trailing ``current`` and ``stable_changelog`` path components are
    stripped, so ``pool/foo/current/stable_changelog`` yields ``foo``.
    An empty string is returned when nothing else is left.
    """
    while True:
        path, base = os.path.split(path)
        if base not in ('current', CHANGELOG_NAME):
            return base


def main():
    """Import every ``stable_changelog`` below CHANGELOG_ROOT into ``cl.h5``."""
    # universal_newlines=True yields text on both Python 2 and 3, so the
    # paths compare correctly against the str literals in get_pkgname().
    found = subprocess.check_output(
        ['find', CHANGELOG_ROOT, '-name', CHANGELOG_NAME],
        universal_newlines=True)

    versions = []
    # One path per line: splitting on lines (not on arbitrary whitespace)
    # keeps paths that contain spaces intact.
    for path in found.splitlines():
        pkgname = get_pkgname(path)
        print('%s %s' % (pkgname, path))
        # errors='replace' keeps changelogs with stray non-UTF-8 bytes
        # readable instead of failing while decoding.
        with io.open(path, encoding='utf-8', errors='replace') as fh:
            data = import_one(pkgname, fh)

        if data is not None:
            versions.append(data)

    if not versions:
        # pd.concat() raises ValueError on an empty list.
        sys.stderr.write('no changelog could be imported; cl.h5 not written\n')
        return

    versions = pd.concat(versions)
    print(versions)
    with pd.HDFStore('cl.h5') as store:
        store['cl_versions'] = versions


if __name__ == '__main__':
    main()