From 9cedcd07d6ea19520a5d5ddf140fc6601d4e573d Mon Sep 17 00:00:00 2001
From: Dafydd Harries
Date: Mon, 18 Mar 2013 00:23:41 -0400
Subject: add tool to import package descriptions

---
 README               |  4 ++++
 load_descriptions.py | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 load_descriptions.py

diff --git a/README b/README
index b412693..3eeb444 100644
--- a/README
+++ b/README
@@ -8,6 +8,10 @@ Loading data from package files:
 Packages files can be obtained from Debian mirrors, and are cached by APT in
 /var/lib/apt/lists.
 
+Loading package descriptions:
+
+    $ pv .../Translation-en | python load_descriptions.py
+
 Loading data from copyright files:
 
     $ python load_copyright.py main/*/*/current/copyright | tee cp_import.log
diff --git a/load_descriptions.py b/load_descriptions.py
new file mode 100644
index 0000000..36d6257
--- /dev/null
+++ b/load_descriptions.py
@@ -0,0 +1,32 @@
+"""Import package descriptions into the pkg.h5 HDF5 store.
+
+Reads deb822 paragraphs (e.g. a Debian Translation-en file) from standard
+input and stores them, one row per paragraph, under the 'descriptions' key
+of pkg.h5.
+
+Usage:
+
+    $ pv .../Translation-en | python load_descriptions.py
+"""
+
+import sys
+
+import debian.deb822
+import pandas as pd
+
+
+def main():
+    """Load all paragraphs from stdin into the 'descriptions' table."""
+    descs = debian.deb822.Packages.iter_paragraphs(sys.stdin)
+    # One plain dict per paragraph; field names become DataFrame columns.
+    df = pd.DataFrame([dict(p) for p in descs])
+    store = pd.HDFStore('pkg.h5')
+    try:
+        store['descriptions'] = df
+    finally:
+        # Always release the HDF5 file handle, even if the write fails.
+        store.close()
+
+
+if __name__ == '__main__':
+    main()
-- 
cgit v1.2.3