diff options
author | Dafydd Harries <daf@rhydd.org> | 2013-03-18 00:23:41 -0400 |
---|---|---|
committer | Dafydd Harries <daf@rhydd.org> | 2013-03-18 00:23:41 -0400 |
commit | 9cedcd07d6ea19520a5d5ddf140fc6601d4e573d (patch) | |
tree | bbc03bda4f1efd59ad2feef20e973c467c2a4483 | |
parent | d38193b534327499fe9fa260a32c9458a5b09e0b (diff) |
add tool to import package descriptions
-rw-r--r-- | README | 4 | ||||
-rw-r--r-- | load_descriptions.py | 12 |
2 files changed, 16 insertions, 0 deletions
```diff
@@ -8,6 +8,10 @@ Loading data from package files:
 
 Packages files can be obtained from Debian mirrors, and are cached by APT in /var/lib/apt/lists.
 
+Loading package descriptions:
+
+    $ pv .../Translation-en | python load_descriptions.py
+
 Loading data from copyright files:
 
     $ python load_copyright.py main/*/*/current/copyright | tee cp_import.log
diff --git a/load_descriptions.py b/load_descriptions.py
new file mode 100644
index 0000000..36d6257
--- /dev/null
+++ b/load_descriptions.py
@@ -0,0 +1,12 @@
+
+import sys
+
+import debian.deb822
+import pandas as pd
+
+descs = debian.deb822.Packages.iter_paragraphs(sys.stdin)
+df = pd.DataFrame([dict(p) for p in descs])
+store = pd.HDFStore('pkg.h5')
+store['descriptions'] = df
+store.close()
+
```