diff options
-rw-r--r-- | README | 4 | ||||
-rw-r--r-- | load_descriptions.py | 12 |
2 files changed, 16 insertions, 0 deletions
@@ -8,6 +8,10 @@ Loading data from package files:
 Packages files can be obtained from Debian mirrors, and are cached by APT in
 /var/lib/apt/lists.
 
+Loading package descriptions:
+
+    $ pv .../Translation-en | python load_descriptions.py
+
 Loading data from copyright files:
 
     $ python load_copyright.py main/*/*/current/copyright | tee cp_import.log
diff --git a/load_descriptions.py b/load_descriptions.py
new file mode 100644
index 0000000..36d6257
--- /dev/null
+++ b/load_descriptions.py
@@ -0,0 +1,12 @@
+
+import sys
+
+import debian.deb822
+import pandas as pd
+
+descs = debian.deb822.Packages.iter_paragraphs(sys.stdin)
+df = pd.DataFrame([dict(p) for p in descs])
+store = pd.HDFStore('pkg.h5')
+store['descriptions'] = df
+store.close()
+