aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dafydd Harries <daf@rhydd.org> 2013-03-18 00:23:41 -0400
committer Dafydd Harries <daf@rhydd.org> 2013-03-18 00:23:41 -0400
commit 9cedcd07d6ea19520a5d5ddf140fc6601d4e573d (patch)
tree bbc03bda4f1efd59ad2feef20e973c467c2a4483
parent d38193b534327499fe9fa260a32c9458a5b09e0b (diff)
add tool to import package descriptions
-rw-r--r-- README 4
-rw-r--r-- load_descriptions.py 12
2 files changed, 16 insertions, 0 deletions
diff --git a/README b/README
index b412693..3eeb444 100644
--- a/README
+++ b/README
@@ -8,6 +8,10 @@ Loading data from package files:
Packages files can be obtained from Debian mirrors, and are cached by APT in
/var/lib/apt/lists.
+Loading package descriptions:
+
+ $ pv .../Translation-en | python load_descriptions.py
+
Loading data from copyright files:
$ python load_copyright.py main/*/*/current/copyright | tee cp_import.log
diff --git a/load_descriptions.py b/load_descriptions.py
new file mode 100644
index 0000000..36d6257
--- /dev/null
+++ b/load_descriptions.py
@@ -0,0 +1,12 @@
+"""Load deb822 package descriptions from stdin into pkg.h5."""
+
+import sys
+
+import debian.deb822
+import pandas as pd
+
+descs = debian.deb822.Packages.iter_paragraphs(sys.stdin)
+df = pd.DataFrame([dict(p) for p in descs])
+# Context manager closes the store even if the write raises.
+with pd.HDFStore('pkg.h5') as store:
+    store['descriptions'] = df