;;; radix-tree.el --- A simple library of radix trees  -*- lexical-binding: t; -*-

;; Copyright (C) 2016-2022 Free Software Foundation, Inc.

;; Author: Stefan Monnier <monnier@iro.umontreal.ca>
;; Keywords:

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.

;;; Commentary:

;; NOTE: This is a modified version of radix-tree that comes builtin
;; with emacs. It allows different compare functions and type. One use
;; is to build a radix tree of list of string, e.g. from a filesystem
;; hierarchy.

;; There are many different options for how to represent radix trees
;; in Elisp.  Here I chose a very simple one.  A radix-tree can be either:
;; - a node, of the form ((PREFIX . PTREE) . RTREE) where PREFIX is a string
;;   meaning that everything that starts with PREFIX is in PTREE,
;;   and everything else in RTREE.  It also has the property that
;;   everything that starts with the first letter of PREFIX but not with
;;   that whole PREFIX is not in RTREE (i.e. is not in the tree at all).
;; - anything else is taken as the value to associate with the empty string.
;; So every node is basically an (improper) alist where each mapping applies
;; to a different leading letter.
;;
;; The main downside of this representation is that the lookup operation
;; is slower because each level of the tree is an alist rather than some kind
;; of array, so every level's lookup is O(N) rather than O(1).  We could easily
;; solve this by using char-tables instead of alists, but that would make every
;; level take up a lot more memory, and it would make the resulting
;; data structure harder to read (by a human) when printed out.

;;; Code:
(defvar radix-tree-compare-function 'compare-strings)
(defvar radix-tree-type 'string)

(defun radix-tree--insert (tree key val i)
  (pcase tree
    (`((,prefix . ,ptree) . ,rtree)
     (let* ((ni (+ i (length prefix)))
            (cmp (funcall radix-tree-compare-function prefix nil nil key i ni)))
       (if (eq t cmp)
           (let ((nptree (radix-tree--insert ptree key val ni)))
             `((,prefix . ,nptree) . ,rtree))
         (let ((n (if (< cmp 0) (- -1 cmp) (- cmp 1))))
           (if (zerop n)
               (let ((nrtree (radix-tree--insert rtree key val i)))
                 `((,prefix . ,ptree) . ,nrtree))
             (let* ((nprefix (substring prefix 0 n))
                    (kprefix (substring key (+ i n)))
                    (pprefix (substring prefix n))
                    (ktree (if (equal kprefix "") val
                             `((,kprefix . ,val)))))
               `((,nprefix
                  . ((,pprefix . ,ptree) . ,ktree))
                 . ,rtree)))))))
    (_
     (if (= (length key) i) val
       (let ((prefix (substring key i)))
         `((,prefix . ,val) . ,tree))))))

(defun radix-tree--remove (tree key i)
  (pcase tree
    (`((,prefix . ,ptree) . ,rtree)
     (let* ((ni (+ i (length prefix)))
            (cmp (funcall radix-tree-compare-function prefix nil nil key i ni)))
       (if (eq t cmp)
           (pcase (radix-tree--remove ptree key ni)
             ('nil rtree)
             (`((,pprefix . ,pptree))
              `((,(seq-concatenate radix-tree-type prefix pprefix) . ,pptree) .
                ,rtree))
             (nptree `((,prefix . ,nptree) . ,rtree)))
         (let ((n (if (< cmp 0) (- -1 cmp) (- cmp 1))))
           (if (zerop n)
               (let ((nrtree (radix-tree--remove rtree key i)))
                 `((,prefix . ,ptree) . ,nrtree))
             tree)))))
    (_
     (if (= (length key) i) nil tree))))


(defun radix-tree--lookup (tree string i)
  (pcase tree
    (`((,prefix . ,ptree) . ,rtree)
     (let* ((ni (+ i (length prefix)))
            (cmp (funcall radix-tree-compare-function prefix nil nil string i ni)))
       (if (eq t cmp)
           (radix-tree--lookup ptree string ni)
         (let ((n (if (< cmp 0) (- -1 cmp) (- cmp 1))))
           (if (zerop n)
               (radix-tree--lookup rtree string i)
             (+ i n))))))
    (val
     (if (and val (equal (length string) i))
         (if (integerp val) `(t . ,val) val)
       i))))

;; (defun radix-tree--trim (tree string i)
;;   (if (= i (length string))
;;       tree
;;     (pcase tree
;;       (`((,prefix . ,ptree) . ,rtree)
;;        (let* ((ni (+ i (length prefix)))
;;               (cmp (funcall radix-tree-compare-function prefix nil nil string i ni))
;;               ;; FIXME: We could compute nrtree more efficiently
;;               ;; whenever cmp is not -1 or 1.
;;               (nrtree (radix-tree--trim rtree string i)))
;;          (if (eq t cmp)
;;              (pcase (radix-tree--trim ptree string ni)
;;                (`nil nrtree)
;;                (`((,pprefix . ,pptree))
;;                 `((,(concat prefix pprefix) . ,pptree) . ,nrtree))
;;                (nptree `((,prefix . ,nptree) . ,nrtree)))
;;            (let ((n (if (< cmp 0) (- -1 cmp) (- cmp 1))))
;;              (cond
;;               ((equal (+ n i) (length string))
;;                `((,prefix . ,ptree) . ,nrtree))
;;               (t nrtree))))))
;;       (val val))))

(defun radix-tree--prefixes (tree string i prefixes)
  (pcase tree
    (`((,prefix . ,ptree) . ,rtree)
     (let* ((ni (+ i (length prefix)))
            (cmp (funcall radix-tree-compare-function prefix nil nil string i ni))
            ;; FIXME: We could compute prefixes more efficiently
            ;; whenever cmp is not -1 or 1.
            (prefixes (radix-tree--prefixes rtree string i prefixes)))
       (if (eq t cmp)
           (radix-tree--prefixes ptree string ni prefixes)
         prefixes)))
    (val
     (if (null val)
         prefixes
       (cons (cons (substring string 0 i)
                   (if (eq (car-safe val) t) (cdr val) val))
             prefixes)))))

(defun radix-tree--subtree (tree string i)
  (if (equal (length string) i) tree
    (pcase tree
      (`((,prefix . ,ptree) . ,rtree)
       (let* ((ni (+ i (length prefix)))
              (cmp (funcall radix-tree-compare-function prefix nil nil string i ni)))
         (if (eq t cmp)
             (radix-tree--subtree ptree string ni)
           (let ((n (if (< cmp 0) (- -1 cmp) (- cmp 1))))
             (cond
              ((zerop n) (radix-tree--subtree rtree string i))
              ((equal (+ n i) (length string))
               (let ((nprefix (substring prefix n)))
                 `((,nprefix . ,ptree))))
              (t nil))))))
      (_ nil))))

;;; Entry points

(defconst radix-tree-empty nil
  "The empty radix-tree.")

(defun radix-tree-insert (tree key val)
  "Insert a mapping from KEY to VAL in radix TREE."
  (when (consp val) (setq val `(t . ,val)))
  (if val (radix-tree--insert tree key val 0)
    (radix-tree--remove tree key 0)))

(defun radix-tree-lookup (tree key)
  "Return the value associated to KEY in radix TREE.
If not found, return nil."
  (pcase (radix-tree--lookup tree key 0)
    (`(t . ,val) val)
    ((pred numberp) nil)
    (val val)))

(defun radix-tree-subtree (tree string)
  "Return the subtree of TREE rooted at the prefix STRING."
  (radix-tree--subtree tree string 0))

;; (defun radix-tree-trim (tree string)
;;   "Return a TREE which only holds entries \"related\" to STRING.
;; \"Related\" is here defined as entries where there's a `string-prefix-p' relation
;; between STRING and the key."
;;   (radix-tree-trim tree string 0))

(defun radix-tree-prefixes (tree string)
  "Return an alist of all bindings in TREE for prefixes of STRING."
  (radix-tree--prefixes tree string 0 nil))

(pcase-defmacro radix-tree-leaf (vpat)
  "Pattern which matches a radix-tree leaf.
The pattern VPAT is matched against the leaf's carried value."
  ;; We used to use `(pred atom)', but `pcase' doesn't understand that
  ;; `atom' is equivalent to the negation of `consp' and hence generates
  ;; suboptimal code.
  `(or `(t . ,,vpat) (and (pred (not consp)) ,vpat)))

(defun radix-tree-iter-subtrees (tree fun)
  "Apply FUN to every immediate subtree of radix TREE.
FUN is called with two arguments: PREFIX and SUBTREE.
You can test if SUBTREE is a leaf (and extract its value) with the
pcase pattern (radix-tree-leaf PAT)."
  (while tree
    (pcase tree
      (`((,prefix . ,ptree) . ,rtree)
       (funcall fun prefix ptree)
       (setq tree rtree))
      (_ (funcall fun "" tree)
         (setq tree nil)))))

(defun radix-tree-iter-mappings (tree fun &optional prefix)
  "Apply FUN to every mapping in TREE.
FUN is called with two arguments: KEY and VAL.
PREFIX is only used internally."
  (radix-tree-iter-subtrees
   tree
   (lambda (p s)
     (let ((nprefix (seq-concatenate radix-tree-type prefix p)))
       (pcase s
         ((radix-tree-leaf v) (funcall fun nprefix v))
         (_ (radix-tree-iter-mappings s fun nprefix)))))))

;; (defun radix-tree->alist (tree)
;;   (let ((al nil))
;;     (radix-tree-iter-mappings tree (lambda (p v) (push (cons p v) al)))
;;     al))

(defun radix-tree-count (tree)
  (let ((i 0))
    (radix-tree-iter-mappings tree (lambda (_k _v) (setq i (1+ i))))
    i))

(declare-function map-apply "map" (function map))

(defun radix-tree-from-map (map)
  ;; Aka (cl-defmethod map-into (map (type (eql 'radix-tree)))) ...)
  (require 'map)
  (let ((rt nil))
    (map-apply (lambda (k v) (setq rt (radix-tree-insert rt k v))) map)
    rt))

(provide 'radix-tree)
;;; radix-tree.el ends here