← Home

learnDataStructures

CSC/CPE 202

Trie (Prefix Tree)

What is a Trie?

A Trie (pronounced “try”), or prefix tree, stores a set of strings by sharing their prefixes. Each edge is labeled by a character; walking from the root and reading labels spells prefixes. We mark nodes where a word ends (a boolean flag, often called isWord or end).

  • In this page we restrict the alphabet to lowercase a..z and auto-lowercase user input.
  • Unlike a BST that compares whole keys, a Trie compares characters incrementally. The time depends on word length, not on the number of words.

Core operations (high level)

  • Insert(word): walk characters; create a child if missing; mark the last node as end=true.
  • Search(word) (exact): walk characters; return true iff you end at a node with end=true.
  • StartsWith(prefix): walk characters; return true if you didn’t fall off the tree.
  • Autocomplete(prefix, k): find the prefix node, then DFS/BFS to collect up to k words.
  • Delete(word) (this page): unmark the end flag; then prune any nodes that became useless (no children and not an end).

Complexities (alphabet size σ=26)

OperationTimeSpace (per insert)Notes
insert / search / startsWithO(L)O(L)L = length of word/prefix
autocomplete (k results)O(L + k·avgLen)Traverse prefix then collect
deleteO(L)Unmark + prune unused nodes
memoryO(total chars)Prefix sharing saves space; compare to HashSet strings

Variants

  • Standard Trie: each node has up to σ children (σ = size of alphabet, 26 for lowercase english letters, 128 for ASCII).
  • Compressed/Radix Trie: compress runs of single-child edges into one edge labeled by a string segment. Same queries; better memory.
  • Ternary Search Tree (TST): BST over characters in each node (char c; left < c < right; eq to advance). Space-efficient with sorted-array performance characteristics.

Trie — reference & pseudocode



        

Pseudocode (insert, search, startsWith, autocomplete, delete)


        
Alphabet: a..z (auto-lowercase) Delete = unmark + prune Autocomplete = DFS from prefix

Interactive Trie simulator

words: 0 · nodes: 0 · ops: 0

Implementations & Variants

Standard Trie (map children)

Python

Trie using dict children
class Node:
    __slots__ = ("end","ch")
    def __init__(self): self.end=False; self.ch={}
class Trie:
    def __init__(self): self.root=Node()
    def insert(self, w: str) -> None:
        u=self.root
        for c in w:
            if c not in u.ch: u.ch[c]=Node()
            u=u.ch[c]
        u.end=True
    def search(self, w: str) -> bool:
        u=self.root
        for c in w:
            u=u.ch.get(c); 
            if u is None: return False
        return u.end
    def startsWith(self, p: str) -> bool:
        u=self.root
        for c in p:
            u=u.ch.get(c)
            if u is None: return False
        return True
    def delete(self, w: str) -> None:
        def rec(u,i):
            if i==len(w):
                if not u.end: return False, False
                u.end=False
                return True, (not u.ch)
            c=w[i]
            v=u.ch.get(c)
            if v is None: return False, False
            existed, prune = rec(v,i+1)
            if prune: u.ch.pop(c, None)
            return existed, (not u.end and not u.ch)
        rec(self.root,0)

Java

Trie using HashMap<Character,Node>
import java.util.*;

class Trie {
  static class Node {
    boolean end;
    Map<Character,Node> ch=new HashMap<>();
  }
  private final Node root=new Node();

  public void insert(String w){
    Node u=root;
    for(char cc: w.toCharArray()){
      char c=(char)('a'+(cc-'a'));             // assume cleaned to a..z
      u.ch.putIfAbsent(c, new Node());
      u=u.ch.get(c);
    }
    u.end=true;
  }
  public boolean search(String w){
    Node u=root;
    for(char c: w.toCharArray()){
      u=u.ch.get(c);
      if(u==null) return false;
    }
    return u.end;
  }
  public boolean startsWith(String p){
    Node u=root;
    for(char c: p.toCharArray()){
      u=u.ch.get(c);
      if(u==null) return false;
    }
    return true;
  }
  public void delete(String w){
    rec(root,w,0);
  }
  private boolean rec(Node u, String w, int i){
    if(i==w.length()){
      if(!u.end) return false;
      u.end=false;
      return u.ch.isEmpty();
    }
    Node v=u.ch.get(w.charAt(i));
    if(v==null) return false;
    boolean prune = rec(v,w,i+1);
    if(prune) u.ch.remove(w.charAt(i));
    return !u.end && u.ch.isEmpty();
  }
}

C++

Trie using unordered_map<char,Node*>
#include <unordered_map>
#include <string>
using namespace std;

struct Node{
  bool end=false;
  unordered_map<char,Node*> ch;
};
struct Trie{
  Node* root = new Node();
  void insert(const string& w){
    Node* u=root;
    for(char c: w){
      if(!u->ch.count(c)) u->ch[c]=new Node();
      u=u->ch[c];
    }
    u->end=true;
  }
  bool search(const string& w){
    Node* u=root;
    for(char c: w){ auto it=u->ch.find(c); if(it==u->ch.end()) return false; u=it->second; }
    return u->end;
  }
  bool startsWith(const string& p){
    Node* u=root;
    for(char c: p){ auto it=u->ch.find(c); if(it==u->ch.end()) return false; u=it->second; }
    return true;
  }
  void erase(const string& w){ rec(root,w,0); }
  bool rec(Node* u, const string& w, int i){
    if(i==(int)w.size()){ if(!u->end) return false; u->end=false; return u->ch.empty(); }
    auto it=u->ch.find(w[i]); if(it==u->ch.end()) return false;
    bool prune = rec(it->second,w,i+1);
    if(prune) u->ch.erase(w[i]);
    return !u->end && u->ch.empty();
  }
};

Compressed (Radix) Trie

Edges carry strings instead of single chars. During traversal, we match the longest common prefix of the remaining query and the edge label.

Pseudocode (radix insert/search)
// Node has map<char, (label, child)>; label is string segment
insert(w):
  u = root
  i = 0
  while i < len(w):
    c = w[i]
    if no edge from u starting with c: add edge (w[i..], new node end=true); return
    (lab, v) = edge[u][c]
    k = LCP(lab, w[i..])
    if k == len(lab): u = v; i += k; continue
    // split edge at k
    mid = new node
    edge[u][c] = (lab[0..k), mid]
    edge[mid][lab[k]] = (lab[k..], v)
    if k == len(w)-i: mid.end = true
    else edge[mid][w[i+k]] = (w[i+k..], new node end=true)
    return
search(w):
  u=root; i=0
  while i < len(w):
    c=w[i]; if no edge[u][c]: return false
    (lab,v)=edge[u][c]
    if w[i..].startsWith(lab): i+=len(lab); u=v
    else return false
  return u.end

Ternary Search Tree (TST)

Each node stores one character c and three pointers: lo for characters < c, eq to advance to the next character (when equal), and hi for > c. Words end by setting end=true at the node that matched the final character.

Python TST
class TNode:
    __slots__=("c","lo","eq","hi","end")
    def __init__(self,c): self.c=c; self.lo=self.eq=self.hi=None; self.end=False

class TST:
    def __init__(self): self.root=None
    def insert(self, w):
        def rec(u, i):
            c=w[i]
            if u is None: u=TNode(c)
            if c < u.c: u.lo = rec(u.lo,i)
            elif c > u.c: u.hi = rec(u.hi,i)
            else:
                if i+1==len(w): u.end=True
                else: u.eq = rec(u.eq,i+1)
            return u
        if w: self.root = rec(self.root,0)
    def search(self, w):
        u=self.root; i=0
        while u:
            c=w[i]
            if c < u.c: u=u.lo
            elif c > u.c: u=u.hi
            else:
                if i+1==len(w): return u.end
                i+=1; u=u.eq
        return False
Java TST (core)
class TST {
  static class Node { char c; Node lo,eq,hi; boolean end; Node(char c){this.c=c;} }
  Node root;
  public void insert(String w){ root = ins(root,w,0); }
  private Node ins(Node u, String w, int i){
    char c=w.charAt(i);
    if(u==null) u=new Node(c);
    if(c<u.c) u.lo=ins(u.lo,w,i);
    else if(c>u.c) u.hi=ins(u.hi,w,i);
    else if(i+1==w.length()) u.end=true;
    else u.eq=ins(u.eq,w,i+1);
    return u;
  }
  public boolean search(String w){
    Node u=root; int i=0;
    while(u!=null){
      char c=w.charAt(i);
      if(c<u.c) u=u.lo;
      else if(c>u.c) u=u.hi;
      else { if(i+1==w.length()) return u.end; i++; u=u.eq; }
    }
    return false;
  }
}
C++ TST (core)
struct TNode{ char c; bool end=false; TNode* lo=nullptr; TNode* eq=nullptr; TNode* hi=nullptr; TNode(char c):c(c){} };

TNode* ins(TNode* u, const string& w, int i){
  char c=w[i];
  if(!u) u=new TNode(c);
  if(c<u->c) u->lo=ins(u->lo,w,i);
  else if(c>u->c) u->hi=ins(u->hi,w,i);
  else if(i+1==(int)w.size()) u->end=true;
  else u->eq=ins(u->eq,w,i+1);
  return u;
}
bool search(TNode* u, const string& w){
  int i=0;
  while(u){
    char c=w[i];
    if(c<u->c) u=u->lo;
    else if(c>u->c) u=u->hi;
    else { if(i+1==(int)w.size()) return u->end; i++; u=u->eq; }
  }
  return false;
}

“Library”/Idiomatic alternatives

  • Python: for many problems, a set of words + bisect over a sorted list can do prefix ranges (O(log n + k)), but Tries win when many prefixes or character-by-character traversal is needed.
  • Java: TreeSet supports subSet(prefixRange) to emulate prefix queries; HashSet for exact words.
  • C++: std::set/std::map support prefix range via lower_bound/upper_bound.

Concept Checks

  1. Explain why Trie search and insert are O(L) regardless of the number of words.
  2. Show how delete unmarks and prunes without breaking other words that share prefixes.
  3. Implement autocomplete and argue about O(L + k) behavior when results are limited to k.
  4. Compare Standard Trie vs TST vs Compressed Trie on memory and constant factors.
  5. LeetCode practice: “Implement Trie (Prefix Tree)”, “Design Add and Search Words (with ‘.’ wildcard)”, “Word Search II” (grid + Trie + backtracking).