Python collections — Every Class and Function Explained
collections is Python’s standard library module with specialized container datatypes beyond the built-in list, dict, set, and tuple.
from collections import *namedtuple — Lightweight Immutable Data Objects
A factory for tuple subclasses with named fields. More memory-efficient than a class, immutable, and indexable.
from collections import namedtuple
Point = namedtuple("Point", ["x", "y"])
p = Point(3, 4)
p.x # 3 (access by name)
p[0] # 3 (access by index)
x, y = p # unpacking
# _make — from iterable
Point._make([5, 6]) # Point(x=5, y=6)
# _asdict — to OrderedDict
p._asdict() # {'x': 3, 'y': 4}
# _replace — new instance with changed fields
p._replace(x=10) # Point(x=10, y=4)Named Tuple Variants
# Defaults for trailing fields
Node = namedtuple("Node", ["val", "left", "right"], defaults=[None, None])
Node(1) # Node(val=1, left=None, right=None)
# Rename invalid field names
Point = namedtuple("Point", ["x", "y", "class"], rename=True)
# class becomes _2
# Module-level — enables pickle across files
Point = namedtuple("Point", ["x", "y"], module=__name__)When to use: Simple data containers where you want readability without the overhead of a full class. Prefer dataclasses when you need methods or mutable fields.
deque — Double-Ended Queue
A thread-safe, memory-efficient list-like container optimized for fast appends and pops from both ends.
from collections import deque
d = deque([1, 2, 3], maxlen=5)
d.append(4) # [1, 2, 3, 4]
d.appendleft(0) # [0, 1, 2, 3, 4]
d.pop() # 4
d.popleft() # 0
d.extend([5, 6]) # [1, 2, 3, 5, 6]
d.extendleft([-1, 0]) # [0, -1, 1, 2, 3, 5, 6]
d.rotate(2) # [5, 6, 0, -1, 1, 2, 3]
d.rotate(-2) # [0, -1, 1, 2, 3, 5, 6]
d.maxlen # 5
# maxlen — drops from opposite end on overflow
d = deque(maxlen=3)
for i in range(10):
d.append(i)
# d → deque([7, 8, 9], maxlen=3)| Operation | list |
deque |
|---|---|---|
| Append right | O(1) amortized | O(1) |
| Append left | O(n) | O(1) |
| Pop right | O(1) | O(1) |
| Pop left | O(n) | O(1) |
| Index access | O(1) | O(n) |
| Insert at middle | O(n) | O(n) |
When to use: Queue, stack, sliding window, round-robin, or any pattern with frequent appends/pops from both ends.
defaultdict — Dict with Missing Key Factory
Like a regular dict but calls a factory function for missing keys instead of raising KeyError.
from collections import defaultdict
# Grouping
words = ["apple", "bear", "ape", "bat"]
by_first = defaultdict(list)
for w in words:
by_first[w[0]].append(w)
# by_first → {'a': ['apple', 'ape'], 'b': ['bear', 'bat']}
# Counting
counts = defaultdict(int)
for w in words:
counts[w] += 1
# Nesting
tree = lambda: defaultdict(tree)
root = tree()
root["a"]["b"]["c"] = 42 # no KeyError, creates intermediate dicts
# Set
pairs = [("a", 1), ("a", 2), ("b", 1)]
sets = defaultdict(set)
for k, v in pairs:
sets[k].add(v)
# sets → {'a': {1, 2}, 'b': {1}}Common factories: int (0), list ([]), set (set()), dict ({}), str (""), float (0.0), lambda: "N/A".
Counter — Multiset / Frequency Tally
A dict subclass for counting hashable objects.
from collections import Counter
c = Counter("abracadabra")
# Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})
c["z"] # 0 (no KeyError)
# Top N most common
c.most_common(3) # [('a', 5), ('b', 2), ('r', 2)]
# Elements (repeats by count)
list(c.elements()) # ['a','a','a','a','a','b','b','r','r','c','d']
# Arithmetic
c1 = Counter(a=3, b=1)
c2 = Counter(a=1, b=2)
c1 + c2 # Counter({'a': 4, 'b': 3})
c1 - c2 # Counter({'a': 2}) (no negatives)
c1 & c2 # Counter({'a': 1, 'b': 1}) (intersection — min)
c1 | c2 # Counter({'a': 3, 'b': 2}) (union — max)
+c1 # Counter({'a': 3, 'b': 1}) (remove zero/negative)
-c1 # Counter() (keep only negative)
# Update and subtract
c.update("abc") # add counts
c.subtract("a") # remove counts (can go negative)OrderedDict — Dict That Remembers Insertion Order
Like a regular dict (which also preserves insertion order since Python 3.7), but with extra methods for reordering.
from collections import OrderedDict
d = OrderedDict()
d["a"] = 1
d["b"] = 2
d["c"] = 3
# Move to end/beginning
d.move_to_end("a") # a → end: order is b, c, a
d.move_to_end("a", last=False) # a → front: order is a, b, c
# Pop first/last
d.popitem(last=True) # (c, 3) — pops from end
d.popitem(last=False) # (a, 1) — pops from front
# Equality — ordered comparison matters
OrderedDict(a=1, b=2) == OrderedDict(b=2, a=1) # False
{"a": 1, "b": 2} == {"b": 2, "a": 1} # True (Python 3.7+)When to use: When you need move_to_end, popitem(last=False), or order-sensitive equality. Otherwise, plain dict is sufficient.
ChainMap — Multiple Dicts as One View
Groups multiple mappings into a single view. Lookups search each mapping in order until found. Writes always go to the first mapping.
from collections import ChainMap
defaults = {"theme": "dark", "lang": "en", "debug": False}
user = {"theme": "light"}
config = ChainMap(user, defaults)
config["theme"] # "light" (from user)
config["lang"] # "en" (from defaults)
config["debug"] # False (from defaults)
config["debug"] = True # writes to user dict only
user # {"theme": "light", "debug": True}
defaults # unchanged
# New child (adds a new map at front)
child = config.new_child({"lang": "fr"})
child["lang"] # "fr"
# Reversed — iterate from defaults to user
for key, val in config.maps: ...| Method | Description |
|---|---|
maps |
List of mappings (read/write) |
new_child(m) |
New ChainMap with m prepended |
parents |
ChainMap without the first mapping |
update() |
Updates first mapping (1:1 with dict) |
When to use: Configuration layering, argument defaults, namespace scoping — anywhere you want cascading lookup without merging dicts.
UserDict, UserList, UserString — Easier Subclassing
Wrapper classes that make subclassing dict/list/str simpler. The underlying data is accessible as .data.
from collections import UserDict, UserList, UserString
class LowerDict(UserDict):
def __setitem__(self, key, val):
super().__setitem__(key.lower(), val)
d = LowerDict({"A": 1})
d["B"] = 2
# d → {"a": 1, "b": 2}
class UpperList(UserList):
def append(self, item):
super().append(item.upper() if isinstance(item, str) else item)
l = UpperList(["a", "b"])
l.append("c")
# l → ["a", "b", "C"]Subclassing built-in types directly can be brittle (e.g. dict doesn’t call __setitem__ in update or __init__). These wrappers ensure your overrides always fire.
defaultdict vs Counter vs dict — When to Use
| Task | Tool |
|---|---|
| Counting | Counter (with most_common, arithmetic) |
| Grouping items into lists | defaultdict(list) |
| Nested dicts (tree) | defaultdict(tree) where tree = lambda: defaultdict(tree) |
Frequency with int factory |
defaultdict(int) (simple, no extra methods) |
| Histogram top N | Counter.most_common(N) |
| Mutable counter | defaultdict(int) or Counter (both work) |
Quick Reference Table
| Class | Description | Ordered | Mutable | Key Feature |
|---|---|---|---|---|
deque |
Double-ended queue | ✓ | ✓ | O(1) append/pop both ends |
defaultdict |
Dict with default factory | ✓ (3.7+) | ✓ | No KeyError on missing |
Counter |
Multiset for counting | ✓ (3.7+) | ✓ | most_common, arithmetic |
OrderedDict |
Dict with ordering methods | ✓ | ✓ | move_to_end, popitem |
namedtuple |
Immutable named tuple | ✓ | ✗ | p.x, _replace |
ChainMap |
Cascading dict view | ✓ | ✓ (first map) | maps, parents |
UserDict |
Dict subclassing wrapper | ✓ | ✓ | .data access |
UserList |
List subclassing wrapper | ✓ | ✓ | .data access |
UserString |
String subclassing wrapper | ✓ | ✗ | .data access |
Real-World Patterns
Sliding window — deque
def moving_average(iterable, n):
window = deque(maxlen=n)
for i, val in enumerate(iterable):
window.append(val)
if i >= n - 1:
yield sum(window) / n
list(moving_average([1, 2, 3, 4, 5], 3))
# [2.0, 3.0, 4.0]Multi-level counter — defaultdict(Counter)
data = [("en", "GET"), ("en", "POST"), ("fr", "GET")]
lang_methods = defaultdict(Counter)
for lang, method in data:
lang_methods[lang][method] += 1
# lang_methods["en"] → Counter({'GET': 1, 'POST': 1})Config with overrides — ChainMap
defaults = {"host": "localhost", "port": 8080}
env = {"port": 9000}
cli = {"host": "api.example.com"}
config = ChainMap(cli, env, defaults)
# priority: CLI > env > defaultsLRU cache — OrderedDict
class LRU:
def __init__(self, capacity):
self.cache = OrderedDict()
self.capacity = capacity
def get(self, key):
if key not in self.cache:
return -1
self.cache.move_to_end(key)
return self.cache[key]
def put(self, key, val):
self.cache[key] = val
self.cache.move_to_end(key)
if len(self.cache) > self.capacity:
self.cache.popitem(last=False)Golden rule: Pick the collection that best matches your access pattern. deque for queue/stack, Counter for tallying, defaultdict for grouping, ChainMap for layered config.