Graphs and Network Algorithms

Graphs are everywhere - social networks, transportation systems, web pages, dependency trees, and neural networks. I’ve used graphs to build recommendation systems, optimize delivery routes, and analyze network security. Understanding graphs isn’t just about data structures; it’s about modeling the connected world around us.

Graphs excel at representing relationships and enabling powerful algorithms for pathfinding, network analysis, and optimization problems that would be impossible with linear data structures.

Graph Fundamentals

A graph consists of vertices (nodes) connected by edges. Think of vertices as entities and edges as relationships between them. Graphs can be directed (one-way relationships) or undirected (mutual relationships), and weighted (relationships have costs) or unweighted.

class Graph:
    def __init__(self, directed=False):
        self.directed = directed
        self.adjacency_list = {}
    
    def add_vertex(self, vertex):
        if vertex not in self.adjacency_list:
            self.adjacency_list[vertex] = []
    
    def add_edge(self, vertex1, vertex2, weight=1):
        self.add_vertex(vertex1)
        self.add_vertex(vertex2)
        
        self.adjacency_list[vertex1].append((vertex2, weight))
        
        # If undirected, add reverse edge
        if not self.directed:
            self.adjacency_list[vertex2].append((vertex1, weight))
    
    def get_neighbors(self, vertex):
        return self.adjacency_list.get(vertex, [])

# Create a city network
graph = Graph(directed=False)
cities = ['New York', 'Los Angeles', 'Chicago', 'Houston']
connections = [
    ('New York', 'Chicago', 790),
    ('Los Angeles', 'Houston', 1550),
    ('Chicago', 'Houston', 1090)
]

for city in cities:
    graph.add_vertex(city)

for city1, city2, distance in connections:
    graph.add_edge(city1, city2, distance)

This adjacency list representation is memory-efficient for sparse graphs (few edges relative to vertices).

Graph Traversal Algorithms

Breadth-First Search (BFS)

BFS explores a graph level by level, making it perfect for finding shortest paths in unweighted graphs:

from collections import deque

def bfs_shortest_path(graph, start, end):
    if start == end:
        return [start]
    
    queue = deque([(start, [start])])
    visited = {start}
    
    while queue:
        node, path = queue.popleft()
        
        for neighbor, weight in graph.get_neighbors(node):
            if neighbor == end:
                return path + [neighbor]
            
            if neighbor not in visited:
                visited.add(neighbor)
                queue.append((neighbor, path + [neighbor]))
    
    return None  # No path found

def bfs_levels(graph, start_vertex):
    """BFS that returns vertices grouped by distance from start"""
    visited = set()
    queue = deque([(start_vertex, 0)])
    levels = {}
    
    while queue:
        vertex, level = queue.popleft()
        
        if vertex in visited:
            continue
        
        visited.add(vertex)
        
        if level not in levels:
            levels[level] = []
        levels[level].append(vertex)
        
        for neighbor, weight in graph.get_neighbors(vertex):
            if neighbor not in visited:
                queue.append((neighbor, level + 1))
    
    return levels

# Test BFS
path = bfs_shortest_path(graph, 'New York', 'Houston')
print(f"Shortest path: {path}")

levels = bfs_levels(graph, 'New York')
print(f"Vertices by distance: {levels}")

BFS guarantees the shortest path in unweighted graphs because it explores all nodes at distance k before exploring nodes at distance k+1.

Depth-First Search (DFS)

DFS explores as far as possible along each branch before backtracking. It’s useful for detecting cycles and exploring all possible paths:

def dfs_iterative(graph, start):
    visited = set()
    stack = [start]
    path = []
    
    while stack:
        node = stack.pop()
        
        if node not in visited:
            visited.add(node)
            path.append(node)
            
            # Add neighbors to stack
            neighbors = [neighbor for neighbor, weight in graph.get_neighbors(node)]
            for neighbor in reversed(neighbors):  # Reverse for consistent order
                if neighbor not in visited:
                    stack.append(neighbor)
    
    return path

def has_cycle_undirected(graph):
    """Detect cycle in undirected graph using DFS"""
    visited = set()
    
    def dfs_cycle_check(vertex, parent):
        visited.add(vertex)
        
        for neighbor, weight in graph.get_neighbors(vertex):
            if neighbor not in visited:
                if dfs_cycle_check(neighbor, vertex):
                    return True
            elif neighbor != parent:  # Back edge found
                return True
        
        return False
    
    for vertex in graph.adjacency_list:
        if vertex not in visited:
            if dfs_cycle_check(vertex, None):
                return True
    
    return False

print(f"DFS traversal: {dfs_iterative(graph, 'New York')}")
print(f"Graph has cycle: {has_cycle_undirected(graph)}")

DFS uses less memory than BFS and is often easier to implement recursively.

Shortest Path Algorithms

Dijkstra’s Algorithm

Dijkstra’s algorithm finds shortest paths in weighted graphs with non-negative edge weights:

import heapq

def dijkstra_shortest_path(graph, start_vertex):
    distances = {vertex: float('infinity') for vertex in graph.adjacency_list}
    previous = {vertex: None for vertex in graph.adjacency_list}
    distances[start_vertex] = 0
    
    # Priority queue: (distance, vertex)
    pq = [(0, start_vertex)]
    visited = set()
    
    while pq:
        current_distance, current_vertex = heapq.heappop(pq)
        
        if current_vertex in visited:
            continue
        
        visited.add(current_vertex)
        
        for neighbor, weight in graph.get_neighbors(current_vertex):
            if neighbor not in visited:
                new_distance = current_distance + weight
                
                if new_distance < distances[neighbor]:
                    distances[neighbor] = new_distance
                    previous[neighbor] = current_vertex
                    heapq.heappush(pq, (new_distance, neighbor))
    
    return distances, previous

def reconstruct_path(previous, start, end):
    path = []
    current = end
    
    while current is not None:
        path.append(current)
        current = previous[current]
    
    path.reverse()
    return path if path[0] == start else None

# Test Dijkstra's algorithm
distances, previous = dijkstra_shortest_path(graph, 'New York')

for city, distance in distances.items():
    if distance != float('infinity'):
        path = reconstruct_path(previous, 'New York', city)
        print(f"To {city}: {distance} miles, path: {' -> '.join(path)}")

Dijkstra’s algorithm is greedy - it always chooses the closest unvisited vertex, which guarantees optimal paths.

Real-World Graph Applications

Graphs naturally model social networks where vertices are people and edges are relationships:

class SocialNetwork:
    def __init__(self):
        self.graph = Graph(directed=False)
        self.user_data = {}
    
    def add_user(self, user_id, name, interests=None):
        self.graph.add_vertex(user_id)
        self.user_data[user_id] = {
            'name': name,
            'interests': interests or []
        }
    
    def add_friendship(self, user1, user2):
        self.graph.add_edge(user1, user2)
    
    def get_friends(self, user_id):
        return [friend for friend, weight in self.graph.get_neighbors(user_id)]
    
    def suggest_friends(self, user_id, max_suggestions=5):
        """Suggest friends based on mutual connections"""
        user_friends = set(self.get_friends(user_id))
        suggestions = {}
        
        # Look at friends of friends
        for friend in user_friends:
            for friend_of_friend, weight in self.graph.get_neighbors(friend):
                if (friend_of_friend != user_id and 
                    friend_of_friend not in user_friends):
                    
                    suggestions[friend_of_friend] = suggestions.get(friend_of_friend, 0) + 1
        
        # Sort by number of mutual connections
        sorted_suggestions = sorted(suggestions.items(), key=lambda x: x[1], reverse=True)
        return sorted_suggestions[:max_suggestions]
    
    def find_shortest_connection(self, user1, user2):
        return bfs_shortest_path(self.graph, user1, user2)

# Build social network
social_net = SocialNetwork()

users = [
    ('alice', 'Alice Johnson', ['reading', 'hiking']),
    ('bob', 'Bob Smith', ['gaming', 'programming']),
    ('charlie', 'Charlie Brown', ['music', 'hiking']),
    ('diana', 'Diana Prince', ['fitness', 'reading'])
]

for user_id, name, interests in users:
    social_net.add_user(user_id, name, interests)

friendships = [('alice', 'bob'), ('alice', 'charlie'), ('bob', 'diana')]
for user1, user2 in friendships:
    social_net.add_friendship(user1, user2)

print(f"Alice's friends: {social_net.get_friends('alice')}")
suggestions = social_net.suggest_friends('alice')
print(f"Friend suggestions for Alice: {suggestions}")

Web Crawler and Page Rank

Graphs can model web pages and links for search engine algorithms:

def simple_pagerank(graph, damping_factor=0.85, max_iterations=100, tolerance=1e-6):
    """Simple PageRank implementation"""
    vertices = list(graph.adjacency_list.keys())
    n = len(vertices)
    
    # Initialize PageRank values
    pagerank = {vertex: 1.0 / n for vertex in vertices}
    
    for iteration in range(max_iterations):
        new_pagerank = {}
        
        for vertex in vertices:
            # Base probability
            new_pagerank[vertex] = (1 - damping_factor) / n
            
            # Add contributions from incoming links
            for other_vertex in vertices:
                neighbors = [neighbor for neighbor, weight in graph.get_neighbors(other_vertex)]
                if vertex in neighbors:
                    outgoing_links = len(neighbors)
                    if outgoing_links > 0:
                        new_pagerank[vertex] += damping_factor * pagerank[other_vertex] / outgoing_links
        
        # Check for convergence
        diff = sum(abs(new_pagerank[v] - pagerank[v]) for v in vertices)
        if diff < tolerance:
            break
        
        pagerank = new_pagerank
    
    return pagerank

# Create web graph
web_graph = Graph(directed=True)
pages = ['A', 'B', 'C', 'D']
links = [('A', 'B'), ('A', 'C'), ('B', 'C'), ('C', 'A'), ('D', 'A')]

for page in pages:
    web_graph.add_vertex(page)

for source, target in links:
    web_graph.add_edge(source, target)

pagerank_scores = simple_pagerank(web_graph)
sorted_pages = sorted(pagerank_scores.items(), key=lambda x: x[1], reverse=True)

print("PageRank Scores:")
for page, score in sorted_pages:
    print(f"Page {page}: {score:.4f}")

PageRank was the foundation of Google’s search algorithm, using graph structure to determine page importance.

What’s Next

In Part 12, we’ll bring everything together with performance optimization and best practices. You’ll learn how to profile your data structure usage, optimize for specific use cases, and make informed decisions about when to use each data structure in real-world applications.

This final part will synthesize everything you’ve learned and give you the tools to write efficient, maintainable Python code that scales with your data and requirements.