How Raft Works
Raft divides the consensus problem into three subproblems:
1. Leader Election
- Servers start as followers
- If a follower hears nothing from a leader within its election timeout, it becomes a candidate (the timeout is randomized; see the sketch after this list)
- Candidates request votes from other servers
- A candidate becomes leader if it receives votes from a majority
- Leaders send periodic heartbeats to maintain authority
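Split votes are avoided by randomizing the election timeout, so servers rarely become candidates at the same moment. A minimal sketch of picking such a timeout (the 150–300 ms range follows the Raft paper’s suggestion; randomElectionTimeout is a hypothetical helper, reused in the implementation example below, that relies on the time and math/rand packages):

// randomElectionTimeout returns a fresh, randomized election timeout.
// Each server waits a different amount of time before standing for
// election, which makes simultaneous candidacies (and split votes) rare.
func randomElectionTimeout() time.Duration {
    return time.Duration(150+rand.Intn(150)) * time.Millisecond
}

A follower arms its election timer with this value and re-arms it every time it hears from the current leader.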
2. Log Replication
- Clients send commands to the leader
- The leader appends the command to its log
- The leader replicates the log entry to followers
- Once the entry is stored on a majority of servers, the leader commits it (a sketch of this commit rule follows the list)
- The leader notifies followers of committed entries
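An entry counts as committed once it is stored on a majority of servers. The implementation example below leaves this step (replicateLogs) unspecified, but assuming the leader tracks each follower’s replication progress in matchIndex, as the RaftServer struct shown later does, a hypothetical advanceCommitIndex helper could apply the rule like this (note that a leader only directly commits entries from its own term, a detail Raft requires for safety):

// advanceCommitIndex moves the leader's commit index forward to the
// highest log index that is stored on a majority of servers and that
// belongs to the leader's current term.
func (s *RaftServer) advanceCommitIndex() {
    for n := len(s.log) - 1; n > s.commitIndex; n-- {
        if s.log[n].Term != s.currentTerm {
            continue // only entries from the current term are committed directly
        }
        count := 1 // the leader itself stores the entry
        for _, idx := range s.matchIndex {
            if idx >= n {
                count++
            }
        }
        if count > (len(s.peers)+1)/2 { // majority of the whole cluster
            s.commitIndex = n
            s.applyCommittedEntries()
            break
        }
    }
}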
3. Safety
- Election Safety: At most one leader per term
- Leader Append-Only: A leader never overwrites or deletes entries in its own log; it only appends new ones
- Log Matching: If two logs contain an entry with the same index and term, the logs are identical in all entries up through that index
- Leader Completeness: If an entry is committed in a given term, it will be present in the logs of the leaders of all later terms (the voting rule that enforces this is sketched after this list)
- State Machine Safety: If a server applies an entry to its state machine, no other server will apply a different entry for the same log index
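Leader Completeness falls out of the voting rule: a server grants its vote only to a candidate whose log is at least as up-to-date as its own, so a candidate missing committed entries can never assemble a majority. A sketch of the isLogUpToDate check referenced in the implementation below (the comparison rule itself, last entry’s term first and then log length, is the one from the Raft paper):

// isLogUpToDate reports whether a candidate's log is at least as
// up-to-date as this server's log: compare the terms of the last
// entries first, and break ties by log length.
func (s *RaftServer) isLogUpToDate(lastLogIndex, lastLogTerm int) bool {
    myLastIndex := len(s.log) - 1
    myLastTerm := 0
    if myLastIndex >= 0 {
        myLastTerm = s.log[myLastIndex].Term
    }
    if lastLogTerm != myLastTerm {
        return lastLogTerm > myLastTerm
    }
    return lastLogIndex >= myLastIndex
}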
Raft Implementation Example
Here’s a simplified implementation of Raft’s core components in Go (persistence and the RPC transport are left out):
// Server states
const (
    Follower = iota
    Candidate
    Leader
)
type RaftServer struct {
    // Server identity
    id    int
    state int
    // Persistent state
    currentTerm int
    votedFor    int
    log         []LogEntry
    // Volatile state
    commitIndex int
    lastApplied int
    // Leader state
    nextIndex  map[int]int
    matchIndex map[int]int
    // Timers for elections and heartbeats
    electionTimer  *time.Timer
    heartbeatTimer *time.Timer
    // Channels for incoming RPCs and client proposals
    rpcCh     chan RPC
    proposeCh chan interface{}
    // Network connections to other servers
    peers []RaftPeer
}
type LogEntry struct {
    Term    int
    Command interface{}
}
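The listing uses a few types it never defines. One plausible shape for them, chosen here to match the calls below rather than any particular library:

// RequestVoteArgs and RequestVoteResponse carry a candidate's vote request.
type RequestVoteArgs struct {
    Term         int
    CandidateId  int
    LastLogIndex int
    LastLogTerm  int
}

type RequestVoteResponse struct {
    Term        int
    VoteGranted bool
}

// AppendEntriesArgs and AppendEntriesResponse carry heartbeats and log entries.
type AppendEntriesArgs struct {
    Term         int
    LeaderId     int
    PrevLogIndex int
    PrevLogTerm  int
    Entries      []LogEntry
    LeaderCommit int
}

type AppendEntriesResponse struct {
    Term    int
    Success bool
}

// RPC is a minimal envelope for a request arriving on rpcCh; handleRPC
// would dispatch it to handleRequestVote or handleAppendEntries and
// send the result back on Reply.
type RPC struct {
    Args  interface{}
    Reply chan interface{}
}

// RaftPeer abstracts the network connection to one other server.
type RaftPeer interface {
    RequestVote(args RequestVoteArgs) RequestVoteResponse
    AppendEntries(args AppendEntriesArgs) AppendEntriesResponse
}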
func (s *RaftServer) Start() {
    s.state = Follower
    s.votedFor = -1    // no vote cast yet
    s.commitIndex = -1 // no entry committed yet (log indices are 0-based)
    s.lastApplied = -1
    s.resetElectionTimer()
    for {
        switch s.state {
        case Follower:
            s.runFollower()
        case Candidate:
            s.runCandidate()
        case Leader:
            s.runLeader()
        }
    }
}
func (s *RaftServer) runFollower() {
    select {
    case <-s.electionTimer.C:
        s.state = Candidate
    case rpc := <-s.rpcCh:
        s.handleRPC(rpc)
        s.resetElectionTimer()
    }
}
func (s *RaftServer) runCandidate() {
    s.currentTerm++
    s.votedFor = s.id
    votesReceived := 1 // Vote for self
    s.resetElectionTimer()
    args := RequestVoteArgs{
        Term:         s.currentTerm,
        CandidateId:  s.id,
        LastLogIndex: len(s.log) - 1,
        LastLogTerm:  s.getLastLogTerm(),
    }
    // Request votes from all peers in parallel; responses come back on a
    // channel so votes are counted in this goroutine, without a data race
    voteCh := make(chan RequestVoteResponse, len(s.peers))
    for _, peer := range s.peers {
        go func(p RaftPeer) {
            voteCh <- p.RequestVote(args)
        }(peer)
    }
    for s.state == Candidate {
        select {
        case response := <-voteCh:
            if response.Term > s.currentTerm {
                // A higher term exists; revert to follower
                s.currentTerm = response.Term
                s.state = Follower
                s.votedFor = -1
            } else if response.VoteGranted {
                votesReceived++
                // Majority of the whole cluster (peers plus this server)
                if votesReceived > (len(s.peers)+1)/2 {
                    s.state = Leader
                    s.initLeaderState()
                }
            }
        case rpc := <-s.rpcCh:
            s.handleRPC(rpc)
        case <-s.electionTimer.C:
            // Election timed out with no winner; return so the main loop
            // starts a new election for the next term
            return
        }
    }
}
func (s *RaftServer) runLeader() {
    s.sendHeartbeats()
    s.heartbeatTimer = time.NewTimer(100 * time.Millisecond)
    select {
    case <-s.heartbeatTimer.C:
        // Time to send heartbeats again
    case rpc := <-s.rpcCh:
        s.handleRPC(rpc)
    case cmd := <-s.proposeCh:
        // Append to local log
        s.log = append(s.log, LogEntry{Term: s.currentTerm, Command: cmd})
        // Replicate to followers (simplified)
        s.replicateLogs()
    }
}
func (s *RaftServer) handleRequestVote(args RequestVoteArgs) RequestVoteResponse {
    if args.Term < s.currentTerm {
        return RequestVoteResponse{Term: s.currentTerm, VoteGranted: false}
    }
    if args.Term > s.currentTerm {
        s.currentTerm = args.Term
        s.state = Follower
        s.votedFor = -1
    }
    if (s.votedFor == -1 || s.votedFor == args.CandidateId) &&
        s.isLogUpToDate(args.LastLogIndex, args.LastLogTerm) {
        s.votedFor = args.CandidateId
        return RequestVoteResponse{Term: s.currentTerm, VoteGranted: true}
    }
    return RequestVoteResponse{Term: s.currentTerm, VoteGranted: false}
}
func (s *RaftServer) handleAppendEntries(args AppendEntriesArgs) AppendEntriesResponse {
    if args.Term < s.currentTerm {
        return AppendEntriesResponse{Term: s.currentTerm, Success: false}
    }
    // Valid leader for this term or a newer one: reset the election timer
    s.resetElectionTimer()
    if args.Term > s.currentTerm {
        s.currentTerm = args.Term
        s.votedFor = -1
    }
    // Seeing a legitimate leader always puts this server in the follower state
    // (this also makes a candidate of the same term step down)
    s.state = Follower
    // Check if log contains an entry at prevLogIndex with prevLogTerm
    if args.PrevLogIndex >= len(s.log) ||
        (args.PrevLogIndex >= 0 && s.log[args.PrevLogIndex].Term != args.PrevLogTerm) {
        return AppendEntriesResponse{Term: s.currentTerm, Success: false}
    }
    // Process log entries
    for i, entry := range args.Entries {
        index := args.PrevLogIndex + 1 + i
        if index < len(s.log) {
            if s.log[index].Term != entry.Term {
                // Delete the conflicting entry and all that follow, then append
                s.log = s.log[:index]
                s.log = append(s.log, entry)
            }
            // Else the entry already exists, do nothing
        } else {
            // Append new entries
            s.log = append(s.log, entry)
        }
    }
    // Update commit index (min is a builtin for ints as of Go 1.21)
    if args.LeaderCommit > s.commitIndex {
        s.commitIndex = min(args.LeaderCommit, len(s.log)-1)
        s.applyCommittedEntries()
    }
    return AppendEntriesResponse{Term: s.currentTerm, Success: true}
}
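The listing also calls several helpers without defining them: handleRPC dispatches an incoming RPC to handleRequestVote or handleAppendEntries and sends the result back, while sendHeartbeats and replicateLogs loop over the peers issuing AppendEntries RPCs based on each follower’s nextIndex and, on success, feed matchIndex and the commit rule sketched earlier. The remaining small helpers might look like this, under the same assumptions as above:

// resetElectionTimer re-arms the election timeout with a fresh random
// duration; allocating a new timer sidesteps Stop/Reset draining subtleties.
func (s *RaftServer) resetElectionTimer() {
    if s.electionTimer != nil {
        s.electionTimer.Stop()
    }
    s.electionTimer = time.NewTimer(randomElectionTimeout())
}

// getLastLogTerm returns the term of the last log entry, or 0 for an empty log.
func (s *RaftServer) getLastLogTerm() int {
    if len(s.log) == 0 {
        return 0
    }
    return s.log[len(s.log)-1].Term
}

// initLeaderState resets the per-follower bookkeeping when this server becomes leader.
func (s *RaftServer) initLeaderState() {
    s.nextIndex = make(map[int]int)
    s.matchIndex = make(map[int]int)
    for i := range s.peers {
        s.nextIndex[i] = len(s.log) // optimistically start just past the leader's log
        s.matchIndex[i] = -1        // nothing known to be replicated yet
    }
}

// applyCommittedEntries applies every newly committed entry, in order,
// to the application's state machine.
func (s *RaftServer) applyCommittedEntries() {
    for s.lastApplied < s.commitIndex {
        s.lastApplied++
        entry := s.log[s.lastApplied]
        _ = entry // hand entry.Command to the state machine here
    }
}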
When to Use Raft
Raft is ideal when:
- You need a consensus algorithm that’s easier to understand and implement than Paxos
- You’re building a system where leader-based operation is acceptable
- You want strong consistency with reasonable performance
- You need a well-documented algorithm with many existing implementations
Challenges with Raft
- Leader-based design can be a bottleneck
- Leader changes are not instant: detecting a failed leader takes an election timeout, and electing a replacement takes at least one more round trip
- Membership changes require careful handling
ZAB: ZooKeeper Atomic Broadcast
ZAB (ZooKeeper Atomic Broadcast) is the consensus protocol used by Apache ZooKeeper, a widely used coordination service for distributed systems. While less general-purpose than Paxos or Raft, ZAB is optimized for the specific requirements of ZooKeeper.
How ZAB Works
ZAB operates in three phases:
1. Leader Election
- Servers elect a leader using a fast leader election algorithm
- The elected leader must have the most up-to-date transaction history