Performance and Optimization

Advanced Error Handling

gRPC provides a rich error model that can be leveraged for detailed error reporting:

package main

import (
	"context"
	"database/sql"
	"errors"
	"log"
	"net"

	"github.com/example/service/proto"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
	"google.golang.org/protobuf/types/known/timestamppb"
)

// Sentinel errors for the domain layer. They are plain error values (not
// distinct types), so callers should compare against them with errors.Is,
// as the handlers in this file do when mapping failures to gRPC status codes.
var (
	ErrNotFound      = errors.New("resource not found")
	ErrAlreadyExists = errors.New("resource already exists")
	ErrDatabase      = errors.New("database error")
	ErrValidation    = errors.New("validation error")
)

// userService implements the UserService gRPC server and translates domain
// errors into gRPC status errors.
//
// Embedding proto.UnimplementedUserServiceServer supplies default
// implementations for any RPCs this type does not define itself.
type userService struct {
	proto.UnimplementedUserServiceServer
	db *sql.DB // Database handle; presumably used by the find*/create* helpers defined elsewhere — confirm.
}

// GetUser returns the user identified by req.UserId.
//
// Failures are reported as gRPC status errors:
//   - InvalidArgument when the user ID is empty,
//   - NotFound when the lookup reports ErrNotFound,
//   - Internal when the lookup reports ErrDatabase (details are logged, not
//     returned to the client),
//   - Unknown for any other lookup error (also logged).
func (s *userService) GetUser(ctx context.Context, req *proto.GetUserRequest) (*proto.User, error) {
	id := req.UserId
	if id == "" {
		return nil, status.Error(codes.InvalidArgument, "user ID cannot be empty")
	}

	found, lookupErr := s.findUserByID(ctx, id)
	if lookupErr == nil {
		return found, nil
	}

	// Translate the domain error into the matching gRPC status. Only the
	// not-found case echoes request data back; the others hide internals.
	if errors.Is(lookupErr, ErrNotFound) {
		return nil, status.Errorf(codes.NotFound, "user not found: %s", id)
	}
	if errors.Is(lookupErr, ErrDatabase) {
		log.Printf("Database error: %v", lookupErr)
		return nil, status.Error(codes.Internal, "internal server error")
	}

	log.Printf("Unknown error: %v", lookupErr)
	return nil, status.Error(codes.Unknown, "unknown error")
}

// CreateUser validates the request, rejects duplicate e-mail addresses and
// then creates the user.
//
// Failures are reported as gRPC status errors:
//   - InvalidArgument (with a proto.ValidationError detail when it can be
//     attached) when validation fails,
//   - AlreadyExists when a user with req.Email is found,
//   - Internal when the duplicate lookup fails for a reason other than
//     ErrNotFound, or when creation itself fails (both are logged).
func (s *userService) CreateUser(ctx context.Context, req *proto.CreateUserRequest) (*proto.User, error) {
	if validationErr := validateCreateUserRequest(req); validationErr != nil {
		base := status.New(codes.InvalidArgument, "validation failed")

		// NOTE(review): the attached detail is fixed to the e-mail field and
		// does not reflect the contents of validationErr — confirm whether
		// that is intended.
		detail := &proto.ValidationError{
			Field:   "email",
			Message: "invalid email format",
		}
		enriched, attachErr := base.WithDetails(detail)
		if attachErr != nil {
			// Could not attach details; fall back to the plain status.
			return nil, base.Err()
		}
		return nil, enriched.Err()
	}

	// A successful lookup means the address is taken; ErrNotFound is the
	// only outcome that lets creation proceed.
	switch _, lookupErr := s.findUserByEmail(ctx, req.Email); {
	case lookupErr == nil:
		return nil, status.Errorf(codes.AlreadyExists, "user with email %s already exists", req.Email)
	case !errors.Is(lookupErr, ErrNotFound):
		log.Printf("Database error: %v", lookupErr)
		return nil, status.Error(codes.Internal, "internal server error")
	}

	created, createErr := s.createUser(ctx, req)
	if createErr != nil {
		log.Printf("Failed to create user: %v", createErr)
		return nil, status.Error(codes.Internal, "failed to create user")
	}

	return created, nil
}

Client-side error handling:

package main

import (
	"context"
	"log"
	"time"

	"github.com/example/service/proto"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/grpc/status"
)

// main dials the user service on localhost:50051 without transport security,
// requests a user that is expected not to exist, and logs the outcome,
// including any ValidationError details carried by a gRPC status error.
func main() {
	conn, err := grpc.Dial(
		"localhost:50051",
		grpc.WithTransportCredentials(insecure.NewCredentials()),
	)
	if err != nil {
		log.Fatalf("failed to connect: %v", err)
	}
	defer conn.Close()

	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()

	// Try to get a non-existent user
	request := &proto.GetUserRequest{UserId: "nonexistent"}
	user, err := proto.NewUserServiceClient(conn).GetUser(ctx, request)
	if err == nil {
		log.Printf("Got user: %v", user)
		return
	}

	st, isStatus := status.FromError(err)
	if !isStatus {
		log.Printf("Non-gRPC error: %v", err)
		return
	}

	// Pick the log prefix from the status code; the message comes from the server.
	msg := st.Message()
	switch st.Code() {
	case codes.NotFound:
		log.Printf("User not found: %s", msg)
	case codes.InvalidArgument:
		log.Printf("Invalid argument: %s", msg)
	case codes.Internal:
		log.Printf("Internal error: %s", msg)
	default:
		log.Printf("Unexpected error: %s", msg)
	}

	// Check for additional error details
	for _, detail := range st.Details() {
		if ve, isValidation := detail.(*proto.ValidationError); isValidation {
			log.Printf("Validation error: field %s - %s", ve.Field, ve.Message)
		}
	}
}

Retry and Circuit Breaking

Implementing retry logic and circuit breaking for resilient clients:

package main

import (
	"context"
	"log"
	"math/rand"
	"sync"
	"time"

	"github.com/example/service/proto"
	"github.com/sony/gobreaker"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/grpc/status"
)

// ResilienceClient wraps a gRPC UserService client with retry and circuit
// breaking capabilities. Construct it with NewResilienceClient.
type ResilienceClient struct {
	// client is the underlying generated gRPC client.
	client     proto.UserServiceClient
	// cb guards calls made through this wrapper.
	cb         *gobreaker.CircuitBreaker
	// maxRetries is the number of retries after the first attempt.
	maxRetries int
}

// NewResilienceClient builds a ResilienceClient on top of conn.
//
// maxRetries is the number of retries allowed after the first attempt. The
// circuit breaker is named "user-service" and trips once at least 5 requests
// have been counted and at least half of them failed; every state change is
// logged.
func NewResilienceClient(conn *grpc.ClientConn, maxRetries int) *ResilienceClient {
	// shouldTrip reports whether the breaker should open for the given counts.
	shouldTrip := func(counts gobreaker.Counts) bool {
		if counts.Requests < 5 {
			return false
		}
		ratio := float64(counts.TotalFailures) / float64(counts.Requests)
		return ratio >= 0.5
	}

	logTransition := func(name string, from gobreaker.State, to gobreaker.State) {
		log.Printf("Circuit breaker %s state changed from %s to %s", name, from, to)
	}

	breaker := gobreaker.NewCircuitBreaker(gobreaker.Settings{
		Name:          "user-service",
		MaxRequests:   5,                // Max requests allowed when circuit is half-open
		Interval:      10 * time.Second, // Cyclic period of the closed state
		Timeout:       30 * time.Second, // Time after which an open circuit moves to half-open
		ReadyToTrip:   shouldTrip,
		OnStateChange: logTransition,
	})

	rc := new(ResilienceClient)
	rc.client = proto.NewUserServiceClient(conn)
	rc.cb = breaker
	rc.maxRetries = maxRetries
	return rc
}

// GetUser calls the GetUser RPC with retry and circuit breaking.
//
// The whole retry sequence runs inside a single circuit-breaker execution, so
// the breaker records one success or failure per call to this method. Each
// attempt gets its own 2-second timeout derived from ctx. Failures that
// isRetriable accepts are retried up to c.maxRetries times with exponential
// backoff plus jitter; a negative maxRetries is treated as zero so at least
// one attempt is always made.
//
// If ctx is cancelled or its deadline passes while waiting between attempts,
// the wait is abandoned immediately and the context error is returned as a
// gRPC status error. Errors from the circuit breaker itself (for example when
// it is open) are returned unchanged.
func (c *ResilienceClient) GetUser(ctx context.Context, userID string) (*proto.User, error) {
	// Clamp so the loop below always runs at least once; otherwise the
	// closure would return (nil, nil) and the type assertion would panic.
	maxRetries := c.maxRetries
	if maxRetries < 0 {
		maxRetries = 0
	}

	// Execute through circuit breaker
	result, err := c.cb.Execute(func() (interface{}, error) {
		var lastErr error
		for attempt := 0; attempt <= maxRetries; attempt++ {
			user, err := c.getUserOnce(ctx, userID)
			if err == nil {
				return user, nil
			}
			lastErr = err

			// Non-retriable error or max retries reached
			if !isRetriable(err) || attempt == maxRetries {
				return nil, err
			}

			// Exponential backoff with jitter
			backoff := time.Duration(1<<uint(attempt)) * 100 * time.Millisecond
			jitter := time.Duration(rand.Intn(100)) * time.Millisecond
			sleepTime := backoff + jitter

			log.Printf("Request failed with retriable error: %v. Retrying in %v...", err, sleepTime)

			// Wait for the backoff, but give up at once if the caller's
			// context ends: further attempts could not succeed anyway.
			timer := time.NewTimer(sleepTime)
			select {
			case <-timer.C:
			case <-ctx.Done():
				timer.Stop()
				return nil, status.FromContextError(ctx.Err()).Err()
			}
		}

		// Unreachable: the final loop iteration always returns.
		return nil, lastErr
	})
	if err != nil {
		return nil, err
	}

	// On success the closure above only ever returns a *proto.User.
	return result.(*proto.User), nil
}

// getUserOnce performs a single GetUser attempt with its own 2-second timeout,
// releasing the attempt's context as soon as the call returns.
func (c *ResilienceClient) getUserOnce(ctx context.Context, userID string) (*proto.User, error) {
	attemptCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
	defer cancel()

	return c.client.GetUser(attemptCtx, &proto.GetUserRequest{
		UserId: userID,
	})
}

// isRetriable reports whether err is a gRPC status error whose code indicates
// a failure worth retrying. A nil error and errors that do not carry a gRPC
// status are never retriable.
func isRetriable(err error) bool {
	if err == nil {
		return false
	}

	st, isStatus := status.FromError(err)
	if !isStatus {
		// Non-gRPC error, don't retry
		return false
	}

	code := st.Code()
	return code == codes.Unavailable || // Server is currently unavailable
		code == codes.ResourceExhausted || // Client or server has insufficient quota
		code == codes.DeadlineExceeded || // Request timed out
		code == codes.Aborted // Concurrency conflict
}

Health Checking

Implementing health checks for service monitoring:

// health_service.proto
syntax = "proto3";
package health;
option go_package = "github.com/example/health";

// HealthService lets clients query or follow the health of a named service.
service HealthService {
  // Check is a unary RPC: one request, one response carrying the current
  // serving status for the requested service.
  rpc Check(HealthCheckRequest) returns (HealthCheckResponse);
  
  // Watch is a server-streaming RPC: the server sends HealthCheckResponse
  // messages on the stream for the requested service.
  rpc Watch(HealthCheckRequest) returns (stream HealthCheckResponse);
}

// HealthCheckRequest identifies the service whose health is being queried.
message HealthCheckRequest {
  // Name of the service to check. The Go server in this article treats an
  // empty name as a query for the overall server health.
  string service = 1;
}

// HealthCheckResponse reports the serving status of the requested service.
message HealthCheckResponse {
  // ServingStatus enumerates the possible health states.
  enum ServingStatus {
    // Zero value; what a response carries when no status was set.
    UNKNOWN = 0;
    SERVING = 1;
    NOT_SERVING = 2;
    // Returned by the Go server in this article when the requested service
    // name has no recorded status.
    SERVICE_UNKNOWN = 3;
  }
  // Current status of the requested service.
  ServingStatus status = 1;
}

Server implementation:

package main

import (
	"context"
	"log"
	"net"
	"sync"
	"time"

	"github.com/example/health"
	"google.golang.org/grpc"
)

// healthServer implements the HealthService gRPC server. It keeps a
// per-service status table and the list of streams currently watching each
// service. Create it with newHealthServer so both maps are initialised.
type healthServer struct {
	health.UnimplementedHealthServiceServer
	// mu guards statusMap and statusWatchers.
	mu             sync.RWMutex
	// statusMap holds the last known status, keyed by service name.
	statusMap      map[string]health.HealthCheckResponse_ServingStatus
	// statusWatchers holds the open Watch streams, keyed by service name.
	statusWatchers map[string][]health.HealthService_WatchServer
}

// newHealthServer returns a healthServer with empty, ready-to-use status and
// watcher maps.
func newHealthServer() *healthServer {
	srv := new(healthServer)
	srv.statusMap = map[string]health.HealthCheckResponse_ServingStatus{}
	srv.statusWatchers = map[string][]health.HealthService_WatchServer{}
	return srv
}

// Check reports the current status of req.Service.
//
// An empty service name asks about the server as a whole and always yields
// SERVING. A name with no recorded status yields SERVICE_UNKNOWN. Check never
// returns an error.
func (s *healthServer) Check(ctx context.Context, req *health.HealthCheckRequest) (*health.HealthCheckResponse, error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	resp := new(health.HealthCheckResponse)
	switch recorded, known := s.statusMap[req.Service]; {
	case req.Service == "":
		// Overall service health
		resp.Status = health.HealthCheckResponse_SERVING
	case !known:
		resp.Status = health.HealthCheckResponse_SERVICE_UNKNOWN
	default:
		resp.Status = recorded
	}
	return resp, nil
}

// Watch registers stream as a watcher for req.Service, sends the current
// status once, and then keeps the stream open until the client disconnects
// or the stream's context is otherwise done.
//
// The initial status is the recorded one if present; otherwise SERVING for
// the empty (overall) service name and SERVICE_UNKNOWN for any other name.
// The watcher is always unregistered before Watch returns, so finished
// streams do not accumulate in statusWatchers.
//
// It returns the error from the initial Send if that fails, and nil once the
// stream's context is done.
func (s *healthServer) Watch(req *health.HealthCheckRequest, stream health.HealthService_WatchServer) error {
	service := req.Service

	// Register the watcher and read the initial status in one critical
	// section so no status change can slip in between the two steps.
	// Appending to a missing map entry works because a nil slice is valid.
	s.mu.Lock()
	s.statusWatchers[service] = append(s.statusWatchers[service], stream)
	status, known := s.statusMap[service]
	s.mu.Unlock()

	// Drop the registration on every exit path.
	defer s.removeWatcher(service, stream)

	if !known {
		status = health.HealthCheckResponse_SERVICE_UNKNOWN
		if service == "" {
			status = health.HealthCheckResponse_SERVING
		}
	}

	// Send initial status
	if err := stream.Send(&health.HealthCheckResponse{Status: status}); err != nil {
		return err
	}

	// Keep the stream open until the client disconnects
	<-stream.Context().Done()

	return nil
}

// removeWatcher unregisters stream from the watchers of service, deleting the
// map entry once the last watcher is gone. It does nothing if stream is not
// registered.
func (s *healthServer) removeWatcher(service string, stream health.HealthService_WatchServer) {
	s.mu.Lock()
	defer s.mu.Unlock()

	watchers := s.statusWatchers[service]
	for i, w := range watchers {
		if w != stream {
			continue
		}
		if len(watchers) == 1 {
			delete(s.statusWatchers, service)
			return
		}
		// Build a fresh slice instead of shifting in place so no stale
		// reference to the removed stream lingers in the old backing array.
		remaining := make([]health.HealthService_WatchServer, 0, len(watchers)-1)
		remaining = append(remaining, watchers[:i]...)
		remaining = append(remaining, watchers[i+1:]...)
		s.statusWatchers[service] = remaining
		return
	}
}

Production Deployment Strategies

Deploying gRPC services in production requires careful consideration of infrastructure, monitoring, and scaling strategies.

Advanced Error Handling

Retry and Circuit Breaking

Health Checking

Production Deployment Strategies

Continue Your Learning