Advanced Error Handling
gRPC provides a rich error model that can be leveraged for detailed error reporting:
package main
import (
"context"
"database/sql"
"errors"
"log"
"github.com/example/service/proto"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// Custom error types
var (
ErrNotFound = errors.New("resource not found")
ErrAlreadyExists = errors.New("resource already exists")
ErrDatabase = errors.New("database error")
ErrValidation = errors.New("validation error")
)
// Service implementation with advanced error handling
type userService struct {
proto.UnimplementedUserServiceServer
db *sql.DB // Database connection
}
func (s *userService) GetUser(ctx context.Context, req *proto.GetUserRequest) (*proto.User, error) {
// Validate request
if req.UserId == "" {
return nil, status.Error(codes.InvalidArgument, "user ID cannot be empty")
}
// Query database
user, err := s.findUserByID(ctx, req.UserId)
if err != nil {
// Map domain errors to appropriate gRPC errors
switch {
case errors.Is(err, ErrNotFound):
return nil, status.Errorf(codes.NotFound, "user not found: %s", req.UserId)
case errors.Is(err, ErrDatabase):
log.Printf("Database error: %v", err)
return nil, status.Error(codes.Internal, "internal server error")
default:
log.Printf("Unknown error: %v", err)
return nil, status.Error(codes.Unknown, "unknown error")
}
}
return user, nil
}
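
// findUserByID is not shown in this example; the sketch below is one plausible
// implementation, assuming a "users" table and Id/Name/Email fields on proto.User.
// Its job is to translate low-level failures into the domain errors
// (ErrNotFound, ErrDatabase) that GetUser maps onto gRPC status codes.
// findUserByEmail would follow the same pattern.
func (s *userService) findUserByID(ctx context.Context, id string) (*proto.User, error) {
	var u proto.User
	// Placeholder syntax ($1) assumes a PostgreSQL driver; adjust for your driver.
	row := s.db.QueryRowContext(ctx, "SELECT id, name, email FROM users WHERE id = $1", id)
	if err := row.Scan(&u.Id, &u.Name, &u.Email); err != nil {
		if errors.Is(err, sql.ErrNoRows) {
			// Translate the sql sentinel into our domain error.
			return nil, ErrNotFound
		}
		// errors.Join (Go 1.20+) keeps the original error while letting callers
		// match ErrDatabase with errors.Is.
		return nil, errors.Join(ErrDatabase, err)
	}
	return &u, nil
}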
func (s *userService) CreateUser(ctx context.Context, req *proto.CreateUserRequest) (*proto.User, error) {
// Validate request
if err := validateCreateUserRequest(req); err != nil {
// Return detailed validation errors
st := status.New(codes.InvalidArgument, "validation failed")
// Add detailed error information
detailedStatus, err := st.WithDetails(
&proto.ValidationError{
Field: "email",
Message: "invalid email format",
},
)
if err != nil {
// If adding details fails, return the simple status
return nil, st.Err()
}
return nil, detailedStatus.Err()
}
// Check if user already exists
_, err := s.findUserByEmail(ctx, req.Email)
if err == nil {
// User already exists
return nil, status.Errorf(codes.AlreadyExists, "user with email %s already exists", req.Email)
} else if !errors.Is(err, ErrNotFound) {
// Database error
log.Printf("Database error: %v", err)
return nil, status.Error(codes.Internal, "internal server error")
}
// Create user
user, err := s.createUser(ctx, req)
if err != nil {
log.Printf("Failed to create user: %v", err)
return nil, status.Error(codes.Internal, "failed to create user")
}
return user, nil
}
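
status.WithDetails only accepts protobuf messages, so the ValidationError attached above must be defined in the service's proto package; it is not part of gRPC itself. A minimal definition matching the fields used in the Go code (the proto package name here is an assumption) might look like this:

// validation_error.proto (assumed; not part of gRPC)
syntax = "proto3";
package service;
option go_package = "github.com/example/service/proto";

message ValidationError {
  string field = 1;
  string message = 2;
}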
Client-side error handling:
package main
import (
"context"
"log"
"time"
"github.com/example/service/proto"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/status"
)
func main() {
conn, err := grpc.Dial("localhost:50051",
grpc.WithTransportCredentials(insecure.NewCredentials()))
if err != nil {
log.Fatalf("failed to connect: %v", err)
}
defer conn.Close()
client := proto.NewUserServiceClient(conn)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
// Try to get a non-existent user
user, err := client.GetUser(ctx, &proto.GetUserRequest{
UserId: "nonexistent",
})
// Handle the error
if err != nil {
st, ok := status.FromError(err)
if ok {
switch st.Code() {
case codes.NotFound:
log.Printf("User not found: %s", st.Message())
case codes.InvalidArgument:
log.Printf("Invalid argument: %s", st.Message())
case codes.Internal:
log.Printf("Internal error: %s", st.Message())
default:
log.Printf("Unexpected error: %s", st.Message())
}
// Check for additional error details
for _, detail := range st.Details() {
switch t := detail.(type) {
case *proto.ValidationError:
log.Printf("Validation error: field %s - %s", t.Field, t.Message)
}
}
} else {
log.Printf("Non-gRPC error: %v", err)
}
} else {
log.Printf("Got user: %v", user)
}
}
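
A custom ValidationError message works, but gRPC also ships standard detail messages in google.golang.org/genproto/googleapis/rpc/errdetails (BadRequest, RetryInfo, QuotaFailure, and others) that interoperate with clients in any language. The sketch below expresses the same validation failure with errdetails.BadRequest; the helper name is made up for illustration:

package main

import (
	"google.golang.org/genproto/googleapis/rpc/errdetails"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

// invalidEmailStatus builds an InvalidArgument status carrying a standard
// BadRequest detail. The field name and message mirror the earlier example.
func invalidEmailStatus() error {
	st := status.New(codes.InvalidArgument, "validation failed")
	detailed, err := st.WithDetails(&errdetails.BadRequest{
		FieldViolations: []*errdetails.BadRequest_FieldViolation{
			{Field: "email", Description: "invalid email format"},
		},
	})
	if err != nil {
		// Fall back to the plain status if the detail cannot be attached.
		return st.Err()
	}
	return detailed.Err()
}

On the client, the detail then arrives as a *errdetails.BadRequest in st.Details(), so the type switch above would simply gain a case for that type.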
Retry and Circuit Breaking
Implementing retry logic and circuit breaking for resilient clients:
package main
import (
"context"
"log"
"math/rand"
"time"
"github.com/example/service/proto"
"github.com/sony/gobreaker"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// ResilienceClient wraps a gRPC client with retry and circuit breaking capabilities
type ResilienceClient struct {
client proto.UserServiceClient
cb *gobreaker.CircuitBreaker
maxRetries int
}
// NewResilienceClient creates a new resilient client
func NewResilienceClient(conn *grpc.ClientConn, maxRetries int) *ResilienceClient {
// Configure circuit breaker
cbSettings := gobreaker.Settings{
Name: "user-service",
MaxRequests: 5, // Max requests allowed when circuit is half-open
Interval: 10 * time.Second, // Cyclic period of the closed state
Timeout: 30 * time.Second, // Time after which the circuit breaker resets to half-open state
ReadyToTrip: func(counts gobreaker.Counts) bool {
// Trip the circuit when the failure rate exceeds 50% and we have at least 5 requests
failureRatio := float64(counts.TotalFailures) / float64(counts.Requests)
return counts.Requests >= 5 && failureRatio >= 0.5
},
OnStateChange: func(name string, from gobreaker.State, to gobreaker.State) {
log.Printf("Circuit breaker %s state changed from %s to %s", name, from, to)
},
}
return &ResilienceClient{
client: proto.NewUserServiceClient(conn),
cb: gobreaker.NewCircuitBreaker(cbSettings),
maxRetries: maxRetries,
}
}
// GetUser calls the GetUser RPC with retry and circuit breaking
func (c *ResilienceClient) GetUser(ctx context.Context, userID string) (*proto.User, error) {
var user *proto.User
var err error
// Execute through circuit breaker
result, err := c.cb.Execute(func() (interface{}, error) {
// Implement retry logic
for attempt := 0; attempt <= c.maxRetries; attempt++ {
// Create a new context with a timeout for this attempt
attemptCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
user, err = c.client.GetUser(attemptCtx, &proto.GetUserRequest{
UserId: userID,
})
// Cancel immediately rather than deferring: a deferred cancel inside the
// loop would keep every attempt's timer alive until the closure returns
cancel()
// If successful, return the result
if err == nil {
return user, nil
}
// Check if the error is retriable
if isRetriable(err) {
// If this wasn't the last attempt, wait and retry
if attempt < c.maxRetries {
// Exponential backoff with jitter
backoff := time.Duration(1<<uint(attempt)) * 100 * time.Millisecond
jitter := time.Duration(rand.Intn(100)) * time.Millisecond
sleepTime := backoff + jitter
log.Printf("Request failed with retriable error: %v. Retrying in %v...", err, sleepTime)
time.Sleep(sleepTime)
continue
}
}
// Non-retriable error or max retries reached
return nil, err
}
// This should never be reached, but just in case
return nil, err
})
if err != nil {
return nil, err
}
return result.(*proto.User), nil
}
// isRetriable determines if an error should be retried
func isRetriable(err error) bool {
if err == nil {
return false
}
st, ok := status.FromError(err)
if !ok {
// Non-gRPC error, don't retry
return false
}
// Retry on these status codes
switch st.Code() {
case codes.Unavailable, // Server is currently unavailable
codes.ResourceExhausted, // Client or server has insufficient quota
codes.DeadlineExceeded, // Request timed out
codes.Aborted: // Concurrency conflict
return true
default:
return false
}
}
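
Wiring the wrapper into a client program is straightforward. The sketch below assumes it lives alongside the ResilienceClient code above; the address and user ID are placeholders:

package main

import (
	"context"
	"log"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

func main() {
	conn, err := grpc.Dial("localhost:50051",
		grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		log.Fatalf("failed to connect: %v", err)
	}
	defer conn.Close()

	// Allow up to 3 retries per call; circuit breaker settings come from
	// NewResilienceClient above.
	client := NewResilienceClient(conn, 3)

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	user, err := client.GetUser(ctx, "user-123")
	if err != nil {
		log.Fatalf("GetUser failed: %v", err)
	}
	log.Printf("Got user: %v", user)
}

Note that grpc-go also offers transparent retries configured through the service config; the hand-rolled loop here is useful when you want per-call control and circuit breaking in one place.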
Health Checking
Implementing health checks for service monitoring. gRPC defines a standard health checking protocol (grpc.health.v1); the custom service below mirrors it so the moving parts are visible:
// health_service.proto
syntax = "proto3";
package health;
option go_package = "github.com/example/health";
service HealthService {
// Check the health of the service
rpc Check(HealthCheckRequest) returns (HealthCheckResponse);
// Watch for health changes
rpc Watch(HealthCheckRequest) returns (stream HealthCheckResponse);
}
message HealthCheckRequest {
string service = 1;
}
message HealthCheckResponse {
enum ServingStatus {
UNKNOWN = 0;
SERVING = 1;
NOT_SERVING = 2;
SERVICE_UNKNOWN = 3;
}
ServingStatus status = 1;
}
Server implementation:
package main
import (
"context"
"sync"
"github.com/example/health"
)
type healthServer struct {
health.UnimplementedHealthServiceServer
mu sync.RWMutex
statusMap map[string]health.HealthCheckResponse_ServingStatus
statusWatchers map[string][]health.HealthService_WatchServer
}
func newHealthServer() *healthServer {
return &healthServer{
statusMap: make(map[string]health.HealthCheckResponse_ServingStatus),
statusWatchers: make(map[string][]health.HealthService_WatchServer),
}
}
func (s *healthServer) Check(ctx context.Context, req *health.HealthCheckRequest) (*health.HealthCheckResponse, error) {
s.mu.RLock()
defer s.mu.RUnlock()
if req.Service == "" {
// Overall service health
return &health.HealthCheckResponse{
Status: health.HealthCheckResponse_SERVING,
}, nil
}
status, ok := s.statusMap[req.Service]
if !ok {
return &health.HealthCheckResponse{
Status: health.HealthCheckResponse_SERVICE_UNKNOWN,
}, nil
}
return &health.HealthCheckResponse{
Status: status,
}, nil
}
func (s *healthServer) Watch(req *health.HealthCheckRequest, stream health.HealthService_WatchServer) error {
service := req.Service
// Register watcher
s.mu.Lock()
if _, ok := s.statusWatchers[service]; !ok {
s.statusWatchers[service] = make([]health.HealthService_WatchServer, 0)
}
s.statusWatchers[service] = append(s.statusWatchers[service], stream)
s.mu.Unlock()
// Send initial status
status := health.HealthCheckResponse_SERVICE_UNKNOWN
s.mu.RLock()
if st, ok := s.statusMap[service]; ok {
status = st
} else if service == "" {
status = health.HealthCheckResponse_SERVING
}
s.mu.RUnlock()
if err := stream.Send(&health.HealthCheckResponse{Status: status}); err != nil {
return err
}
// Keep the stream open until the client disconnects. A complete
// implementation would also remove this watcher from statusWatchers when
// the stream ends and push a new HealthCheckResponse to registered
// watchers whenever a service's status changes
<-stream.Context().Done()
return nil
}
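
The hand-rolled HealthService above shows the mechanics, but in most deployments you can simply register the stock implementation from google.golang.org/grpc/health, which implements the standard grpc.health.v1 protocol and is understood by load balancers and probes such as grpc-health-probe. A minimal sketch (the service name string is an assumption; use your service's fully qualified proto name):

package main

import (
	"log"
	"net"

	"google.golang.org/grpc"
	"google.golang.org/grpc/health"
	healthpb "google.golang.org/grpc/health/grpc_health_v1"
)

func main() {
	lis, err := net.Listen("tcp", ":50051")
	if err != nil {
		log.Fatalf("failed to listen: %v", err)
	}

	s := grpc.NewServer()

	// Register the stock health server alongside your application services.
	healthSrv := health.NewServer()
	healthpb.RegisterHealthServer(s, healthSrv)

	// Mark the user service as serving; flip to NOT_SERVING during shutdown
	// or when a dependency (e.g., the database) becomes unavailable.
	healthSrv.SetServingStatus("proto.UserService", healthpb.HealthCheckResponse_SERVING)

	if err := s.Serve(lis); err != nil {
		log.Fatalf("failed to serve: %v", err)
	}
}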
Production Deployment Strategies
Deploying gRPC services in production requires careful consideration of infrastructure, monitoring, and scaling strategies.