Docker Compose Best Practices and Optimization

This section covers production-ready best practices, security hardening, performance optimization, and operational excellence for Docker Compose deployments.

Security Best Practices

Container Security Hardening

version: '3.8'

services:
  web:
    build: ./web

    # Privilege reduction: unprivileged UID:GID, read-only root filesystem,
    # and every capability dropped except the three re-added below.
    user: "1000:1000"
    read_only: true
    cap_drop:
      - ALL
    cap_add:
      - CHOWN
      - SETGID
      - SETUID
    security_opt:
      - no-new-privileges:true
      - apparmor:docker-default

    # In-memory scratch areas; needed because the root filesystem is read-only.
    tmpfs:
      - /tmp:noexec,nosuid,size=100m
      - /var/cache:noexec,nosuid,size=50m

    # Resource limits
    mem_limit: 512m
    memswap_limit: 512m  # Same value as mem_limit, so no additional swap
    # `cpus` is used instead of `cpu_count`: the latter maps to
    # `docker run --cpu-count`, which Docker documents as Windows-only, so it
    # would not cap CPU usage on a Linux host.
    cpus: "2.0"
    pids_limit: 100

    # Health check
    # NOTE(review): assumes curl exists in the image built from ./web and that
    # the application serves /health on port 3000 - confirm.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  database:
    image: postgres:13-alpine
    user: postgres
    read_only: true
    cap_drop:
      - ALL
    cap_add:
      - CHOWN
      - DAC_OVERRIDE
      - FOWNER
      - SETGID
      - SETUID
    security_opt:
      - no-new-privileges:true

    # Private, size-capped /dev/shm for this container. A bind mount of the
    # host's /dev/shm would share the host's shared memory with the container,
    # and `size=` is a tmpfs option that a bind mount does not accept.
    shm_size: 100m

    # Persistent data directory (`:Z` requests a private SELinux relabel).
    volumes:
      - postgres_data:/var/lib/postgresql/data:Z

    # Writable scratch areas; needed because the root filesystem is read-only.
    tmpfs:
      - /tmp:noexec,nosuid,size=50m
      - /run:noexec,nosuid,size=50m

volumes:
  # Named volume that the local driver backs with a bind mount of a fixed
  # host directory.
  postgres_data:
    driver: "local"
    driver_opts:
      device: "/secure/postgres/data"
      o: "bind"
      type: "none"

Secrets Management

version: '3.8'

services:
  app:
    build:
      context: ./app
    # Secrets granted to this service; the *_FILE variables below point the
    # application at the corresponding files under /run/secrets.
    secrets:
      - db_password
      - api_key
      - ssl_cert
      - ssl_key
    environment:
      DB_PASSWORD_FILE: "/run/secrets/db_password"
      API_KEY_FILE: "/run/secrets/api_key"
      SSL_CERT_FILE: "/run/secrets/ssl_cert"
      SSL_KEY_FILE: "/run/secrets/ssl_key"

  vault:
    # Pinned HashiCorp-published image; the unqualified `vault` image on
    # Docker Hub is no longer updated, and a floating tag is not reproducible.
    image: hashicorp/vault:1.15
    cap_add:
      - IPC_LOCK
    environment:
      # The VAULT_DEV_* variables configure Vault's development server.
      # NOTE(review): dev mode is documented as in-memory and unsealed - it is
      # not a production setup, and the mounted data/config may go unused.
      # `:?` aborts `docker compose up` when the token variable is unset
      # instead of silently passing an empty root token.
      - "VAULT_DEV_ROOT_TOKEN_ID=${VAULT_ROOT_TOKEN:?VAULT_ROOT_TOKEN must be set}"
      - VAULT_DEV_LISTEN_ADDRESS=0.0.0.0:8200
    ports:
      # Published on loopback only; other services reach Vault over the
      # Compose network as vault:8200.
      - "127.0.0.1:8200:8200"
    volumes:
      - vault_data:/vault/data
      - ./vault-config:/vault/config

secrets:
  # Defined outside this file; they must already exist when the stack starts.
  db_password:
    external: true
  api_key:
    external: true
  # Loaded from files relative to this Compose file.
  ssl_cert:
    file: "./secrets/ssl/cert.pem"
  ssl_key:
    file: "./secrets/ssl/key.pem"

volumes:
  vault_data: {}

Network Security

version: '3.8'

services:
  # Edge proxy: the only service in this file that publishes host ports.
  # NOTE(review): a ModSecurity config directory is mounted, but whether the
  # stock nginx:alpine image can load that module is not shown here - confirm.
  nginx:
    image: nginx:alpine
    depends_on:
      - app
    networks:
      - frontend
    ports:
      - "80:80"
      - "443:443"
    # All configuration is mounted read-only.
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/ssl:/etc/nginx/ssl:ro
      - ./nginx/modsecurity:/etc/nginx/modsecurity:ro

  app:
    # Publishes no host ports: traffic arrives only from nginx over
    # `frontend`, and the app talks to the database over `backend`.
    build:
      context: ./app
    networks:
      - frontend
      - backend

  database:
    image: postgres:13
    # Attached to `backend` only, so it shares no network with nginx.
    networks:
      - backend
    environment:
      # NOTE(review): POSTGRES_SSL_MODE is not among the variables documented
      # for the official postgres image - confirm it has any effect. Server
      # TLS is normally enabled with `ssl=on` plus a certificate and key.
      POSTGRES_SSL_MODE: "require"

networks:
  # Edge network shared by nginx and app.
  frontend:
    driver: bridge
    driver_opts:
      com.docker.network.bridge.name: frontend
      # Inter-container communication must stay enabled here: nginx reaches
      # app over this bridge, and "false" would drop that traffic and leave
      # the proxy without an upstream.
      com.docker.network.bridge.enable_icc: "true"
  # Data network shared by app and database.
  backend:
    driver: bridge
    internal: true  # No route to or from outside the host
    driver_opts:
      com.docker.network.bridge.name: backend
      com.docker.network.bridge.enable_icc: "true"

Performance Optimization

Resource Management

version: '3.8'

services:
  web:
    build:
      context: ./web

    # Run an init process as PID 1 and allow 30s for a clean stop.
    init: true
    stop_grace_period: 30s

    deploy:
      # Hard ceiling (limits) and guaranteed share (reservations).
      resources:
        reservations:
          cpus: "0.5"
          memory: 512M
        limits:
          cpus: "1.0"
          memory: 1G
      # Restart only after a failure: at most 3 attempts, 5s apart,
      # evaluated over a 120s window.
      restart_policy:
        condition: on-failure
        max_attempts: 3
        delay: 5s
        window: 120s

    # Bounded, compressed local logs: three rotated files of 10 MB each.
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"
        compress: "true"

  database:
    image: postgres:13

    deploy:
      resources:
        reservations:
          cpus: "1.0"
          memory: 1G
        limits:
          cpus: "2.0"
          memory: 2G

    # Size of the container's /dev/shm.
    shm_size: 256m

    # Server tuning passed as `-c name=value` overrides, one token per item.
    command:
      - "postgres"
      - "-c"
      - "shared_buffers=512MB"
      - "-c"
      - "effective_cache_size=1GB"
      - "-c"
      - "maintenance_work_mem=128MB"
      - "-c"
      - "checkpoint_completion_target=0.9"
      - "-c"
      - "wal_buffers=16MB"
      - "-c"
      - "default_statistics_target=100"
      - "-c"
      - "random_page_cost=1.1"
      - "-c"
      - "effective_io_concurrency=200"

    # Data and logs are kept on separate named volumes.
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - postgres_logs:/var/log/postgresql

# Named volumes with default driver settings.
volumes:
  postgres_data: {}
  postgres_logs: {}

Caching Strategies

version: '3.8'

services:
  app:
    build:
      context: ./app
    # Both cache backends are started before the application.
    depends_on:
      - redis
      - memcached
    # Cache endpoints, addressed by Compose service name.
    environment:
      REDIS_URL: "redis://redis:6379/0"
      MEMCACHED_URL: "memcached:11211"

  # Redis: session storage and caching
  redis:
    image: redis:7-alpine
    # Server flags, one token per item: memory cap with LRU eviction,
    # three RDB snapshot rules, and an append-only file fsynced every second.
    command:
      - "redis-server"
      - "--maxmemory"
      - "512mb"
      - "--maxmemory-policy"
      - "allkeys-lru"
      - "--save"
      - "900"
      - "1"
      - "--save"
      - "300"
      - "10"
      - "--save"
      - "60"
      - "10000"
      - "--appendonly"
      - "yes"
      - "--appendfsync"
      - "everysec"
    # Raises the listen backlog ceiling inside the container's namespace.
    sysctls:
      net.core.somaxconn: 65535
    volumes:
      - redis_data:/data

  # Memcached: application-level cache
  memcached:
    image: memcached:alpine
    # Flags passed through unchanged: -m 256, -c 1024, -I 4m.
    command:
      - "memcached"
      - "-m"
      - "256"
      - "-c"
      - "1024"
      - "-I"
      - "4m"
    
  # Varnish: HTTP cache placed in front of the application
  varnish:
    image: varnish:stable
    depends_on:
      - app
    ports:
      - "80:80"
    environment:
      VARNISH_SIZE: "256M"
    volumes:
      - ./varnish/default.vcl:/etc/varnish/default.vcl:ro
    # Runs varnishd in the foreground on :80 with the management interface
    # on :6082, the mounted VCL, and a 256m malloc store.
    command:
      - "varnishd"
      - "-F"
      - "-a"
      - ":80"
      - "-T"
      - ":6082"
      - "-f"
      - "/etc/varnish/default.vcl"
      - "-s"
      - "malloc,256m"

# Persists the Redis /data directory across container restarts.
volumes:
  redis_data: {}

Database Optimization

version: '3.8'

services:
  # Primary (write) database
  postgres-master:
    image: postgres:13
    # NOTE(review): the POSTGRES_REPLICATION_* names resemble another image's
    # convention and are not documented for the official postgres image -
    # confirm that replication is actually set up by master.conf,
    # pg_hba.conf, or an init script.
    environment:
      POSTGRES_REPLICATION_MODE: "master"
      POSTGRES_REPLICATION_USER: "replicator"
      POSTGRES_REPLICATION_PASSWORD: "${REPLICATION_PASSWORD}"
    volumes:
      - postgres_master_data:/var/lib/postgresql/data
      - ./postgres/master.conf:/etc/postgresql/postgresql.conf
      - ./postgres/pg_hba.conf:/etc/postgresql/pg_hba.conf
    # Start the server with the mounted configuration and HBA files.
    command:
      - "postgres"
      - "-c"
      - "config_file=/etc/postgresql/postgresql.conf"
      - "-c"
      - "hba_file=/etc/postgresql/pg_hba.conf"

  # Read replica, started after the primary
  postgres-replica:
    image: postgres:13
    depends_on:
      - postgres-master
    # NOTE(review): these replication variables are not documented for the
    # official postgres image; without a base backup and standby settings the
    # container would presumably start as an independent server - confirm.
    environment:
      POSTGRES_REPLICATION_MODE: "slave"
      POSTGRES_MASTER_HOST: "postgres-master"
      POSTGRES_REPLICATION_USER: "replicator"
      POSTGRES_REPLICATION_PASSWORD: "${REPLICATION_PASSWORD}"
    volumes:
      - postgres_replica_data:/var/lib/postgresql/data

  # Connection pooler in front of the primary database
  pgbouncer:
    image: pgbouncer/pgbouncer:latest
    environment:
      # Upstream database: DATABASES_* variables populate the [databases]
      # section of the generated pgbouncer.ini.
      - DATABASES_HOST=postgres-master
      - DATABASES_PORT=5432
      - DATABASES_USER=${DB_USER}
      - DATABASES_PASSWORD=${DB_PASSWORD}
      - DATABASES_DBNAME=${DB_NAME}
      # Pooler settings: this image maps only PGBOUNCER_-prefixed variables
      # into the [pgbouncer] section, so unprefixed names are ignored.
      - PGBOUNCER_POOL_MODE=transaction
      - PGBOUNCER_MAX_CLIENT_CONN=200
      - PGBOUNCER_DEFAULT_POOL_SIZE=50
      # NOTE(review): PgBouncer documents server_reset_query as unused in
      # transaction pooling unless server_reset_query_always is set - confirm
      # this is still wanted.
      - PGBOUNCER_SERVER_RESET_QUERY=DISCARD ALL
    depends_on:
      - postgres-master

# One data volume per PostgreSQL instance.
volumes:
  postgres_master_data: {}
  postgres_replica_data: {}

Monitoring and Observability

Comprehensive Monitoring Stack

version: '3.8'

services:
  # Application exposing its own metrics endpoint
  app:
    build:
      context: ./app
    environment:
      METRICS_ENABLED: "true"
      METRICS_PORT: "9090"
    # NOTE(review): these labels follow the prometheus.io/* naming convention;
    # Prometheus only acts on them if its scrape config reads container
    # labels - confirm against prometheus.yml.
    labels:
      prometheus.io/scrape: "true"
      prometheus.io/port: "9090"
      prometheus.io/path: "/metrics"

  # Prometheus: metrics collection and storage
  prometheus:
    image: prom/prometheus:latest
    # Mounted config and rules, TSDB on the named volume with 30 days of
    # retention, and the HTTP lifecycle API enabled.
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--web.console.libraries=/etc/prometheus/console_libraries"
      - "--web.console.templates=/etc/prometheus/consoles"
      - "--storage.tsdb.retention.time=30d"
      - "--web.enable-lifecycle"
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      - ./prometheus/rules:/etc/prometheus/rules
      - prometheus_data:/prometheus

  # Grafana: dashboards
  grafana:
    image: grafana/grafana:latest
    # Admin password comes from the GRAFANA_PASSWORD variable at compose
    # time; self-service sign-up is turned off.
    environment:
      GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_PASSWORD}"
      GF_USERS_ALLOW_SIGN_UP: "false"
    ports:
      - "3000:3000"
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning

  # Alertmanager: alert routing and notification
  alertmanager:
    image: prom/alertmanager:latest
    # Routing configuration from the host; state kept on a named volume.
    volumes:
      - ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml
      - alertmanager_data:/alertmanager
    ports:
      - "9093:9093"

  # Node Exporter: host-level metrics
  node-exporter:
    image: prom/node-exporter:latest
    ports:
      - "9100:9100"
    # Host filesystems are mounted read-only and referenced by the flags below.
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # Point the exporter at the mounted host root; without this flag the
      # /rootfs mount is never consulted.
      - '--path.rootfs=/rootfs'
      # `mount-points-exclude` is the current name of the former
      # `ignored-mount-points` flag. `$$` is Compose's escape for a literal
      # `$`, so the exporter receives `($|/)` in the regex.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'

  # cAdvisor: per-container resource metrics
  # NOTE(review): consider pinning a release tag instead of `latest` so the
  # deployed version is reproducible.
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    ports:
      - "8080:8080"
    # Every host path is mounted read-only. cAdvisor only reads from
    # /var/run, so write access to the host's runtime directory is
    # unnecessary exposure.
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro

# Persistent state for the monitoring services.
volumes:
  prometheus_data: {}
  grafana_data: {}
  alertmanager_data: {}

Logging Best Practices

version: '3.8'

services:
  app:
    build: ./app
    # Start the collector before the application begins emitting logs.
    depends_on:
      - fluentd
    logging:
      driver: "fluentd"
      options:
        # The logging driver runs in the Docker daemon on the host, which
        # cannot resolve Compose service names. Use the port that the fluentd
        # service publishes on the host instead.
        fluentd-address: "localhost:24224"
        tag: "app.{{.Name}}"
        # Connect in the background so the container can start even when the
        # collector is not reachable yet. `fluentd-async` is the current name
        # of the deprecated `fluentd-async-connect` option.
        fluentd-async: "true"
        fluentd-retry-wait: "1s"
        fluentd-max-retries: "30"

  # Fluentd: log aggregator, started after Elasticsearch
  fluentd:
    build:
      context: ./fluentd
    depends_on:
      - elasticsearch
    environment:
      FLUENTD_CONF: "fluent.conf"
    # Forward-protocol port published on the host.
    ports:
      - "24224:24224"
    volumes:
      - ./fluentd/conf:/fluentd/etc
      - fluentd_data:/fluentd/log

  # Elasticsearch: single-node log store
  elasticsearch:
    image: elasticsearch:7.17.0
    environment:
      - discovery.type=single-node
      - "ES_JAVA_OPTS=-Xms1g -Xmx1g"
      # Lock the JVM heap in RAM. The unlimited memlock ulimit below only
      # takes effect when this setting is enabled.
      - bootstrap.memory_lock=true
      # NOTE(review): security features are disabled; acceptable only while
      # port 9200 stays unpublished and the network is trusted - confirm.
      - xpack.security.enabled=false
    volumes:
      - elasticsearch_data:/usr/share/elasticsearch/data
    ulimits:
      memlock:
        soft: -1
        hard: -1

  # Kibana: log search UI, started after Elasticsearch
  kibana:
    image: kibana:7.17.0
    depends_on:
      - elasticsearch
    # Reaches Elasticsearch by Compose service name.
    environment:
      ELASTICSEARCH_HOSTS: "http://elasticsearch:9200"
    ports:
      - "5601:5601"

# Persistent storage for the log pipeline.
volumes:
  fluentd_data: {}
  elasticsearch_data: {}

Deployment Best Practices

Blue-Green Deployment

# docker-compose.blue.yml
version: '3.8'

services:
  app-blue:
    # Built from ./app and tagged with the blue release version.
    build:
      context: ./app
    image: "myapp:${BLUE_VERSION}"
    environment:
      ENVIRONMENT: "blue"
      VERSION: "${BLUE_VERSION}"
    labels:
      deployment: "blue"
    networks:
      - app-network

  # Proxy for the blue stack, published on host port 8080.
  nginx-blue:
    image: nginx:alpine
    ports:
      - "8080:80"
    volumes:
      # Read-only: nginx only reads its configuration, so the container
      # should not be able to modify the file on the host.
      - ./nginx/blue.conf:/etc/nginx/nginx.conf:ro
    depends_on:
      - app-blue
    networks:
      - app-network

networks:
  # Created outside this file, so it must exist before `up` is run.
  app-network:
    name: app-network
    external: true
# docker-compose.green.yml
version: '3.8'

services:
  app-green:
    # Built from ./app and tagged with the green release version.
    build:
      context: ./app
    image: "myapp:${GREEN_VERSION}"
    environment:
      ENVIRONMENT: "green"
      VERSION: "${GREEN_VERSION}"
    labels:
      deployment: "green"
    networks:
      - app-network

  # Proxy for the green stack, published on host port 8081.
  nginx-green:
    image: nginx:alpine
    ports:
      - "8081:80"
    volumes:
      # Read-only: nginx only reads its configuration, so the container
      # should not be able to modify the file on the host.
      - ./nginx/green.conf:/etc/nginx/nginx.conf:ro
    depends_on:
      - app-green
    networks:
      - app-network

networks:
  # Created outside this file, so it must exist before `up` is run.
  app-network:
    name: app-network
    external: true

Health Checks and Graceful Shutdown

version: '3.8'

services:
  app:
    build:
      context: ./app

    # PID 1 is an init process, so signals are forwarded to the application.
    init: true

    # Shutdown sequence: SIGTERM first, then up to 30s before a forced kill.
    # The application's own timeout (25) stays below that grace period.
    stop_signal: SIGTERM
    stop_grace_period: 30s
    environment:
      SHUTDOWN_TIMEOUT: "25"

    # Probe /health every 30s once the 60s start period has elapsed.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:3000/health"
      start_period: 60s
      interval: 30s
      timeout: 10s
      retries: 3

  database:
    image: postgres:13
    healthcheck:
      # `$$` defers expansion to the shell inside the container. A single `$`
      # is interpolated by Compose from the host environment and becomes an
      # empty string when the variable is unset there. The defaults match the
      # postgres image: user `postgres`, database named after the user.
      test:
        - "CMD-SHELL"
        - "pg_isready -U $${POSTGRES_USER:-postgres} -d $${POSTGRES_DB:-$${POSTGRES_USER:-postgres}}"
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s

    stop_grace_period: 60s  # Allow time for checkpoint

Summary

This section covered production-ready best practices:

Security Excellence

  • Container Hardening: Non-root users, read-only filesystems, capability dropping
  • Secrets Management: External secrets and secure credential handling
  • Network Security: WAF integration, network isolation, and SSL/TLS

Performance Optimization

  • Resource Management: CPU and memory limits with proper reservations
  • Caching Strategies: Multi-layer caching with Redis, Memcached, and Varnish
  • Database Optimization: Master-slave replication and connection pooling

Operational Excellence

  • Monitoring: Comprehensive metrics, logging, and alerting
  • Deployment Patterns: Blue-green deployments and graceful shutdowns
  • Health Checks: Container health checks with sensible intervals, timeouts, retries, and start periods

Next Steps: Part 6 demonstrates complete real-world implementations that combine all these best practices into production-ready systems with full CI/CD integration and operational monitoring.