Production Docker: Security, Performance, and Best Practices

This final section covers everything you need to know about running Docker in production environments, including security hardening, performance optimization, monitoring, and operational best practices.

Production Security Best Practices

Container Security Fundamentals

# Secure Dockerfile example
FROM node:16-alpine AS builder

# Create app directory
WORKDIR /usr/src/app

# Copy package files
COPY package*.json ./

# Install production dependencies only
# (--omit=dev replaces the deprecated --only=production flag in npm 8+)
RUN npm ci --omit=dev && npm cache clean --force

# Production stage
FROM node:16-alpine

# Install security updates and dumb-init for PID-1 signal handling
RUN apk update && apk upgrade && apk add --no-cache dumb-init

# Create non-root user; -G nodejs makes it a member of the nodejs group
# so group ownership set by --chown=nextjs:nodejs below is effective
RUN addgroup -g 1001 -S nodejs && \
    adduser -S nextjs -u 1001 -G nodejs

# Set working directory
WORKDIR /usr/src/app

# Copy dependencies from builder stage
COPY --from=builder /usr/src/app/node_modules ./node_modules

# Copy application code
COPY --chown=nextjs:nodejs . .

# Switch to non-root user
USER nextjs

# Expose port
EXPOSE 3000

# Use dumb-init for proper signal handling
ENTRYPOINT ["dumb-init", "--"]

# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
  CMD node healthcheck.js

# Start application
CMD ["node", "server.js"]

Runtime Security Configuration

# Run containers with security options:
# read-only rootfs, explicit tmpfs for writable paths, all capabilities
# dropped except the one needed, and resource limits.
docker run -d \
  --name secure-app \
  --user 1001:1001 \
  --read-only \
  --tmpfs /tmp \
  --tmpfs /var/cache \
  --security-opt no-new-privileges:true \
  --cap-drop ALL \
  --cap-add NET_BIND_SERVICE \
  --memory 512m \
  --cpus 1.0 \
  my-app:latest

# Use Docker secrets for sensitive data.
# printf (not echo) avoids embedding a trailing newline in the secret value.
printf '%s' "my-secret-password" | docker secret create db_password -
docker service create \
  --name web \
  --secret db_password \
  my-app:latest

Image Security Scanning

# Scan images for vulnerabilities
docker scout cves my-app:latest

# Use Trivy for comprehensive scanning
# (the docker.sock mount lets Trivy read local images)
docker run --rm -v /var/run/docker.sock:/var/run/docker.sock \
  aquasec/trivy:latest image my-app:latest

# Scan during build process. Trivy options go BEFORE the image argument;
# --exit-code 1 makes the command fail when vulnerabilities are found.
docker build -t my-app:latest .
docker run --rm -v /var/run/docker.sock:/var/run/docker.sock \
  aquasec/trivy:latest image --exit-code 1 my-app:latest

Secure Compose Configuration

# docker-compose.prod.yml
version: '3.8'

services:
  web:
    image: my-app:latest
    # Run as a non-root UID/GID baked into the image
    user: "1001:1001"
    # Immutable root filesystem; writable paths are explicit tmpfs mounts
    read_only: true
    tmpfs:
      - /tmp:rw,noexec,nosuid,size=100m
      - /var/cache:rw,noexec,nosuid,size=50m
    security_opt:
      - no-new-privileges:true
    # Drop every capability, then add back only what the app needs
    cap_drop:
      - ALL
    cap_add:
      - NET_BIND_SERVICE
    # NOTE(review): deploy.resources takes effect under Swarm / the Compose
    # spec; classic docker-compose v1 ignores it — confirm the target runtime.
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 512M
        reservations:
          cpus: '0.5'
          memory: 256M
    networks:
      - frontend
    # Secrets are mounted read-only at /run/secrets/<name>
    secrets:
      - db_password
      - api_key

  database:
    image: postgres:13-alpine
    # UID/GID 999 is the postgres user in the official alpine image
    user: "999:999"
    read_only: true
    tmpfs:
      - /tmp
      - /var/run/postgresql
    security_opt:
      - no-new-privileges:true
    # Postgres needs these capabilities to manage its data directory
    cap_drop:
      - ALL
    cap_add:
      - CHOWN
      - DAC_OVERRIDE
      - FOWNER
      - SETGID
      - SETUID
    volumes:
      - postgres_data:/var/lib/postgresql/data
    networks:
      - backend
    secrets:
      - db_password
    environment:
      # *_FILE variant reads the password from the mounted secret file
      - POSTGRES_PASSWORD_FILE=/run/secrets/db_password

# Secrets must be created beforehand (docker secret create ...)
secrets:
  db_password:
    external: true
  api_key:
    external: true

networks:
  frontend:
    driver: bridge
  backend:
    # internal: no external routing — database is unreachable from outside
    driver: bridge
    internal: true

volumes:
  postgres_data:
    driver: local

Performance Optimization

Image Optimization

# Multi-stage build for minimal production image
FROM node:16-alpine AS dependencies
WORKDIR /app
COPY package*.json ./
# --omit=dev replaces the deprecated --only=production flag (npm 8+)
RUN npm ci --omit=dev && npm cache clean --force

# Separate build stage gets devDependencies for the compile step only
FROM node:16-alpine AS build
WORKDIR /app
COPY package*.json ./
RUN npm ci
COPY . .
RUN npm run build

# Runtime stage: only production deps + built artifacts, non-root user
FROM node:16-alpine AS runtime
RUN apk add --no-cache dumb-init
WORKDIR /app
RUN addgroup -g 1001 -S nodejs && adduser -S nextjs -u 1001
COPY --from=dependencies /app/node_modules ./node_modules
COPY --from=build /app/dist ./dist
COPY --from=build /app/public ./public
USER nextjs
EXPOSE 3000
ENTRYPOINT ["dumb-init", "--"]
CMD ["node", "dist/server.js"]

Resource Management

# Resource-optimized compose file
version: '3.8'

services:
  web:
    image: my-app:latest
    deploy:
      resources:
        limits:
          cpus: '2.0'
          memory: 1G
        reservations:
          cpus: '1.0'
          memory: 512M
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
        window: 120s
    healthcheck:
      # NOTE(review): requires curl inside the image — minimal images may
      # need `wget -q --spider` or a node-based check instead; verify.
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  database:
    image: postgres:13-alpine
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 2G
        reservations:
          cpus: '0.5'
          memory: 1G
    volumes:
      - postgres_data:/var/lib/postgresql/data
    # The official postgres image does not read tuning settings from
    # POSTGRES_* environment variables — pass them as server flags.
    command:
      - postgres
      - -c
      - shared_buffers=256MB
      - -c
      - effective_cache_size=1GB
      - -c
      - work_mem=4MB
    environment:
      # Required: the official image refuses to start without a password
      # (or POSTGRES_HOST_AUTH_METHOD). Prefer POSTGRES_PASSWORD_FILE +
      # Docker secrets in production.
      - POSTGRES_PASSWORD=change-me

volumes:
  postgres_data:
    driver: local
    driver_opts:
      # bind-mounts the host path; /opt/postgres/data must already exist
      type: none
      o: bind
      device: /opt/postgres/data

Caching Strategies

# Optimize Docker layer caching
FROM node:16-alpine

WORKDIR /app

# Copy package files first (better caching: this layer only rebuilds
# when package*.json changes, not on every source edit)
COPY package*.json ./
# --omit=dev replaces the deprecated --only=production flag (npm 8+)
RUN npm ci --omit=dev

# Copy source code last
COPY . .

# Use .dockerignore to exclude unnecessary files
# .dockerignore
node_modules
npm-debug.log
.git
.gitignore
README.md
.env
.nyc_output
coverage
.coverage
.vscode
.idea
*.swp
*.swo
*~

Build Optimization

# Use BuildKit for faster builds
# (default engine in recent Docker versions; explicit opt-in shown here)
export DOCKER_BUILDKIT=1
docker build -t my-app:latest .

# Multi-platform builds
# (--push is required: multi-arch manifests cannot be loaded into the
# local image store, they must go straight to a registry)
docker buildx create --use
docker buildx build --platform linux/amd64,linux/arm64 -t my-app:latest --push .

# Build with cache mounts
# (my-app:cache must exist locally or be pulled first for --cache-from
# to have any effect; BUILDKIT_INLINE_CACHE embeds cache metadata)
docker build \
  --build-arg BUILDKIT_INLINE_CACHE=1 \
  --cache-from my-app:cache \
  -t my-app:latest .

Monitoring and Logging

Application Monitoring

# docker-compose.monitoring.yml
version: '3.8'

services:
  # Application
  app:
    image: my-app:latest
    environment:
      - METRICS_ENABLED=true
      - METRICS_PORT=9090
    ports:
      - "3000:3000"
      - "9090:9090"  # Metrics endpoint

  # Prometheus (host port 9091 avoids clashing with the app's 9090)
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9091:9090"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--storage.tsdb.retention.time=200h'
      - '--web.enable-lifecycle'

  # Grafana
  grafana:
    image: grafana/grafana:latest
    ports:
      - "3001:3000"
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      # NOTE(review): hardcoded admin password — inject via an env file or
      # Docker secret in real deployments, and change the default.
      - GF_SECURITY_ADMIN_PASSWORD=admin123
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning

  # Node Exporter (host-level metrics; host paths mounted read-only)
  node-exporter:
    image: prom/node-exporter:latest
    ports:
      - "9100:9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      # $$ escapes $ so Compose does not attempt variable interpolation
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'

  # cAdvisor (per-container metrics; some hosts additionally require
  # privileged mode or a /dev/kmsg mount — verify on the target host)
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    ports:
      - "8080:8080"
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro

volumes:
  prometheus_data:
  grafana_data:

Centralized Logging

# docker-compose.logging.yml
version: '3.8'

services:
  # Application with structured logging
  app:
    image: my-app:latest
    logging:
      # json-file with rotation caps local log usage (~30 MB per container)
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
        labels: "service=app,environment=production"
    environment:
      - LOG_LEVEL=info
      - LOG_FORMAT=json

  # ELK Stack
  # NOTE(review): single-node setup with no authentication enabled;
  # Elasticsearch also typically needs the host's vm.max_map_count
  # raised (>= 262144) — confirm before production use.
  elasticsearch:
    image: elasticsearch:7.14.0
    environment:
      - discovery.type=single-node
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
    volumes:
      - elasticsearch_data:/usr/share/elasticsearch/data
    ports:
      - "9200:9200"

  logstash:
    image: logstash:7.14.0
    volumes:
      - ./logstash/config/logstash.yml:/usr/share/logstash/config/logstash.yml:ro
      - ./logstash/pipeline:/usr/share/logstash/pipeline:ro
    ports:
      - "5044:5044"
    depends_on:
      - elasticsearch

  kibana:
    image: kibana:7.14.0
    ports:
      - "5601:5601"
    environment:
      - ELASTICSEARCH_HOSTS=http://elasticsearch:9200
    depends_on:
      - elasticsearch

  # Filebeat for log shipping
  filebeat:
    image: elastic/filebeat:7.14.0
    # root is required to read other containers' log files and the
    # Docker socket mounted below
    user: root
    volumes:
      - ./filebeat/filebeat.yml:/usr/share/filebeat/filebeat.yml:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
    depends_on:
      - logstash

volumes:
  elasticsearch_data:

High Availability and Scaling

Load Balancing

# docker-compose.ha.yml
# NOTE(review): the deploy.* keys (replicas, update_config, restart_policy)
# only take effect under Docker Swarm (docker stack deploy); plain
# docker-compose ignores them — confirm the target runtime.
version: '3.8'

services:
  # Load Balancer
  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./nginx/ssl:/etc/nginx/ssl:ro
    depends_on:
      - app
    deploy:
      replicas: 2

  # Application (multiple instances, rolled out one at a time)
  app:
    image: my-app:latest
    deploy:
      replicas: 3
      update_config:
        parallelism: 1
        delay: 10s
        failure_action: rollback
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
    environment:
      - NODE_ENV=production
    depends_on:
      - database
      - redis

  # Database with replication
  # NOTE(review): POSTGRES_REPLICATION_* variables are not recognized by
  # the official postgres image — they come from bitnami/postgresql.
  # Verify the intended image, and move the password into a Docker secret.
  database:
    image: postgres:13-alpine
    environment:
      - POSTGRES_REPLICATION_MODE=master
      - POSTGRES_REPLICATION_USER=replicator
      - POSTGRES_REPLICATION_PASSWORD=replicator_password
    volumes:
      - postgres_master_data:/var/lib/postgresql/data

  database-replica:
    image: postgres:13-alpine
    environment:
      - POSTGRES_REPLICATION_MODE=slave
      - POSTGRES_REPLICATION_USER=replicator
      - POSTGRES_REPLICATION_PASSWORD=replicator_password
      - POSTGRES_MASTER_HOST=database
    depends_on:
      - database

  # Redis Cluster
  # NOTE(review): a real Redis Cluster needs at least 6 nodes and an
  # explicit `redis-cli --cluster create` step; three replicas of one
  # service with --cluster-enabled will not form a cluster on their own.
  redis:
    image: redis:6-alpine
    command: redis-server --appendonly yes --cluster-enabled yes
    deploy:
      replicas: 3

volumes:
  postgres_master_data:

Health Checks and Circuit Breakers

# Application with health check
FROM node:16-alpine

WORKDIR /app
COPY package*.json ./
# --omit=dev replaces the deprecated --only=production flag (npm 8+)
RUN npm ci --omit=dev
COPY . .

# Health check endpoint: docker marks the container unhealthy after
# 3 consecutive non-zero exits from healthcheck.js
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
  CMD node healthcheck.js

EXPOSE 3000
CMD ["node", "server.js"]
// healthcheck.js
// Probes GET http://localhost:3000/health and exits with the code Docker's
// HEALTHCHECK expects: 0 on HTTP 200, 1 on any other status, on a
// connection error, or when no response arrives within 2 seconds.
const http = require('http');

const request = http.get(
  { hostname: 'localhost', port: 3000, path: '/health', timeout: 2000 },
  (response) => process.exit(response.statusCode === 200 ? 0 : 1)
);

// Connection refused / DNS failure / reset → unhealthy
request.on('error', () => process.exit(1));

// Socket idle past the 2s limit → abort the request and report unhealthy
request.on('timeout', () => {
  request.destroy();
  process.exit(1);
});

Backup and Disaster Recovery

Database Backups

#!/bin/bash
# Automated PostgreSQL backup script: dump, compress, rotate, upload.
# Fail fast on any error, unset variable, or broken pipe.
set -euo pipefail

BACKUP_DIR="/backups"
DATE=$(date +%Y%m%d_%H%M%S)
CONTAINER_NAME="postgres-db"

mkdir -p "$BACKUP_DIR"

# Create backup (pg_dump runs inside the container; stdout lands on the host)
docker exec "$CONTAINER_NAME" pg_dump -U user -d myapp > "$BACKUP_DIR/backup_$DATE.sql"

# Compress backup
gzip "$BACKUP_DIR/backup_$DATE.sql"

# Remove old backups (keep last 7 days)
find "$BACKUP_DIR" -name "backup_*.sql.gz" -mtime +7 -delete

# Upload to S3 (optional)
aws s3 cp "$BACKUP_DIR/backup_$DATE.sql.gz" s3://my-backups/database/

Volume Backups

# Backup named volume
# ("$(pwd)" is quoted so working directories containing spaces survive)
docker run --rm \
  -v postgres_data:/data:ro \
  -v "$(pwd)":/backup \
  alpine \
  tar czf "/backup/postgres_backup_$(date +%Y%m%d).tar.gz" -C /data .

# Restore volume
docker run --rm \
  -v postgres_data:/data \
  -v "$(pwd)":/backup \
  alpine \
  tar xzf /backup/postgres_backup_20231201.tar.gz -C /data

Disaster Recovery Plan

# docker-compose.dr.yml
version: '3.8'

services:
  # Primary application
  app-primary:
    image: my-app:latest
    environment:
      # NOTE(review): credentials embedded in the URL — prefer Docker
      # secrets or an env file kept out of version control.
      - DATABASE_URL=postgresql://user:pass@db-primary:5432/myapp
      - REDIS_URL=redis://redis-primary:6379
    # NOTE(review): redis-primary / redis-standby are referenced but not
    # defined in this file — presumably provided by an overlay; confirm.
    depends_on:
      - db-primary
      - redis-primary

  # Standby application — only started via:
  #   docker-compose --profile disaster-recovery up
  app-standby:
    image: my-app:latest
    environment:
      - DATABASE_URL=postgresql://user:pass@db-standby:5432/myapp
      - REDIS_URL=redis://redis-standby:6379
    depends_on:
      - db-standby
      - redis-standby
    profiles:
      - disaster-recovery

  # Database replication
  # NOTE(review): POSTGRES_REPLICATION_MODE / POSTGRES_MASTER_HOST are
  # bitnami/postgresql variables, not supported by the official postgres
  # image — verify the intended image.
  db-primary:
    image: postgres:13-alpine
    environment:
      - POSTGRES_REPLICATION_MODE=master
    volumes:
      - postgres_primary_data:/var/lib/postgresql/data

  db-standby:
    image: postgres:13-alpine
    environment:
      - POSTGRES_REPLICATION_MODE=slave
      - POSTGRES_MASTER_HOST=db-primary
    volumes:
      - postgres_standby_data:/var/lib/postgresql/data
    profiles:
      - disaster-recovery

volumes:
  postgres_primary_data:
  postgres_standby_data:

CI/CD Integration

GitLab CI Pipeline

# .gitlab-ci.yml
stages:
  - test
  - build
  - security
  - deploy

variables:
  DOCKER_DRIVER: overlay2
  DOCKER_TLS_CERTDIR: "/certs"

services:
  - docker:20.10.16-dind

before_script:
  # --password-stdin keeps the registry password out of process listings
  # (docker itself warns that -p is insecure)
  - echo "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY"

test:
  stage: test
  script:
    - docker build -t $CI_PROJECT_NAME:test --target test .
    - docker run --rm $CI_PROJECT_NAME:test npm test

build:
  stage: build
  script:
    # Push both an immutable SHA tag and a moving latest tag
    - docker build -t $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA .
    - docker push $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
    - docker tag $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA $CI_REGISTRY_IMAGE:latest
    - docker push $CI_REGISTRY_IMAGE:latest

security_scan:
  stage: security
  script:
    # --exit-code 1 fails the job when vulnerabilities are found
    - docker run --rm -v /var/run/docker.sock:/var/run/docker.sock
      aquasec/trivy:latest image --exit-code 1 $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA

deploy_staging:
  stage: deploy
  script:
    - docker-compose -f docker-compose.staging.yml pull
    - docker-compose -f docker-compose.staging.yml up -d
  environment:
    name: staging
    url: https://staging.example.com
  only:
    - develop

deploy_production:
  stage: deploy
  script:
    - docker-compose -f docker-compose.prod.yml pull
    - docker-compose -f docker-compose.prod.yml up -d
  environment:
    name: production
    url: https://example.com
  only:
    - main
  # Production deploys require a manual trigger in the GitLab UI
  when: manual

GitHub Actions

# .github/workflows/docker.yml
name: Docker Build and Deploy

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main ]

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v3
    
    - name: Build test image
      run: docker build -t test-image --target test .
    
    - name: Run tests
      run: docker run --rm test-image npm test

  build-and-push:
    runs-on: ubuntu-latest
    needs: test
    # Least-privilege token: read code, write packages (push to ghcr.io)
    permissions:
      contents: read
      packages: write
    
    steps:
    - uses: actions/checkout@v3
    
    - name: Log in to Container Registry
      uses: docker/login-action@v2
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    
    # Derives image tags and OCI labels from the git context (branch, SHA)
    - name: Extract metadata
      id: meta
      uses: docker/metadata-action@v4
      with:
        images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
    
    - name: Build and push Docker image
      uses: docker/build-push-action@v4
      with:
        context: .
        push: true
        tags: ${{ steps.meta.outputs.tags }}
        labels: ${{ steps.meta.outputs.labels }}

  deploy:
    runs-on: ubuntu-latest
    needs: build-and-push
    if: github.ref == 'refs/heads/main'
    
    steps:
    - uses: actions/checkout@v3
    
    # NOTE(review): this runs compose on the ephemeral GitHub-hosted
    # runner, not on a production host — typically you would SSH to the
    # target or use a self-hosted runner here; confirm the intent.
    - name: Deploy to production
      run: |
        docker-compose -f docker-compose.prod.yml pull
        docker-compose -f docker-compose.prod.yml up -d

Troubleshooting Production Issues

Common Production Problems

# Container keeps restarting
docker logs container-name
# Exit code hints: 137 = SIGKILL (often OOM-killed), 139 = segfault,
# 143 = SIGTERM
docker inspect container-name --format='{{.State.ExitCode}}'
docker inspect container-name --format='{{.State.Error}}'

# High memory usage
docker stats --no-stream
# (requires procps-style ps in the container; busybox ps lacks --sort)
docker exec container-name ps aux --sort=-%mem | head

# Network connectivity issues
# (ping/nslookup must exist inside the container image)
docker exec container-name ping other-container
docker exec container-name nslookup service-name
docker network inspect network-name

# Volume permission issues
docker exec -u root container-name ls -la /data
docker exec -u root container-name chown -R appuser:appuser /data

# Performance issues
docker exec container-name top
docker exec container-name iostat -x 1
docker system df  # Check disk usage

Debugging Tools

# Debug container with tools
# (shares the target's network and PID namespaces, so you can inspect its
# sockets and processes without modifying the target image)
docker run -it --rm \
  --network container:target-container \
  --pid container:target-container \
  nicolaka/netshoot

# System resource monitoring
# (htop is not in the base alpine image, so install it first;
# --pid host + --privileged expose the host's processes to htop)
docker run --rm -it \
  --pid host \
  --privileged \
  alpine sh -c 'apk add --no-cache htop && htop'

# Container filesystem analysis (layer-by-layer image inspection)
docker run --rm -it \
  -v /var/lib/docker:/var/lib/docker:ro \
  wagoodman/dive:latest image-name

Summary

In this comprehensive section, you learned:

Production Security

  • Container hardening techniques
  • Runtime security configurations
  • Image vulnerability scanning
  • Secrets management

Performance Optimization

  • Image size optimization strategies
  • Resource management and limits
  • Caching and build optimization
  • Multi-stage build patterns

Monitoring and Operations

  • Application monitoring with Prometheus/Grafana
  • Centralized logging with ELK stack
  • Health checks and circuit breakers
  • High availability patterns

DevOps Integration

  • CI/CD pipeline integration
  • Automated testing and security scanning
  • Backup and disaster recovery
  • Troubleshooting production issues

Key Takeaways:

  • Always run containers as non-root users in production
  • Implement comprehensive monitoring and logging
  • Use multi-stage builds to minimize image size
  • Automate security scanning in your CI/CD pipeline
  • Plan for disaster recovery and backup strategies
  • Monitor resource usage and set appropriate limits

Congratulations! You’ve completed the Docker Fundamentals guide. You now have the knowledge to:

  • Build and deploy containerized applications
  • Implement security best practices
  • Optimize performance for production workloads
  • Monitor and troubleshoot Docker applications
  • Integrate Docker into CI/CD pipelines

Continue your Docker journey by exploring Kubernetes for container orchestration, Docker Swarm for clustering, or specialized topics like serverless containers with AWS Fargate or Google Cloud Run.