Monitoring and Compliance

Effective monitoring and compliance automation are essential for maintaining reliable, secure AWS infrastructure at scale. Terraform enables you to implement comprehensive observability and compliance controls as code, ensuring consistent monitoring across all your resources and automated compliance validation.

This part covers patterns for implementing monitoring, logging, alerting, and compliance automation using AWS native services and Terraform.

CloudWatch Monitoring Foundation

Establish comprehensive CloudWatch monitoring for all critical resources:

# Application log groups: one per entry in var.log_groups.
# Each entry provides its own retention period and log type; every group is
# encrypted with the key supplied in var.log_encryption_key_id.
resource "aws_cloudwatch_log_group" "application_logs" {
  for_each = var.log_groups

  # Resulting path: /aws/<map key>/<application name>
  name = format("/aws/%s/%s", each.key, var.application_name)

  kms_key_id        = var.log_encryption_key_id
  retention_in_days = each.value.retention_days

  tags = merge(
    var.common_tags,
    {
      Application = var.application_name
      LogType     = each.value.log_type
    },
  )
}

# Error alarms: one CloudWatch metric alarm per entry in var.error_alarms.
# The metric, namespace, dimensions, statistic, period and threshold all come
# from the map entry; the alarm fires when the statistic exceeds the threshold.
resource "aws_cloudwatch_metric_alarm" "application_errors" {
  for_each = var.error_alarms

  alarm_name        = join("-", [var.application_name, each.key, "errors"])
  alarm_description = "High error rate for ${each.key}"

  # Metric being watched
  namespace   = each.value.namespace
  metric_name = each.value.metric_name
  dimensions  = each.value.dimensions
  statistic   = each.value.statistic
  period      = each.value.period

  # Evaluation
  comparison_operator = "GreaterThanThreshold"
  threshold           = each.value.threshold
  evaluation_periods  = each.value.evaluation_periods
  treat_missing_data  = "notBreaching" # missing data points do not count as breaches

  # Notify the general alerts topic on both ALARM and OK transitions.
  alarm_actions = [aws_sns_topic.alerts.arn]
  ok_actions    = [aws_sns_topic.alerts.arn]

  tags = var.common_tags
}

# Composite alarm that rolls every per-metric error alarm into one signal.
# It is in ALARM whenever at least one of the application_errors alarms is.
# NOTE(review): if var.error_alarms is empty the generated rule is an empty
# string — confirm callers always pass at least one alarm.
resource "aws_cloudwatch_composite_alarm" "application_health" {
  alarm_name        = "${var.application_name}-overall-health"
  alarm_description = "Overall application health based on multiple metrics"

  # Builds "ALARM(<name-1>) OR ALARM(<name-2>) OR ..."
  alarm_rule = join(
    " OR ",
    formatlist(
      "ALARM(%s)",
      values(aws_cloudwatch_metric_alarm.application_errors)[*].alarm_name,
    ),
  )

  # State changes in either direction go to the critical alerts topic.
  actions_enabled = true
  alarm_actions   = [aws_sns_topic.critical_alerts.arn]
  ok_actions      = [aws_sns_topic.critical_alerts.arn]

  tags = var.common_tags
}

# Application dashboard with two widgets: a load balancer metrics graph on the
# top row and a Logs Insights query for recent ERROR lines underneath it.
resource "aws_cloudwatch_dashboard" "application" {
  dashboard_name = "${var.application_name}-dashboard"

  dashboard_body = jsonencode({
    widgets = [
      # Widget 1: request count, latency and HTTP status counts for the ALB
      # identified by var.load_balancer_arn_suffix.
      {
        type   = "metric"
        x      = 0
        y      = 0
        width  = 12
        height = 6

        properties = {
          title   = "Load Balancer Metrics"
          region  = data.aws_region.current.name
          view    = "timeSeries"
          stacked = false
          period  = 300

          # In dashboard metric arrays "." repeats the value found at the same
          # position in the preceding row.
          metrics = [
            ["AWS/ApplicationELB", "RequestCount", "LoadBalancer", var.load_balancer_arn_suffix],
            [".", "TargetResponseTime", ".", "."],
            [".", "HTTPCode_Target_2XX_Count", ".", "."],
            [".", "HTTPCode_Target_4XX_Count", ".", "."],
            [".", "HTTPCode_Target_5XX_Count", ".", "."],
          ]
        }
      },

      # Widget 2: the 100 most recent log lines containing "ERROR".
      # Reads the log group stored under the "app" key, so var.log_groups must
      # contain an "app" entry.
      {
        type   = "log"
        x      = 0
        y      = 6
        width  = 24
        height = 6

        properties = {
          title  = "Recent Errors"
          region = data.aws_region.current.name
          query  = "SOURCE '${aws_cloudwatch_log_group.application_logs["app"].name}' | fields @timestamp, @message | filter @message like /ERROR/ | sort @timestamp desc | limit 100"
        }
      },
    ]
  })
}

AWS Config for Compliance

Implement AWS Config for continuous compliance monitoring:

# Config Configuration Recorder
#
# Records every supported resource type (including global ones) unless
# var.config_excluded_resource_types is non-empty, in which case the recorder
# switches to the "record everything except ..." strategy.
#
# The two modes are mutually exclusive in the provider: exclusion_by_resource_types
# requires all_supported = false together with
# recording_strategy.use_only = "EXCLUSION_BY_RESOURCE_TYPES", while
# include_global_resource_types requires all_supported = true.
#
# NOTE(review): recording only starts once the recorder is enabled (typically
# via aws_config_configuration_recorder_status) — confirm that resource exists
# elsewhere in the module.
resource "aws_config_configuration_recorder" "main" {
  name     = "${var.organization_name}-config-recorder"
  role_arn = aws_iam_role.config.arn

  recording_group {
    all_supported                 = length(var.config_excluded_resource_types) == 0
    include_global_resource_types = length(var.config_excluded_resource_types) == 0

    # Only emitted when there is something to exclude.
    dynamic "exclusion_by_resource_types" {
      for_each = length(var.config_excluded_resource_types) > 0 ? [var.config_excluded_resource_types] : []
      content {
        resource_types = exclusion_by_resource_types.value
      }
    }

    dynamic "recording_strategy" {
      for_each = length(var.config_excluded_resource_types) > 0 ? ["EXCLUSION_BY_RESOURCE_TYPES"] : []
      content {
        use_only = recording_strategy.value
      }
    }
  }
}

# Config Delivery Channel
#
# Delivers configuration snapshots to the config_logs bucket under the
# "config/" prefix once every 24 hours.
resource "aws_config_delivery_channel" "main" {
  name           = "${var.organization_name}-config-delivery"
  s3_bucket_name = aws_s3_bucket.config_logs.bucket
  s3_key_prefix  = "config"

  snapshot_delivery_properties {
    delivery_frequency = "TwentyFour_Hours"
  }

  # A delivery channel can only be created after a recorder exists, so the
  # dependency points from the channel to the recorder (not the reverse).
  depends_on = [aws_config_configuration_recorder.main]
}

# Config Rules for Compliance
#
# One AWS Config rule per entry in var.config_rules. Each entry supplies the
# rule source (owner + identifier), an optional list of source details, and
# optional input parameters.
resource "aws_config_config_rule" "compliance_rules" {
  for_each = var.config_rules

  name = "${var.organization_name}-${each.key}"

  source {
    owner             = each.value.source_owner
    source_identifier = each.value.source_identifier

    # source_detail is a nested block of "source"; declaring it at resource
    # level is rejected by the provider as an unsupported block type.
    dynamic "source_detail" {
      for_each = each.value.source_details
      content {
        event_source                = source_detail.value.event_source
        message_type                = source_detail.value.message_type
        maximum_execution_frequency = source_detail.value.maximum_execution_frequency
      }
    }
  }

  # jsonencode(null) yields the literal string "null", which is not a valid
  # parameter document, so leave the argument unset when nothing is supplied.
  input_parameters = each.value.input_parameters == null ? null : jsonencode(each.value.input_parameters)

  # Rules cannot be created until a configuration recorder exists.
  depends_on = [aws_config_configuration_recorder.main]

  tags = var.common_tags
}

# Remediation actions for Config rules.
# Every key in var.auto_remediation_rules must also be a key in
# var.config_rules, because the rule name is looked up by that key.
resource "aws_config_remediation_configuration" "auto_remediation" {
  for_each = var.auto_remediation_rules

  config_rule_name = aws_config_config_rule.compliance_rules[each.key].name
  resource_type    = each.value.resource_type

  # Remediation is performed by version "1" of the named SSM document.
  target_type    = "SSM_DOCUMENT"
  target_id      = each.value.ssm_document_name
  target_version = "1"

  automatic                  = each.value.automatic
  maximum_automatic_attempts = each.value.maximum_automatic_attempts

  # Always pass the remediation role to the document ...
  parameter {
    name         = "AutomationAssumeRole"
    static_value = aws_iam_role.config_remediation.arn
  }

  # ... followed by any rule-specific static parameters.
  dynamic "parameter" {
    for_each = each.value.parameters
    iterator = extra

    content {
      name         = extra.key
      static_value = extra.value
    }
  }
}

# Security conformance pack.
# The template is read from conformance-packs/security-pack.yaml inside this
# module, and the access-logging bucket name is passed in as a parameter.
resource "aws_config_conformance_pack" "security_pack" {
  name          = "${var.organization_name}-security-conformance-pack"
  template_body = file("${path.module}/conformance-packs/security-pack.yaml")

  input_parameter {
    parameter_name  = "AccessLoggingBucketParameter"
    parameter_value = aws_s3_bucket.access_logs.bucket
  }

  # Created only after the configuration recorder.
  depends_on = [aws_config_configuration_recorder.main]
}

Security Hub Integration

Centralize security findings with AWS Security Hub:

# Turns on Security Hub for this account and region, with the default
# standards enabled.
# NOTE(review): the default standards may overlap with the explicit
# aws_securityhub_standards_subscription resources in this file — confirm
# whether both are wanted.
resource "aws_securityhub_account" "main" {
  enable_default_standards = true
}

# Security Standards Subscriptions
#
# AWS Foundational Security Best Practices v1.0.0.
# This standard uses a regional ARN of the form
#   arn:aws:securityhub:<region>::standards/aws-foundational-security-best-practices/v/1.0.0
# (the previous "ruleset/finding-format/..." value is not a valid standards ARN).
resource "aws_securityhub_standards_subscription" "aws_foundational" {
  standards_arn = "arn:aws:securityhub:${data.aws_region.current.name}::standards/aws-foundational-security-best-practices/v/1.0.0"
  depends_on    = [aws_securityhub_account.main]
}

# CIS AWS Foundations Benchmark v1.2.0.
# Unlike the other standards, this one uses a region-less "ruleset/" ARN; the
# path must not contain a "finding-format" segment.
resource "aws_securityhub_standards_subscription" "cis" {
  standards_arn = "arn:aws:securityhub:::ruleset/cis-aws-foundations-benchmark/v/1.2.0"
  depends_on    = [aws_securityhub_account.main]
}

# PCI DSS v3.2.1, enabled only when var.enable_pci_dss is true.
# This standard uses a regional ARN of the form
#   arn:aws:securityhub:<region>::standards/pci-dss/v/3.2.1
resource "aws_securityhub_standards_subscription" "pci_dss" {
  count = var.enable_pci_dss ? 1 : 0

  standards_arn = "arn:aws:securityhub:${data.aws_region.current.name}::standards/pci-dss/v/3.2.1"
  depends_on    = [aws_securityhub_account.main]
}

# Custom insight: findings that are both HIGH severity and ACTIVE, grouped by
# the resource they were raised against.
resource "aws_securityhub_insight" "high_severity_findings" {
  name               = "High Severity Active Findings"
  group_by_attribute = "ResourceId"

  filters {
    record_state {
      comparison = "EQUALS"
      value      = "ACTIVE"
    }

    severity_label {
      comparison = "EQUALS"
      value      = "HIGH"
    }
  }

  # Insights can only be created once Security Hub is enabled.
  depends_on = [aws_securityhub_account.main]
}

# EventBridge rule matching imported Security Hub findings that are ACTIVE and
# labelled HIGH or CRITICAL.
resource "aws_cloudwatch_event_rule" "security_hub_findings" {
  name        = format("%s-security-hub-findings", var.organization_name)
  description = "Capture Security Hub findings"

  event_pattern = jsonencode({
    source      = ["aws.securityhub"]
    detail-type = ["Security Hub Findings - Imported"]

    detail = {
      findings = {
        RecordState = ["ACTIVE"]
        Severity    = { Label = ["HIGH", "CRITICAL"] }
      }
    }
  })
}

# Sends events matched by the Security Hub rule to the security alerts topic.
# NOTE(review): EventBridge can only publish if the topic policy allows
# events.amazonaws.com — no topic policy is visible here, confirm one exists.
resource "aws_cloudwatch_event_target" "security_hub_sns" {
  target_id = "SecurityHubSNSTarget"
  rule      = aws_cloudwatch_event_rule.security_hub_findings.name
  arn       = aws_sns_topic.security_alerts.arn
}

GuardDuty Threat Detection

Implement GuardDuty for threat detection and response:

# GuardDuty detector with S3 log monitoring and EBS malware scanning switched
# on; EKS audit-log monitoring is controlled by var.enable_eks_audit_logs.
resource "aws_guardduty_detector" "main" {
  enable                       = true
  finding_publishing_frequency = "FIFTEEN_MINUTES"

  datasources {
    # S3 data event monitoring
    s3_logs {
      enable = true
    }

    # EKS audit logs (optional)
    kubernetes {
      audit_logs {
        enable = var.enable_eks_audit_logs
      }
    }

    # Scan EBS volumes of EC2 instances that have findings
    malware_protection {
      scan_ec2_instance_with_findings {
        ebs_volumes {
          enable = true
        }
      }
    }
  }

  tags = var.common_tags
}

# Custom threat intelligence list, created only when var.threat_intel_sets is
# non-empty. The list is a plain-text file, threat-intel.txt, in the
# threat_intel bucket.
resource "aws_guardduty_threatintelset" "custom_threats" {
  count = length(var.threat_intel_sets) > 0 ? 1 : 0

  name        = "${var.organization_name}-custom-threat-intel"
  detector_id = aws_guardduty_detector.main.id
  activate    = true

  format   = "TXT"
  location = "s3://${aws_s3_bucket.threat_intel[0].bucket}/threat-intel.txt"

  tags = var.common_tags
}

# Trusted IP list, created only when var.trusted_ip_ranges is non-empty.
# The list is a plain-text file, trusted-ips.txt, in the threat_intel bucket.
# NOTE(review): this indexes aws_s3_bucket.threat_intel[0] — confirm that
# bucket's count is also satisfied whenever trusted IP ranges are supplied.
resource "aws_guardduty_ipset" "trusted_ips" {
  count = length(var.trusted_ip_ranges) > 0 ? 1 : 0

  name        = "${var.organization_name}-trusted-ips"
  detector_id = aws_guardduty_detector.main.id
  activate    = true

  format   = "TXT"
  location = "s3://${aws_s3_bucket.threat_intel[0].bucket}/trusted-ips.txt"

  tags = var.common_tags
}

# EventBridge Rule for GuardDuty Findings
#
# Matches GuardDuty findings with severity 7 or higher.
#
# A numeric range is used instead of listing 7.0, 7.1, ... 10.0 individually:
# jsonencode renders whole numbers such as 7.0 as 7, and exact-value matching
# depends on how the number is written in the event, so an enumerated list can
# miss findings. Range matching compares the numeric value itself.
resource "aws_cloudwatch_event_rule" "guardduty_findings" {
  name        = "${var.organization_name}-guardduty-findings"
  description = "Capture GuardDuty findings"

  event_pattern = jsonencode({
    source      = ["aws.guardduty"]
    detail-type = ["GuardDuty Finding"]
    detail = {
      severity = [{ numeric = [">=", 7] }]
    }
  })
}

# Routes matched GuardDuty findings to the security response Lambda.
# NOTE(review): the function (defined outside this section) needs an
# aws_lambda_permission for events.amazonaws.com — confirm it exists.
resource "aws_cloudwatch_event_target" "guardduty_lambda" {
  target_id = "GuardDutyResponseLambda"
  rule      = aws_cloudwatch_event_rule.guardduty_findings.name
  arn       = aws_lambda_function.security_response.arn
}

Automated Compliance Reporting

Generate automated compliance reports:

# Compliance reporting Lambda.
# Deployed from compliance_reporter.zip with a five-minute timeout. The report
# bucket, the current region and the report topic ARN are passed to the
# function as environment variables.
# NOTE(review): no source_code_hash is set, so a changed zip at the same path
# may not trigger a redeploy — confirm this is intended.
resource "aws_lambda_function" "compliance_reporter" {
  function_name = "${var.organization_name}-compliance-reporter"
  role          = aws_iam_role.compliance_reporter.arn

  filename = "compliance_reporter.zip"
  handler  = "index.handler"
  runtime  = "python3.9"
  timeout  = 300

  environment {
    variables = {
      CONFIG_BUCKET       = aws_s3_bucket.compliance_reports.bucket
      SECURITY_HUB_REGION = data.aws_region.current.name
      SNS_TOPIC_ARN       = aws_sns_topic.compliance_reports.arn
    }
  }

  tags = var.common_tags
}

# Weekly schedule that triggers the compliance report.
resource "aws_cloudwatch_event_rule" "compliance_report" {
  name        = format("%s-compliance-report", var.organization_name)
  description = "Generate weekly compliance report"

  # Fields: minute hour day-of-month month day-of-week year.
  # Fires at 08:00 every Monday (EventBridge cron expressions are in UTC).
  schedule_expression = "cron(0 8 ? * MON *)"

  tags = var.common_tags
}

# Invokes the compliance reporter Lambda whenever the weekly schedule fires.
resource "aws_cloudwatch_event_target" "compliance_report" {
  rule      = aws_cloudwatch_event_rule.compliance_report.name
  target_id = "ComplianceReportTarget"
  arn       = aws_lambda_function.compliance_reporter.arn
}

# Without a resource-based permission EventBridge is not allowed to invoke the
# function and the scheduled run fails silently. The grant is scoped to this
# one rule via source_arn.
resource "aws_lambda_permission" "compliance_report_events" {
  statement_id  = "AllowExecutionFromEventBridgeComplianceReport"
  action        = "lambda:InvokeFunction"
  function_name = aws_lambda_function.compliance_reporter.function_name
  principal     = "events.amazonaws.com"
  source_arn    = aws_cloudwatch_event_rule.compliance_report.arn
}

# Bucket that stores generated compliance reports.
# The name ends with a random hex suffix taken from random_id.bucket_suffix.
# NOTE(review): no encryption, versioning or public-access-block resources are
# visible for this bucket — confirm they are defined elsewhere.
resource "aws_s3_bucket" "compliance_reports" {
  bucket = join("-", [
    var.organization_name,
    "compliance-reports",
    random_id.bucket_suffix.hex,
  ])

  tags = var.common_tags
}

# Lifecycle policy for compliance reports:
#   day 90   -> STANDARD_IA
#   day 365  -> GLACIER
#   day 2555 -> delete (about 7 years)
resource "aws_s3_bucket_lifecycle_configuration" "compliance_reports" {
  bucket = aws_s3_bucket.compliance_reports.id

  rule {
    id     = "compliance_report_lifecycle"
    status = "Enabled"

    # An empty filter applies the rule to every object in the bucket. The
    # provider expects each rule to declare either a filter or a prefix.
    filter {}

    transition {
      days          = 90
      storage_class = "STANDARD_IA"
    }

    transition {
      days          = 365
      storage_class = "GLACIER"
    }

    expiration {
      days = 2555 # 7 years retention
    }
  }
}

Cost and Usage Monitoring

Monitor costs and usage patterns:

# Cost Budget with multiple notifications
#
# Monthly USD cost budget limited to the current account. Emails
# var.budget_notification_emails when actual spend passes 50% and 80% of the
# limit, and when forecasted spend passes 100%.
resource "aws_budgets_budget" "monthly_cost" {
  name         = "${var.organization_name}-monthly-cost-budget"
  budget_type  = "COST"
  limit_amount = var.monthly_budget_limit
  limit_unit   = "USD"
  time_unit    = "MONTHLY"

  # Filters are declared as cost_filter blocks (name + list of values); the
  # older cost_filters map cannot hold list values.
  cost_filter {
    name   = "LinkedAccount"
    values = [data.aws_caller_identity.current.account_id]
  }

  # 50% of the limit actually spent
  notification {
    comparison_operator        = "GREATER_THAN"
    threshold                  = 50
    threshold_type             = "PERCENTAGE"
    notification_type          = "ACTUAL"
    subscriber_email_addresses = var.budget_notification_emails
  }

  # 80% of the limit actually spent
  notification {
    comparison_operator        = "GREATER_THAN"
    threshold                  = 80
    threshold_type             = "PERCENTAGE"
    notification_type          = "ACTUAL"
    subscriber_email_addresses = var.budget_notification_emails
  }

  # Forecast exceeds the full limit
  notification {
    comparison_operator        = "GREATER_THAN"
    threshold                  = 100
    threshold_type             = "PERCENTAGE"
    notification_type          = "FORECASTED"
    subscriber_email_addresses = var.budget_notification_emails
  }
}

# Usage Budget for specific services
#
# Monthly EC2 usage budget measured in hours. Emails
# var.budget_notification_emails once actual usage passes 80% of the limit.
# NOTE(review): usage budgets are normally scoped by a usage type or usage type
# group (for example UsageTypeGroup = "EC2: Running Hours") rather than by
# Service alone — confirm the filter below is accepted for limit_unit "Hrs".
resource "aws_budgets_budget" "ec2_usage" {
  name         = "${var.organization_name}-ec2-usage-budget"
  budget_type  = "USAGE"
  limit_amount = var.ec2_usage_limit
  limit_unit   = "Hrs"
  time_unit    = "MONTHLY"

  # Filters are declared as cost_filter blocks (name + list of values); the
  # older cost_filters map cannot hold list values.
  cost_filter {
    name   = "Service"
    values = ["Amazon Elastic Compute Cloud - Compute"]
  }

  notification {
    comparison_operator        = "GREATER_THAN"
    threshold                  = 80
    threshold_type             = "PERCENTAGE"
    notification_type          = "ACTUAL"
    subscriber_email_addresses = var.budget_notification_emails
  }
}

# Cost Anomaly Detection
#
# The AWS provider exposes anomaly detection as aws_ce_anomaly_monitor (there
# is no aws_ce_anomaly_detector resource type). A DIMENSIONAL monitor takes
# monitor_dimension rather than a JSON specification and evaluates every AWS
# service individually, which includes EC2 and RDS.
# NOTE(review): an account can normally hold only one DIMENSIONAL/SERVICE
# monitor — confirm none already exists.
resource "aws_ce_anomaly_monitor" "cost_anomaly" {
  name              = "${var.organization_name}-cost-anomaly-detector"
  monitor_type      = "DIMENSIONAL"
  monitor_dimension = "SERVICE"

  tags = var.common_tags
}

# Daily email digest of anomalies whose total impact is at least 100 (in the
# account's billing currency).
resource "aws_ce_anomaly_subscription" "cost_anomaly" {
  name      = "${var.organization_name}-cost-anomaly-subscription"
  frequency = "DAILY"

  monitor_arn_list = [
    aws_ce_anomaly_monitor.cost_anomaly.arn
  ]

  subscriber {
    type    = "EMAIL"
    address = var.cost_anomaly_email
  }

  # A single condition is expressed directly as a dimension; "and" is meant
  # for combining two or more conditions.
  threshold_expression {
    dimension {
      key           = "ANOMALY_TOTAL_IMPACT_ABSOLUTE"
      match_options = ["GREATER_THAN_OR_EQUAL"]
      values        = ["100"]
    }
  }

  tags = var.common_tags
}

Notification and Alerting

Implement comprehensive notification systems:

# General alerts topic: receives ALARM/OK notifications from the per-metric
# error alarms.
resource "aws_sns_topic" "alerts" {
  name = format("%s-alerts", var.organization_name)
  tags = var.common_tags
}

# Critical alerts topic: receives state changes of the composite
# application-health alarm.
resource "aws_sns_topic" "critical_alerts" {
  name = format("%s-critical-alerts", var.organization_name)
  tags = var.common_tags
}

# Security alerts topic: target of the Security Hub findings EventBridge rule.
resource "aws_sns_topic" "security_alerts" {
  name = format("%s-security-alerts", var.organization_name)
  tags = var.common_tags
}

# Email subscriptions to the general alerts topic, one per distinct address in
# var.alert_email_addresses.
resource "aws_sns_topic_subscription" "email_alerts" {
  for_each = toset(var.alert_email_addresses)

  protocol  = "email"
  endpoint  = each.key # for a set, each.key and each.value are the same
  topic_arn = aws_sns_topic.alerts.arn
}

# Slack delivery for critical alerts, enabled when a webhook URL is configured.
#
# SNS cannot post to a Slack incoming webhook directly: an HTTPS subscription
# must be confirmed by the endpoint, and Slack neither confirms subscriptions
# nor understands the SNS message envelope. Critical alerts are therefore
# routed through the alert processor Lambda, which receives the webhook URL in
# its SLACK_WEBHOOK_URL environment variable.
resource "aws_sns_topic_subscription" "slack_alerts" {
  count = var.slack_webhook_url != null ? 1 : 0

  topic_arn = aws_sns_topic.critical_alerts.arn
  protocol  = "lambda"
  endpoint  = aws_lambda_function.alert_processor.arn
}

# Allows the critical alerts topic to invoke the alert processor.
resource "aws_lambda_permission" "allow_sns_critical" {
  count = var.slack_webhook_url != null ? 1 : 0

  statement_id  = "AllowExecutionFromCriticalAlertsSNS"
  action        = "lambda:InvokeFunction"
  function_name = aws_lambda_function.alert_processor.function_name
  principal     = "sns.amazonaws.com"
  source_arn    = aws_sns_topic.critical_alerts.arn
}

# Lambda function for alert processing
#
# Deployed from alert_processor.zip with a 60-second timeout. The Slack and
# Teams webhook URLs are passed as environment variables.
# NOTE(review): webhook URLs are credentials; consider sourcing them from a
# secrets store instead of plain environment variables.
resource "aws_lambda_function" "alert_processor" {
  filename      = "alert_processor.zip"
  function_name = "${var.organization_name}-alert-processor"
  role          = aws_iam_role.alert_processor.arn
  handler       = "index.handler"
  runtime       = "python3.9"
  timeout       = 60

  environment {
    # Either webhook may be left unset (null). Null entries are dropped so the
    # environment map only ever contains string values.
    variables = {
      for name, url in {
        SLACK_WEBHOOK_URL = var.slack_webhook_url
        TEAMS_WEBHOOK_URL = var.teams_webhook_url
      } : name => url if url != null
    }
  }

  tags = var.common_tags
}

# Delivers every message on the general alerts topic to the alert processor
# Lambda.
resource "aws_sns_topic_subscription" "lambda_processor" {
  protocol  = "lambda"
  endpoint  = aws_lambda_function.alert_processor.arn
  topic_arn = aws_sns_topic.alerts.arn
}

# Grants SNS permission to invoke the alert processor, restricted to messages
# originating from the general alerts topic.
resource "aws_lambda_permission" "allow_sns" {
  statement_id = "AllowExecutionFromSNS"

  principal  = "sns.amazonaws.com"
  source_arn = aws_sns_topic.alerts.arn

  action        = "lambda:InvokeFunction"
  function_name = aws_lambda_function.alert_processor.function_name
}

What’s Next

Comprehensive monitoring and compliance automation provide the observability and governance needed for production AWS infrastructure. These patterns ensure you can detect issues early, maintain compliance standards, and respond quickly to security threats.

In the final part, we’ll explore advanced AWS service integrations, including EKS, serverless architectures, and complex multi-service patterns that demonstrate how all these concepts work together in real-world applications.