Best Practices and Optimization
The difference between code that works and code that works well lies in the details. I learned this the hard way when my first Python script took 10 minutes to process a file that should have taken 10 seconds. The problem wasn’t the algorithm - it was dozens of small inefficiencies that added up to a performance disaster.
Writing good Python code isn’t just about making it work; it’s about making it readable, maintainable, and efficient. These best practices will help you write code that other developers (including future you) will thank you for.
Code Style and PEP 8
Python has official style guidelines called PEP 8. Following them makes your code more readable and professional:
# Good: Clear, readable code following PEP 8
def calculate_monthly_payment(principal, annual_rate, years):
    """Return the fixed monthly payment for an amortizing loan.

    Args:
        principal: Amount borrowed.
        annual_rate: Annual interest rate as a decimal (0.06 for 6%).
        years: Loan term in years.

    Returns:
        The monthly payment, rounded to 2 decimal places (the zero-rate
        case returns the unrounded linear installment, as before).
    """
    monthly_rate = annual_rate / 12
    num_payments = years * 12
    # A zero-interest loan amortizes linearly.
    if monthly_rate == 0:
        return principal / num_payments
    # Standard annuity formula; name the compounding factor once instead
    # of repeating (1 + r)^n in the numerator and denominator.
    growth = (1 + monthly_rate) ** num_payments
    payment = principal * (monthly_rate * growth) / (growth - 1)
    return round(payment, 2)
# Bad: Hard to read, doesn't follow conventions
def calc(p,r,y):
    # Deliberately unreadable version of calculate_monthly_payment,
    # kept as the "bad" counter-example: cryptic names, no spacing,
    # no docstring. Do not write code like this.
    mr=r/12
    n=y*12
    if mr==0:return p/n
    pmt=p*(mr*(1+mr)**n)/((1+mr)**n-1)
    return round(pmt,2)
Key PEP 8 guidelines:
- Use 4 spaces for indentation (not tabs)
- Keep lines under 79 characters
- Use descriptive variable names
- Add spaces around operators
- Use lowercase with underscores for function names
- Use docstrings to document functions
Error Handling Best Practices
Good error handling makes your code robust and user-friendly:
import logging
from pathlib import Path
# Set up logging: configure the root logger at INFO, then create a
# module-level logger keyed by __name__ so records can be traced back
# to this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class FileProcessor:
    """Process text files from an input directory into an output directory.

    Each file is uppercased and written as ``processed_<name>`` in the
    output directory. Failures are logged and reported via a ``False``
    return value rather than raised, so a batch run continues past
    individual bad files.
    """

    def __init__(self, input_dir, output_dir):
        """Store directories and ensure the output directory exists.

        Args:
            input_dir: Directory containing files to process.
            output_dir: Directory for processed output (created if missing).
        """
        self.input_dir = Path(input_dir)
        self.output_dir = Path(output_dir)
        # Same logger object as the module-level one (same __name__ key),
        # held on the instance so the class is self-contained.
        self._logger = logging.getLogger(__name__)
        # Create output directory if it doesn't exist
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def process_file(self, filename):
        """Process a single file with comprehensive error handling.

        Returns:
            True on success, False on any handled failure.
        """
        input_path = self.input_dir / filename
        # Bug fix: the output name and the log messages below contained a
        # literal "(unknown)" placeholder where the filename belonged.
        output_path = self.output_dir / f"processed_{filename}"
        try:
            # Validate input file exists
            if not input_path.exists():
                raise FileNotFoundError(f"Input file not found: {input_path}")
            with open(input_path, 'r', encoding='utf-8') as infile:
                content = infile.read()
            # Transform content (example: uppercase)
            processed_content = content.upper()
            with open(output_path, 'w', encoding='utf-8') as outfile:
                outfile.write(processed_content)
            self._logger.info(f"Successfully processed {filename}")
            return True
        except FileNotFoundError as e:
            self._logger.error(f"File error: {e}")
            return False
        except PermissionError as e:
            self._logger.error(f"Permission error: {e}")
            return False
        except UnicodeDecodeError as e:
            self._logger.error(f"Encoding error in {filename}: {e}")
            return False
        except Exception as e:
            # Last-resort boundary: log and keep the batch going.
            self._logger.error(f"Unexpected error processing {filename}: {e}")
            return False

    def process_all_files(self, pattern="*.txt"):
        """Process all files matching *pattern*.

        Returns:
            Tuple of (successful count, error count).
        """
        processed_count = 0
        error_count = 0
        for file_path in self.input_dir.glob(pattern):
            if self.process_file(file_path.name):
                processed_count += 1
            else:
                error_count += 1
        self._logger.info(
            f"Processing complete: {processed_count} successful, {error_count} errors"
        )
        return processed_count, error_count
Performance Optimization
Small optimizations can make big differences in Python performance:
import time
from collections import defaultdict, Counter
def timing_comparison():
    """Compare naive vs idiomatic implementations of common operations.

    Times the string-concatenation pair on 1000 items and prints the
    elapsed times and speedup. Returns None.
    """
    # --- String concatenation ------------------------------------------
    def slow_string_concat(items):
        # Quadratic: each += copies everything accumulated so far.
        result = ""
        for item in items:
            result += str(item)
        return result

    def fast_string_concat(items):
        # str.join assembles the result in a single linear pass.
        return "".join(str(item) for item in items)

    # --- List membership -----------------------------------------------
    def slow_list_search(items, target):
        # O(n) linear scan.
        for item in items:
            if item == target:
                return True
        return False

    def fast_list_search(lookup_set, target):
        # O(1) average lookup. Bug fix: the original converted the list
        # to a set *inside* this function on every call, which is itself
        # O(n) and defeats the point — build the set once, up front, and
        # pass it in.
        return target in lookup_set

    # --- Counting ------------------------------------------------------
    def slow_counting(items):
        counts = {}
        for item in items:
            if item in counts:
                counts[item] += 1
            else:
                counts[item] = 1
        return counts

    def fast_counting(items):
        # Counter does the same work in optimized library code.
        return Counter(items)

    # Test data
    test_items = list(range(10000))

    # Time string concatenation
    start = time.time()
    slow_result = slow_string_concat(test_items[:1000])
    slow_time = time.time() - start

    start = time.time()
    fast_result = fast_string_concat(test_items[:1000])
    fast_time = time.time() - start

    # Sanity check: both approaches must produce identical output
    # (the originals computed these results and then ignored them).
    assert slow_result == fast_result

    print(f"String concatenation - Slow: {slow_time:.4f}s, Fast: {fast_time:.4f}s")
    # Guard against a zero fast_time on coarse platform timers.
    print(f"Speedup: {slow_time / max(fast_time, 1e-9):.1f}x")
# Memory-efficient generators
def memory_efficient_processing():
    """Contrast whole-file loading with line-by-line streaming.

    Defines two example functions (neither is invoked here) and returns
    None; see the commented usage at the bottom.
    """
    def process_large_file_bad(filename):
        # Anti-pattern: readlines() materializes every line in memory
        # before any processing happens.
        with open(filename, 'r') as f:
            all_lines = f.readlines()
        processed = []
        for raw_line in all_lines:
            processed.append(raw_line.strip().upper())
        return processed

    def process_large_file_good(filename):
        # Streaming: the file object yields one line at a time, so peak
        # memory stays flat regardless of file size.
        with open(filename, 'r') as f:
            for raw_line in f:
                yield raw_line.strip().upper()

    # Example usage:
    # for processed_line in process_large_file_good('large_file.txt'):
    #     print(processed_line)
# Efficient data structures
class OptimizedDataProcessor:
    """Track items using a data structure suited to each access pattern."""

    def __init__(self):
        # set: O(1) average membership tests.
        self.lookup_data = set()
        # Counter: optimized frequency counting.
        self.counter_data = Counter()
        # defaultdict(list): groups spring into existence on first use.
        self.grouped_data = defaultdict(list)

    def add_item(self, item, category):
        """Record *item* under *category* in all three structures."""
        self.lookup_data.add(item)
        self.counter_data[item] += 1
        self.grouped_data[category].append(item)

    def is_item_present(self, item):
        """O(1) average membership test instead of an O(n) list scan."""
        return item in self.lookup_data

    def get_most_common(self, n=5):
        """Return the *n* highest-frequency items as (item, count) pairs."""
        return self.counter_data.most_common(n)
Code Organization and Structure
Well-organized code is easier to maintain and debug:
# config.py - Configuration management
import os
from pathlib import Path
class Config:
    """Central application configuration, sourced from the environment.

    All values are read once at import time; call validate() at startup
    to fail fast on missing required settings.
    """

    # Filesystem layout, anchored at this module's directory.
    BASE_DIR = Path(__file__).parent
    DATA_DIR = BASE_DIR / "data"
    LOG_DIR = BASE_DIR / "logs"

    # Database connection string (defaults to a local SQLite file).
    DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///app.db")

    # External API settings.
    API_KEY = os.getenv("API_KEY")
    API_TIMEOUT = int(os.getenv("API_TIMEOUT", "30"))

    # Logging verbosity.
    LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")

    @classmethod
    def validate(cls):
        """Raise ValueError on missing required settings; create dirs."""
        if not cls.API_KEY:
            raise ValueError("API_KEY environment variable is required")
        for directory in (cls.DATA_DIR, cls.LOG_DIR):
            directory.mkdir(exist_ok=True)
# utils.py - Utility functions
import functools
import time
def retry(max_attempts=3, delay=1):
    """Decorator that retries a function when it raises any exception.

    Args:
        max_attempts: Total number of attempts before giving up.
        delay: Seconds to sleep between attempts.

    The final failure is re-raised with its original traceback intact.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(max_attempts):
                try:
                    return func(*args, **kwargs)
                except Exception:
                    if attempt == max_attempts - 1:
                        # Bug fix: bare `raise` preserves the original
                        # traceback; `raise e` would truncate it here.
                        raise
                    time.sleep(delay)
            # Unreachable: the last attempt either returns or re-raises.
        return wrapper
    return decorator
def validate_input(validator_func):
    """Decorator that rejects calls whose arguments fail *validator_func*.

    Raises:
        ValueError: When validator_func(*args, **kwargs) is falsy.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Guard clause: only proceed when the arguments validate.
            if validator_func(*args, **kwargs):
                return func(*args, **kwargs)
            raise ValueError("Input validation failed")
        return wrapper
    return decorator
# main.py - Main application
import logging
from config import Config
from utils import retry, validate_input
def setup_logging():
    """Configure root logging to write to both a file and the console."""
    log_level = getattr(logging, Config.LOG_LEVEL)
    file_handler = logging.FileHandler(Config.LOG_DIR / 'app.log')
    console_handler = logging.StreamHandler()
    logging.basicConfig(
        level=log_level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[file_handler, console_handler],
    )
class Application:
    """Main application: wires together config, logging, and fetching."""

    def __init__(self):
        # Fail fast on bad configuration before any work starts.
        Config.validate()
        setup_logging()
        self.logger = logging.getLogger(__name__)

    @retry(max_attempts=3)
    def fetch_data(self, url):
        """Fetch JSON from *url*, retrying transient failures via @retry."""
        import requests
        response = requests.get(url, timeout=Config.API_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def run(self):
        """Run the application, logging start/finish and any fatal error."""
        self.logger.info("Application starting")
        try:
            pass  # application logic goes here
        except Exception as e:
            self.logger.error(f"Application error: {e}")
            raise
        finally:
            self.logger.info("Application finished")
if __name__ == "__main__":
    # Entry point: construct and run the application only when this file
    # is executed directly (not when imported).
    app = Application()
    app.run()
Testing and Documentation
Good code includes tests and documentation:
def calculate_compound_interest(principal, rate, time, compound_frequency=1):
    """
    Calculate compound interest.

    Args:
        principal (float): Initial amount of money
        rate (float): Annual interest rate (as decimal, e.g., 0.05 for 5%)
        time (float): Time period in years
        compound_frequency (int): Number of times interest compounds per year

    Returns:
        float: Final amount after compound interest, rounded to 2 decimals

    Raises:
        ValueError: If principal, rate, or time is negative, or
            compound_frequency is not positive

    Examples:
        >>> calculate_compound_interest(1000, 0.05, 2)
        1102.5
        >>> calculate_compound_interest(1000, 0.05, 2, 12)
        1104.94
    """
    # Bug fix: the monthly-compounding doctest previously claimed 1104.89,
    # but 1000 * (1 + 0.05/12) ** 24 = 1104.94.
    if principal < 0 or rate < 0 or time < 0 or compound_frequency <= 0:
        raise ValueError("All parameters must be non-negative (compound_frequency must be positive)")
    amount = principal * (1 + rate / compound_frequency) ** (compound_frequency * time)
    return round(amount, 2)
# Simple test function
def test_compound_interest():
    """Smoke-test calculate_compound_interest: values and error handling."""
    # Annual compounding: 1000 * 1.05^2 = 1102.50
    result = calculate_compound_interest(1000, 0.05, 2)
    assert abs(result - 1102.5) < 0.01, f"Expected ~1102.5, got {result}"
    # Monthly compounding: 1000 * (1 + 0.05/12)^24 = 1104.94.
    # Bug fix: the expected value was previously 1104.89, which does not
    # match the formula, so this assertion always failed.
    result = calculate_compound_interest(1000, 0.05, 2, 12)
    assert abs(result - 1104.94) < 0.01, f"Expected ~1104.94, got {result}"
    # Negative principal must be rejected.
    try:
        calculate_compound_interest(-1000, 0.05, 2)
        assert False, "Should have raised ValueError"
    except ValueError:
        pass  # Expected
    print("All tests passed!")
if __name__ == "__main__":
    # Run the smoke tests when this file is executed directly.
    test_compound_interest()
Security Best Practices
Security should be built into your code from the start:
import hashlib
import secrets
import os
from pathlib import Path
class SecureFileHandler:
    """Handle files securely: path confinement and password hashing."""

    # PBKDF2 work factor; raise over time as hardware improves.
    _ITERATIONS = 100000

    def __init__(self, allowed_dir):
        """Confine all file access to *allowed_dir* (resolved to absolute)."""
        self.allowed_dir = Path(allowed_dir).resolve()

    def safe_file_path(self, filename):
        """Return an absolute path for *filename* inside the allowed dir.

        Raises:
            ValueError: If the resolved path escapes the allowed directory.
        """
        # Strip any directory components to block path traversal (../..).
        safe_name = os.path.basename(filename)
        full_path = (self.allowed_dir / safe_name).resolve()
        # Bug fix: a plain startswith() string check wrongly accepts
        # sibling directories that share a prefix (e.g. /data vs
        # /database). relative_to() compares whole path components.
        try:
            full_path.relative_to(self.allowed_dir)
        except ValueError:
            raise ValueError("Invalid file path") from None
        return full_path

    def hash_password(self, password):
        """Return salt + PBKDF2-HMAC-SHA256 digest as one hex string."""
        # Random per-password salt: 32 bytes -> 64 hex characters.
        salt = secrets.token_hex(32)
        password_hash = hashlib.pbkdf2_hmac(
            'sha256',
            password.encode('utf-8'),
            salt.encode('utf-8'),
            self._ITERATIONS,
        )
        return salt + password_hash.hex()

    def verify_password(self, password, stored_hash):
        """Check *password* against a value from hash_password()."""
        # Extract salt (first 64 characters), remainder is the digest.
        salt = stored_hash[:64]
        stored_password_hash = stored_hash[64:]
        password_hash = hashlib.pbkdf2_hmac(
            'sha256',
            password.encode('utf-8'),
            salt.encode('utf-8'),
            self._ITERATIONS,
        )
        # Security fix: compare_digest is constant-time, so a match/mismatch
        # cannot be probed via response-timing differences as with `==`.
        return secrets.compare_digest(password_hash.hex(), stored_password_hash)
# Input validation
def validate_email(email):
    """Return True if *email* loosely matches name@domain.tld."""
    import re
    # Intentionally simple: one non-empty local part, a domain, and a
    # TLD of at least two letters. Not a full RFC 5322 validator.
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    return bool(re.match(pattern, email))
def sanitize_input(user_input, max_length=100):
    """Strip risky characters from *user_input* and cap its length.

    Args:
        user_input: Raw string from an untrusted source.
        max_length: Maximum length of the returned string.

    Returns:
        The sanitized, length-limited, whitespace-stripped string.

    Raises:
        ValueError: If *user_input* is not a string.
    """
    # Bug fix: `re` was never imported at module scope (the only
    # `import re` in this file is local to validate_email), so this
    # function raised NameError when called.
    import re

    if not isinstance(user_input, str):
        raise ValueError("Input must be a string")
    # Remove characters commonly used in HTML/script injection.
    sanitized = re.sub(r'[<>"\']', '', user_input)
    # Limit length, then strip surrounding whitespace (same order as before).
    return sanitized[:max_length].strip()
These best practices transform good code into great code. They’re not just academic exercises - they solve real problems that emerge when code moves from development to production. Follow these patterns, and your Python code will be more reliable, maintainable, and professional.
Next, we’ll put everything together with real-world projects that demonstrate how these fundamentals, techniques, and best practices combine to create complete applications.