Understanding Database Design Principles
Normalization: The Foundation
First Normal Form (1NF)
-- Bad: violates 1NF (multiple values stored in one column)
CREATE TABLE users (
  id SERIAL PRIMARY KEY,
  name VARCHAR(100),
  email VARCHAR(100),
  phone_numbers TEXT -- e.g. a comma-separated list of phone numbers
);
-- Good: follows 1NF (one phone number per row in a separate table)
CREATE TABLE users (
  id SERIAL PRIMARY KEY,
  name VARCHAR(100),
  email VARCHAR(100)
);
CREATE TABLE user_phone_numbers (
  id SERIAL PRIMARY KEY,
  user_id INTEGER REFERENCES users(id),
  phone_number VARCHAR(20),
  type VARCHAR(20) -- 'home', 'work', 'mobile'
);
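With phone numbers in their own table, retrieving them is a plain join instead of string parsing. A minimal sketch against the schema above:
-- All phone numbers for each user, one row per number
SELECT u.name, p.type, p.phone_number
FROM users u
JOIN user_phone_numbers p ON p.user_id = u.id
ORDER BY u.name, p.type;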
Second Normal Form (2NF)
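2NF applies to tables with a composite primary key: every non-key column must depend on the whole key, not just part of it. As a contrasting sketch (column names are illustrative), storing the product name on each order item ties it to only product_id, half of the key:
-- Bad: violates 2NF (product_name depends only on product_id, not the full key)
CREATE TABLE order_items (
  order_id INTEGER,
  product_id INTEGER,
  product_name VARCHAR(100), -- partial dependency on product_id
  quantity INTEGER,
  PRIMARY KEY (order_id, product_id)
);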
-- Good: follows 2NF (every non-key column depends on the whole key)
CREATE TABLE orders (
  order_id SERIAL PRIMARY KEY,
  customer_id INTEGER,
  order_date TIMESTAMP
);
-- products is created first so order_items can reference it
CREATE TABLE products (
  product_id SERIAL PRIMARY KEY,
  product_name VARCHAR(100),
  base_price DECIMAL(10,2)
);
CREATE TABLE order_items (
  order_id INTEGER REFERENCES orders(order_id),
  product_id INTEGER REFERENCES products(product_id),
  quantity INTEGER,
  price DECIMAL(10,2), -- price charged for this order
  PRIMARY KEY (order_id, product_id)
);
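A sketch of how the normalized tables come back together at query time; the price column on order_items captures what was charged for that order, so historical totals stay stable even if base_price changes later.
-- Total value of each order, assuming the tables above
SELECT o.order_id, o.order_date, SUM(oi.quantity * oi.price) AS order_total
FROM orders o
JOIN order_items oi ON oi.order_id = o.order_id
GROUP BY o.order_id, o.order_date
ORDER BY o.order_date;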
Indexing Strategies
Primary and Unique Indexes
-- Primary key and UNIQUE constraints automatically create unique indexes
CREATE TABLE users (
  id SERIAL PRIMARY KEY,       -- creates a unique index on id
  email VARCHAR(100) UNIQUE,   -- creates a unique index on email
  username VARCHAR(50) UNIQUE  -- creates a unique index on username
);
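In PostgreSQL you can confirm which indexes those constraints created by querying the pg_indexes view:
SELECT indexname, indexdef
FROM pg_indexes
WHERE tablename = 'users';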
Composite Indexes
-- Create composite indexes for common query patterns
-- (column order matters: an index on (customer_id, order_date) also serves filters on customer_id alone)
CREATE INDEX idx_orders_customer_date ON orders(customer_id, order_date);
CREATE INDEX idx_order_items_order_product ON order_items(order_id, product_id);
-- Covering index (includes all columns needed for query)
CREATE INDEX idx_orders_covering ON orders(customer_id, order_date, status, total_amount);
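On PostgreSQL 11 and later, an alternative covering index keeps only the searched columns in the key and carries the rest as non-key payload via INCLUDE (status and total_amount are assumed columns, as above):
-- Same covering effect, but only customer_id and order_date form the key
CREATE INDEX idx_orders_covering_incl
  ON orders(customer_id, order_date) INCLUDE (status, total_amount);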
Query Optimization
Understanding Query Plans
-- Use EXPLAIN to analyze query performance
EXPLAIN (ANALYZE, BUFFERS)
SELECT u.name, COUNT(o.order_id) AS order_count
FROM users u
LEFT JOIN orders o ON u.id = o.customer_id
WHERE u.created_at > '2024-01-01'
GROUP BY u.id, u.name
HAVING COUNT(o.order_id) > 5
ORDER BY order_count DESC;
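If the plan shows sequential scans on large tables, the usual first step is to add indexes that match the join and filter columns. A sketch, assuming the columns referenced above exist:
-- Hypothetical supporting indexes for the query above
CREATE INDEX idx_orders_customer_id ON orders(customer_id);
CREATE INDEX idx_users_created_at ON users(created_at);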
Data Types and Constraints
Choosing Appropriate Data Types
CREATE TABLE products (
  id SERIAL PRIMARY KEY,
  name VARCHAR(255) NOT NULL,
  price DECIMAL(10,2) NOT NULL,  -- exact numeric type for currency
  weight DECIMAL(8,3),           -- for measurements
  quantity INTEGER NOT NULL DEFAULT 0,
  is_active BOOLEAN NOT NULL DEFAULT true,
  created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
  updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
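The updated_at default only applies on insert; keeping it current on updates is usually done with a trigger. A minimal sketch, assuming PostgreSQL 11+ (older versions use EXECUTE PROCEDURE) and illustrative names:
CREATE OR REPLACE FUNCTION set_updated_at()
RETURNS TRIGGER AS $$
BEGIN
  NEW.updated_at = NOW();
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER products_set_updated_at
  BEFORE UPDATE ON products
  FOR EACH ROW
  EXECUTE FUNCTION set_updated_at();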
Constraints and Validation
-- Check constraints for data validation
CREATE TABLE products (
  id SERIAL PRIMARY KEY,
  name VARCHAR(255) NOT NULL,
  price DECIMAL(10,2) NOT NULL CHECK (price > 0),
  quantity INTEGER NOT NULL DEFAULT 0 CHECK (quantity >= 0),
  email VARCHAR(255) CHECK (email ~* '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$')
);
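A quick way to see these at work: a row with a non-positive price is rejected with a check constraint violation (the values below are illustrative).
-- Fails: violates CHECK (price > 0)
INSERT INTO products (name, price) VALUES ('Sample item', -5.00);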
-- Foreign key constraints
-- The enum type must exist before it is referenced (values here are examples)
CREATE TYPE order_status AS ENUM ('pending', 'paid', 'shipped', 'delivered', 'cancelled');
CREATE TABLE orders (
  id SERIAL PRIMARY KEY,
  customer_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
  status order_status NOT NULL DEFAULT 'pending'
);
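Constraints can also be added to existing tables with ALTER TABLE, which is how they typically arrive through migrations; the constraint name below is illustrative.
ALTER TABLE order_items
  ADD CONSTRAINT order_items_quantity_positive CHECK (quantity > 0);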
Advanced Design Patterns
Soft Deletes
-- Add a deleted_at column instead of hard deletes
CREATE TABLE users (
  id SERIAL PRIMARY KEY,
  name VARCHAR(100) NOT NULL,
  email VARCHAR(100) NOT NULL UNIQUE,
  deleted_at TIMESTAMP WITH TIME ZONE NULL
);
-- Partial index: only rows that have not been soft-deleted are indexed
CREATE INDEX idx_users_active ON users(email) WHERE deleted_at IS NULL;
-- Soft delete function
CREATE OR REPLACE FUNCTION soft_delete_user(user_id INTEGER)
RETURNS VOID AS $$
BEGIN
  UPDATE users
  SET deleted_at = NOW()
  WHERE id = user_id AND deleted_at IS NULL;
END;
$$ LANGUAGE plpgsql;
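Usage is a single function call, and a view can keep the deleted_at filter out of everyday application queries (the view name and id value are illustrative):
SELECT soft_delete_user(42);

-- Convenience view exposing only non-deleted users
CREATE VIEW active_users AS
SELECT id, name, email
FROM users
WHERE deleted_at IS NULL;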
Performance Monitoring
Query Performance Analysis
-- Monitor slow queries (requires the pg_stat_statements extension;
-- on PostgreSQL 13+ the columns are named total_exec_time and mean_exec_time)
SELECT
  query,
  calls,
  total_time,
  mean_time,
  rows
FROM pg_stat_statements
ORDER BY total_time DESC
LIMIT 10;
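pg_stat_statements must be added to shared_preload_libraries and enabled per database before the view returns anything, and its counters can be cleared to start a fresh measurement window:
-- Enable the extension in the current database
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;

-- Reset accumulated statistics
SELECT pg_stat_statements_reset();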
-- Check table statistics
SELECT
  schemaname,
  tablename,
  n_tup_ins AS inserts,
  n_tup_upd AS updates,
  n_tup_del AS deletes,
  n_live_tup AS live_tuples,
  n_dead_tup AS dead_tuples
FROM pg_stat_user_tables
ORDER BY n_live_tup DESC;
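A dead-tuple count that stays high relative to live tuples suggests vacuuming is falling behind; a manual pass on the affected table (name assumed here) reclaims space and refreshes planner statistics.
VACUUM (VERBOSE, ANALYZE) order_items;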
Best Practices
- Start with normalization - Get the data structure right first
- Index strategically - Create indexes based on actual query patterns
- Monitor performance - Use tools to identify bottlenecks
- Plan for growth - Design with scalability in mind
- Security first - Implement proper access controls and encryption
- Version control - Track schema changes over time (a minimal migration sketch follows this list)
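One lightweight approach to schema version control is a directory of numbered, forward-only SQL migration files checked into the repository; dedicated migration tools follow the same idea. The file name and contents below are purely illustrative.
-- migrations/0007_add_users_deleted_at.sql (hypothetical)
BEGIN;

ALTER TABLE users ADD COLUMN deleted_at TIMESTAMP WITH TIME ZONE;
CREATE INDEX idx_users_active ON users(email) WHERE deleted_at IS NULL;

COMMIT;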