Normalization Without Overdoing It
First Normal Form (1NF)
Sql
-- Bad: multiple values in one column
CREATE TABLE users (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
email VARCHAR(100),
phone_numbers TEXT
);
-- Good: separate table for phone numbers
CREATE TABLE users (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
email VARCHAR(100)
);
CREATE TABLE user_phone_numbers (
id SERIAL PRIMARY KEY,
user_id INTEGER REFERENCES users(id),
phone_number VARCHAR(20),
type VARCHAR(20)
);
Second Normal Form (2NF)
Sql
CREATE TABLE orders (
order_id SERIAL PRIMARY KEY,
customer_id INTEGER,
order_date TIMESTAMP
);
CREATE TABLE order_items (
order_id INTEGER REFERENCES orders(order_id),
product_id INTEGER REFERENCES products(product_id),
quantity INTEGER,
price DECIMAL(10,2),
PRIMARY KEY (order_id, product_id)
);
CREATE TABLE products (
product_id SERIAL PRIMARY KEY,
product_name VARCHAR(100),
base_price DECIMAL(10,2)
);
Indexing for real queries
Sql
CREATE TABLE users (
id SERIAL PRIMARY KEY,
email VARCHAR(100) UNIQUE,
username VARCHAR(50) UNIQUE
);
-- Composite index for "orders by customer, sorted by date"
CREATE INDEX idx_orders_customer_date ON orders(customer_id, order_date);
CREATE INDEX idx_order_items_order_product ON order_items(order_id, product_id);
-- Covering index when the query only needs these columns
CREATE INDEX idx_orders_covering ON orders(customer_id, order_date, status, total_amount);
Inspecting query plans
Sql
EXPLAIN (ANALYZE, BUFFERS)
SELECT u.name, COUNT(o.order_id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.customer_id
WHERE u.created_at > '2024-01-01'
GROUP BY u.id, u.name
HAVING COUNT(o.order_id) > 5
ORDER BY order_count DESC;
Types and Constraints
Sql
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(255) NOT NULL,
price DECIMAL(10,2) NOT NULL CHECK (price > 0),
quantity INTEGER NOT NULL DEFAULT 0 CHECK (quantity >= 0),
is_active BOOLEAN NOT NULL DEFAULT true,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
customer_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
status order_status NOT NULL DEFAULT 'pending'
);
Soft Deletes
Sql
CREATE TABLE users (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
email VARCHAR(100) NOT NULL UNIQUE,
deleted_at TIMESTAMP WITH TIME ZONE NULL
);
CREATE INDEX idx_users_active ON users(email) WHERE deleted_at IS NULL;
CREATE OR REPLACE FUNCTION soft_delete_user(user_id INTEGER)
RETURNS VOID AS $$
BEGIN
UPDATE users
SET deleted_at = NOW()
WHERE id = user_id AND deleted_at IS NULL;
END;
$$ LANGUAGE plpgsql;
Monitoring
Sql
SELECT
query,
calls,
total_time,
mean_time,
rows
FROM pg_stat_statements
ORDER BY total_time DESC
LIMIT 10;
SELECT
schemaname,
tablename,
n_tup_ins as inserts,
n_tup_upd as updates,
n_tup_del as deletes,
n_live_tup as live_tuples,
n_dead_tup as dead_tuples
FROM pg_stat_user_tables
ORDER BY n_live_tup DESC;