By the end of this lesson, you will understand the core concepts of dbt: Transform Data in the Warehouse — Data Engineering in 5 Days and be able to apply them in a real project.
pip install dbt-core dbt-duckdb # or dbt-bigquery, dbt-snowflake dbt init my_project cd my_project dbt debug # verify connection
-- models/staging/stg_orders.sql
-- Staging model: clean and rename raw columns
{{ config(materialized='view') }}
SELECT
id::VARCHAR AS order_id,
customer_id,
LOWER(status) AS status,
created_at::DATE AS order_date,
total_amount::DECIMAL(10,2) AS amount
FROM {{ source('raw', 'orders') }}
WHERE id IS NOT NULL
-- models/marts/orders_daily.sql
-- Mart model: business-level aggregate
{{ config(materialized='table') }}
WITH orders AS (
SELECT * FROM {{ ref('stg_orders') }}
WHERE status = 'completed'
),
daily AS (
SELECT
order_date,
COUNT(*) AS order_count,
SUM(amount) AS revenue,
AVG(amount) AS avg_order_value
FROM orders
GROUP BY order_date
)
SELECT * FROM daily
ORDER BY order_date
# models/staging/schema.yml — documentation and tests
version: 2
models:
- name: stg_orders
description: "Cleaned orders from the raw source"
columns:
- name: order_id
description: "Unique order ID"
tests:
- unique
- not_null
- name: status
tests:
- accepted_values:
values: ['pending', 'completed', 'cancelled']
dbt run && dbt test to build all models and run all tests in one command.