DBT Modeling Guide
This guide covers creating dimensional models using DBT, including fact tables, dimension tables, and star schema design for the AdventureWorks dataset.
Dimensional Model for Sale Processes
Overview
Fact Table
Dim Date
Dim Territory
Dim Product
fact_sale
x
x
x
Star Schema Design
@startuml
!theme plain
entity fact_sale {
* sale_key : bigint <<PK>>
--
* date_key : int <<FK>>
* territory_key : int <<FK>>
* product_key : int <<FK>>
--
order_qty : decimal
unit_price : decimal
line_total : decimal
sub_total : decimal
tax_amt : decimal
total_due : decimal
sales_order_id : int
}
entity dim_date {
* date_key : int <<PK>>
--
date : date
year : int
quarter : int
month : int
day : int
day_of_week : int
week_of_year : int
is_weekend : boolean
fiscal_year : int
fiscal_quarter : int
}
entity dim_territory {
* territory_key : int <<PK>>
--
territory_id : int
territory_name : varchar(50)
country_region_code : varchar(3)
territory_group : varchar(50)
region_name : varchar(50)
}
entity dim_product {
* product_key : int <<PK>>
--
product_id : int
product_name : varchar(50)
product_number : varchar(25)
product_category : varchar(50)
product_subcategory : varchar(50)
standard_cost : decimal
list_price : decimal
is_current : boolean
}
dim_date ||--o{ fact_sale
dim_territory ||--o{ fact_sale
dim_product ||--o{ fact_sale
@enduml
Building Dimensional Models
Fact Table Creation
Create models/marts/core/fact_sale.sql
:
{{ config(
materialized='incremental',
unique_key='sale_key',
on_schema_change='fail'
) }}
WITH sales_with_keys AS (
SELECT
{{ dbt_utils.surrogate_key(['soh.salesorderid', 'sod.salesorderdetailid']) }} as sale_key,
{{ get_date_key('soh.orderdate') }} as date_key,
{{ get_territory_key('soh.territoryid') }} as territory_key,
{{ get_product_key('sod.productid') }} as product_key,
-- Measures
sod.orderqty,
sod.unitprice,
sod.linetotal,
soh.subtotal,
soh.taxamt,
soh.totaldue,
soh.salesorderid,
soh.orderdate
FROM {{ ref('stg_sales_order_header') }} soh
INNER JOIN {{ ref('stg_sales_order_detail') }} sod
ON soh.salesorderid = sod.salesorderid
)
SELECT * FROM sales_with_keys
{% if is_incremental() %}
WHERE orderdate > (SELECT MAX(orderdate) FROM {{ this }})
{% endif %}
Dimension Tables
Date Dimension (models/marts/core/dim_date.sql
):
{{ config(materialized='table') }}
SELECT
{{ dbt_utils.surrogate_key(['date']) }} as date_key,
date,
EXTRACT(year FROM date) as year,
EXTRACT(quarter FROM date) as quarter,
EXTRACT(month FROM date) as month,
EXTRACT(day FROM date) as day,
EXTRACT(dow FROM date) as day_of_week,
EXTRACT(week FROM date) as week_of_year,
CASE WHEN EXTRACT(dow FROM date) IN (0,6) THEN TRUE ELSE FALSE END as is_weekend,
-- Add fiscal year logic as needed
EXTRACT(year FROM date) as fiscal_year,
EXTRACT(quarter FROM date) as fiscal_quarter
FROM {{ ref('date_spine') }}
Product Dimension (models/marts/core/dim_product.sql
):
{{ config(materialized='table') }}
SELECT
{{ dbt_utils.surrogate_key(['productid']) }} as product_key,
productid,
productname,
productnumber,
pc.name as product_category,
psc.name as product_subcategory,
standardcost,
listprice,
productline,
size,
weight,
color,
TRUE as is_current,
current_timestamp() as effective_from,
NULL as effective_to
FROM {{ ref('stg_products') }} p
LEFT JOIN {{ ref('stg_product_categories') }} pc
ON p.productcategoryid = pc.productcategoryid
LEFT JOIN {{ ref('stg_product_subcategories') }} psc
ON p.productsubcategoryid = psc.productsubcategoryid
Testing and Documentation
Data Quality Tests
Create models/marts/schema.yml
:
version: 2
models:
- name: fact_sale
description: "Central sales transaction fact table"
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- date_key
- territory_key
- product_key
- salesorderid
columns:
- name: sale_key
tests:
- unique
- not_null
- name: total_due
tests:
- dbt_utils.accepted_range:
min_value: 0
max_value: 1000000
- name: dim_date
columns:
- name: date_key
tests:
- unique
- not_null
Macros and Utilities
Create helper macros in macros/
directory:
macros/get_date_key.sql
:
{% macro get_date_key(date_column) %}
toInt32(toYYYYMMDD({{ date_column }}))
{% endmacro %}
Next Steps
Continue to Testing & Validation to ensure data quality.
Last updated
Was this helpful?