Create a Date dimension
You often need a Date Dimension in your Data Mart to analyze your data by date.
Often, you don't have any source data to build this Date Dimension.
This article provides some guidance on building it, depending on your Target Technology.
Date source data
Microsoft SQL Server
For Microsoft SQL Server, we can:
- Create a Manual_Data database
USE [master]
CREATE DATABASE [Manual_Data]
- Create a GetDateRange() function in the Manual_Data database
USE [Manual_Data]
CREATE FUNCTION [dbo].[GetDateRange] (
@dtFrom DATETIME
,@dtTo DATETIME
,@fiscalYearOffsetInMonth INT
)
RETURNS
@Result TABLE
(
[DateKey] INT PRIMARY KEY
,[Date] DATETIME
,[YearKey] INT
,[YearNumber] INT
,[Year] VARCHAR(30)
,[QuarterKey] INT
,[QuarterNumber] TINYINT
,[Quarter] VARCHAR(30)
,[MonthKey] INT
,[MonthNumber] TINYINT
,[Month] VARCHAR(30)
,[MonthName] VARCHAR(30)
,[FirstDateInMonth] DATETIME
,[LastDateInMonth] DATETIME
,[DayKey] INT
,[Day] VARCHAR(30)
,[DayNumberOfWeekKey] TINYINT
,[DayNumberOfWeekNumber] TINYINT
,[DayNumberOfWeekName] VARCHAR(30)
,[DayNumberOfMonth] TINYINT
,[DayNumberOfYear] SMALLINT
,[WeekKey] INT
,[WeekNumber] SMALLINT
,[WeekName] VARCHAR(30)
,[WeekNumberOfYear] SMALLINT
)
AS
BEGIN
DECLARE
@dt DateTime
,@dtFiscal DateTime
,@id Int
,@year int
,@yearName char(4)
,@semester int
,@quarter int
,@month int
,@firstDateInMonth date
,@yearFiscal int
,@yearNameFiscal char(4)
,@semesterFiscal int
,@quarterFiscal int
,@monthFiscal int
,@day int
,@dayOfWeek tinyint
;
SET @dt = @dtFrom
WHILE @dt <= @dtTo
BEGIN
SELECT @year= YEAR(@dt)
,@yearName= CAST(@year AS CHAR(4))
,@quarter= DATEPART(q, @dt)
,@semester= CASE WHEN @quarter <= 2 THEN 1 ELSE 2 END
,@month= MONTH(@dt)
,@firstDateInMonth= CAST(CAST(@year AS NVARCHAR(10)) + '-' + CAST(@month AS NVARCHAR(10)) + '-01' AS DATE)
,@day= DAY(@dt)
,@id= @year * 10000 + @month * 100 + @day
,@dtFiscal= DATEADD(m, @fiscalYearOffsetInMonth, @dt)
,@yearFiscal= YEAR(@dtFiscal)
,@yearNameFiscal= CAST(@yearFiscal AS CHAR(4))
,@quarterFiscal= DATEPART(q, @dtFiscal)
,@semesterFiscal= CASE WHEN @quarterFiscal <= 2 THEN 1 ELSE 2 END
,@monthFiscal= MONTH(@dtFiscal)
,@dayOfWeek= DATEPART(dw, @dt)
;
INSERT INTO @Result(
[DateKey]
,[Date]
,[YearKey]
,[YearNumber]
,[Year]
,[QuarterKey]
,[QuarterNumber]
,[Quarter]
,[MonthKey]
,[MonthNumber]
,[Month]
,[MonthName]
,[FirstDateInMonth]
,[LastDateInMonth]
,[DayKey]
,[Day]
,[DayNumberOfWeekKey]
,[DayNumberOfWeekNumber]
,[DayNumberOfWeekName]
,[DayNumberOfMonth]
,[DayNumberOfYear]
,[WeekKey]
,[WeekNumber]
,[WeekName]
,[WeekNumberOfYear]
)
VALUES(
@id
,@dt
,@year --YearKey
,@year --YearNumber
,@yearName --YearName
,@year * 10 + @quarter --QuarterKey
,@quarter --QuarterNumber
,@yearName + '/Q' + CAST(@quarter AS CHAR(1)) --Quarter unique name
,@year * 100 + @month --MonthKey
,@month --MonthNumber
,DATENAME(mm, @dt) + ' ' + CAST(@yearName AS VARCHAR(4)) --Month unique name
,DATENAME(mm, @dt)--MonthName
,@firstDateInMonth
,DATEADD( DAY, -1, DATEADD(MONTH, 1, @firstDateInMonth))
,@id --DayKey
,DATENAME(mm, @dt) + ' ' + CAST(DAY(@dt) AS VARCHAR(2)) + ', ' + CAST(YEAR(@dt) AS VARCHAR(4)) -- Day unique name
,@dayOfWeek --DayNumberOfWeekKey
,@dayOfWeek --DayNumberOfWeekNumber
,CASE WHEN @dayOfWeek = 1 THEN 'Sunday'
WHEN @dayOfWeek = 2 THEN 'Monday'
WHEN @dayOfWeek = 3 THEN 'Tuesday'
WHEN @dayOfWeek = 4 THEN 'Wednesday'
WHEN @dayOfWeek = 5 THEN 'Thursday'
WHEN @dayOfWeek = 6 THEN 'Friday'
WHEN @dayOfWeek = 7 THEN 'Saturday'
END --DayNumberOfWeekName
,@day --DayNumberOfMonth
,DATEPART(dy, @dt) --DayNumberOfYear
,@year * 100 + DATEPART(wk, @dt) --WeekKey
,DATEPART(wk, @dt) --WeekNumber
,@yearName + '/W' + RIGHT('00' + CAST(DATEPART(wk, @dt) AS VARCHAR(2)), 2) --WeekName
,DATEPART(wk, @dt) --WeekNumberOfYear
);
SELECT @dt = DATEADD(day, 1, @dt);
END;
RETURN;
END;
- Create a DateRange table in the Manual_Data database
CREATE TABLE [dbo].[DateRange](
[DateKey] INT PRIMARY KEY
,[Date] DATETIME
,[YearKey] INT
,[YearNumber] INT
,[Year] VARCHAR(30)
,[QuarterKey] INT
,[QuarterNumber] TINYINT
,[Quarter] VARCHAR(30)
,[MonthKey] INT
,[MonthNumber] TINYINT
,[Month] VARCHAR(30)
,[MonthName] VARCHAR(30)
,[FirstDateInMonth] DATETIME
,[LastDateInMonth] DATETIME
,[DayKey] INT
,[Day] VARCHAR(30)
,[DayNumberOfWeekKey] TINYINT
,[DayNumberOfWeekNumber] TINYINT
,[DayNumberOfWeekName] VARCHAR(30)
,[DayNumberOfMonth] TINYINT
,[DayNumberOfYear] SMALLINT
,[WeekKey] INT
,[WeekNumber] SMALLINT
,[WeekName] VARCHAR(30)
,[WeekNumberOfYear] SMALLINT
)
- Insert data in the DateRange table by using the GetDateRange() function
INSERT INTO [dbo].[DateRange]
SELECT *
FROM [dbo].[GetDateRange](CAST(N'1900-01-01' AS DATETIME), CAST(N'2199-12-31' AS DATETIME), 0);
Snowflake
For Snowflake, we can:
- Create a DATERANGE View in our source system:
CREATE VIEW AW2017.DATERANGE
AS
WITH precalculation AS
(
SELECT
--Date range will start at 2000-01-01
DATEADD(day, ROW_NUMBER() OVER(ORDER BY seq8()), '1999-12-31'::DATE) AS dt
, YEAR(dt) AS year
, CAST(year AS CHAR(4)) AS yearName
, DATE_PART(quarter, dt::date) AS quarter
, CASE WHEN quarter <= 2 THEN 1 ELSE 2 END AS semester
, MONTH(dt) AS month
, CAST(CAST(year AS VARCHAR(10)) || '-' || CAST(month AS VARCHAR(10)) || '-01' AS DATE) AS firstDateInMonth
, DAY(dt) AS day
, year * 10000 + month * 100 + DAY AS id
, DATE_PART(dayofweek, dt::date) + 1 AS dayOfWeek
--Date range will contains 100 years starting from 1999 so from beginning of 2000 TO end of 2099
FROM TABLE(GENERATOR(ROWCOUNT => 365*100))
)
SELECT
id AS DateKey
,dt AS Date
,year AS YearKey
,year AS YearNumber
,yearName AS Year
,year * 10 + quarter AS QuarterKey
,quarter AS QuarterNumber
,yearName || '/Q' || CAST(quarter AS CHAR(1)) AS Quarter
,year * 100 + month AS MonthKey
,month AS MonthNumber
,TO_CHAR(dt::date,'MMMM') || ' ' || CAST(yearName AS VARCHAR(4)) AS Month
,TO_CHAR(dt::date,'MMMM') AS MonthName
,firstDateInMonth AS FirstDateInMonth
,DATEADD( DAY, -1, DATEADD(MONTH, 1, firstDateInMonth)) AS LastDateInMonth
,id AS DayKey
,TO_CHAR(dt::date,'MMMM') || ' ' || CAST(DAY(dt) AS VARCHAR(2)) || ', ' || CAST(YEAR(dt) AS VARCHAR(4)) AS Day
,dayOfWeek AS DayNumberOfWeekKey
,dayOfWeek AS DayNumberOfWeekNumber
,CASE WHEN dayOfWeek = 1 THEN 'Sunday'
WHEN dayOfWeek = 2 THEN 'Monday'
WHEN dayOfWeek = 3 THEN 'Tuesday'
WHEN dayOfWeek = 4 THEN 'Wednesday'
WHEN dayOfWeek = 5 THEN 'Thursday'
WHEN dayOfWeek = 6 THEN 'Friday'
WHEN dayOfWeek = 7 THEN 'Saturday'
END AS DayNumberOfWeekName
,day AS DayNumberOfMonth
,DATE_PART(dayofyear, dt::date) AS DayNumberOfYear
,year * 100 + DATE_PART(week, dt::date) + 1 AS WeekKey
,DATE_PART(week, dt::date) + 1 AS WeekNumber
,yearName || '/W' || RIGHT('00' || CAST(DATE_PART(week, dt::date) + 1 AS VARCHAR(2)), 2) AS WeekName
,DATE_PART(week, dt::date) + 1 AS WeekNumberOfYear
FROM precalculation
Databricks
For Databricks, we can:
- Create a daterange View in a catalog with the following code in a notebook:
%sql
CREATE OR REPLACE VIEW elearning_databricks.aw2017.daterange AS
WITH precalculation AS (
SELECT
dt,
YEAR(dt) AS year,
CAST(YEAR(dt) AS STRING) AS yearName,
QUARTER(dt) AS quarter,
CASE WHEN QUARTER(dt) <= 2 THEN 1 ELSE 2 END AS semester,
MONTH(dt) AS month,
DATE_TRUNC('month', dt) AS firstDateInMonth,
DAY(dt) AS day,
(YEAR(dt) * 10000 + MONTH(dt) * 100 + DAY(dt)) AS id,
-- Spark SQL: dayofweek returns 1=Sunday, 2=Monday, ..., 7=Saturday (same as Snowflake +1)
DAYOFWEEK(dt) AS dayOfWeek
FROM (
SELECT explode(sequence(
DATE('2000-01-01'),
DATE('2099-12-31'),
INTERVAL 1 DAY
)) AS dt
)
)
SELECT
id AS DateKey,
dt AS Date,
year AS YearKey,
year AS YearNumber,
yearName AS Year,
year * 10 + quarter AS QuarterKey,
quarter AS QuarterNumber,
CONCAT(yearName, '/Q', quarter) AS Quarter,
year * 100 + month AS MonthKey,
month AS MonthNumber,
CONCAT(date_format(dt, 'MMMM'), ' ', yearName) AS Month,
date_format(dt, 'MMMM') AS MonthName,
firstDateInMonth AS FirstDateInMonth,
-- Last day of month: add 1 month, subtract 1 day
date_sub(date_add(firstDateInMonth, 32), day(date_add(firstDateInMonth, 32)) - 1) AS LastDateInMonth,
id AS DayKey,
CONCAT(date_format(dt, 'MMMM'), ' ', day, ', ', year) AS Day,
dayOfWeek AS DayNumberOfWeekKey,
dayOfWeek AS DayNumberOfWeekNumber,
CASE dayOfWeek
WHEN 1 THEN 'Sunday'
WHEN 2 THEN 'Monday'
WHEN 3 THEN 'Tuesday'
WHEN 4 THEN 'Wednesday'
WHEN 5 THEN 'Thursday'
WHEN 6 THEN 'Friday'
WHEN 7 THEN 'Saturday'
END AS DayNumberOfWeekName,
day AS DayNumberOfMonth,
DAYOFYEAR(dt) AS DayNumberOfYear,
year * 100 + WEEKOFYEAR(dt) AS WeekKey,
WEEKOFYEAR(dt) AS WeekNumber,
CONCAT(yearName, '/W', lpad(WEEKOFYEAR(dt), 2, '0')) AS WeekName,
WEEKOFYEAR(dt) AS WeekNumberOfYear
FROM precalculation
Microsoft Fabric
For Microsoft Fabric, we can:
- Create a table in a Lake House with the following code in a notebook:
from pyspark.sql import functions as F
from pyspark.sql.types import IntegerType, StringType, DateType
from datetime import date, timedelta
# Define date range
start_date = date(2000, 1, 1)
end_date = date(2099, 12, 31)
num_days = (end_date - start_date).days + 1
# Create DataFrame with all dates in range
df = spark.createDataFrame(
[(start_date + timedelta(days=i),) for i in range(num_days)],
["dt"]
)
# Add date dimension columns
df = (
df.withColumn("year", F.year("dt"))
.withColumn("yearName", F.col("year").cast(StringType()))
.withColumn("quarter", F.quarter("dt"))
.withColumn("semester", F.when(F.col("quarter") <= 2, 1).otherwise(2))
.withColumn("month", F.month("dt"))
.withColumn("firstDateInMonth", F.trunc("dt", "month"))
.withColumn("day", F.dayofmonth("dt"))
.withColumn("id", F.col("year") * 10000 + F.col("month") * 100 + F.col("day"))
.withColumn("dayOfWeek", F.dayofweek("dt")) # 1=Sunday, 7=Saturday
)
# Final select with formatting and additional columns
df = (
df.withColumn("DateKey", F.col("id"))
.withColumn("Date", F.col("dt"))
.withColumn("YearKey", F.col("year"))
.withColumn("YearNumber", F.col("year"))
.withColumn("Year", F.col("yearName"))
.withColumn("QuarterKey", F.col("year") * 10 + F.col("quarter"))
.withColumn("QuarterNumber", F.col("quarter"))
.withColumn("Quarter", F.concat(F.col("yearName"), F.lit("/Q"), F.col("quarter").cast(StringType())))
.withColumn("MonthKey", F.col("year") * 100 + F.col("month"))
.withColumn("MonthNumber", F.col("month"))
.withColumn("Month", F.concat(F.date_format("dt", "MMMM"), F.lit(" "), F.col("yearName")))
.withColumn("MonthName", F.date_format("dt", "MMMM"))
.withColumn("FirstDateInMonth", F.col("firstDateInMonth"))
.withColumn("LastDateInMonth", F.expr("last_day(dt)"))
.withColumn("DayKey", F.col("id"))
.withColumn("Day", F.concat(F.date_format("dt", "MMMM"), F.lit(" "), F.col("day").cast(StringType()), F.lit(", "), F.col("year").cast(StringType())))
.withColumn("DayNumberOfWeekKey", F.col("dayOfWeek"))
.withColumn("DayNumberOfWeekNumber", F.col("dayOfWeek"))
.withColumn("DayNumberOfWeekName", F.when(F.col("dayOfWeek") == 1, "Sunday")
.when(F.col("dayOfWeek") == 2, "Monday")
.when(F.col("dayOfWeek") == 3, "Tuesday")
.when(F.col("dayOfWeek") == 4, "Wednesday")
.when(F.col("dayOfWeek") == 5, "Thursday")
.when(F.col("dayOfWeek") == 6, "Friday")
.when(F.col("dayOfWeek") == 7, "Saturday"))
.withColumn("DayNumberOfMonth", F.col("day"))
.withColumn("DayNumberOfYear", F.dayofyear("dt"))
.withColumn("WeekKey", F.col("year") * 100 + F.weekofyear("dt"))
.withColumn("WeekNumber", F.weekofyear("dt"))
.withColumn("WeekName", F.concat(F.col("yearName"), F.lit("/W"), F.lpad(F.weekofyear("dt").cast(StringType()), 2, "0")))
.withColumn("WeekNumberOfYear", F.weekofyear("dt"))
.select(
"DateKey", "Date", "YearKey", "YearNumber", "Year", "QuarterKey", "QuarterNumber", "Quarter",
"MonthKey", "MonthNumber", "Month", "MonthName", "FirstDateInMonth", "LastDateInMonth",
"DayKey", "Day", "DayNumberOfWeekKey", "DayNumberOfWeekNumber", "DayNumberOfWeekName",
"DayNumberOfMonth", "DayNumberOfYear", "WeekKey", "WeekNumber", "WeekName", "WeekNumberOfYear"
)
)
# Save as a table in your Lakehouse or Warehouse - Lakehouse name to adapt
df.write.mode("overwrite").saveAsTable("docu_bglakehouse.DATERANGE")
Build the Date Dimension
The date dimension build depends on the modeling approach.
Data Vault modeling
The data lineage of the Date Mart Dimension in a Data Vault modeling can be:
To model it, please follow the steps:
- Discover the source date table you created previously for your Target Technology
- Create a Source Model Object from the discovery
- Create a Stage Model Object from the Source Model Object
It can be interesting to filter the data in the Stage Model Object with a WHERE clause in the Dataflow Set to fit the date range that actually exists in your date source data.
- Create a Reference Table Model Object from the Stage Model Object
- Create a D_Date Mart Dimension from the Reference Table:
- Business key: DayKey
- Terms: descriptive terms of a day, a month, a quarter, and a year
Dimensional Modeling
The data lineage of the Date Mart Dimension in a Dimensional modeling can be:
To model it, please follow the steps:
- Discover the source date table you created previously for your Target Technology
- Create a Source Model Object from the discovery
- Create a Stage Model Object from the Source Model Object
It can be interesting to filter the data in the Stage Model Object with a WHERE clause in the Dataflow Set to fit the date range that actually exists in your date source data.
- Create 4 entities:
- Date_Year:
- Business key: YearKey
- Terms: descriptive terms of a year
- Deduplication Method of the Dataflow set to Distinct
- Date_Quarter:
- Business key: QuarterKey
- Terms: descriptive terms of a quarter
- Relationship with Date_Year
- Deduplication Method of the Dataflow set to Distinct
- Date_Year:
-
- Date_Month:
- Business key: MonthKey
- Terms: descriptive terms of a month
- Relationship with Date_Quarter
- Deduplication Method of the Dataflow set to Distinct
- Date_Month:
-
- Date_Day:
- Business key: DayKey
- Terms: descriptive terms of a day
- Relationship with Date_Month
- Date_Day:
- Create a D_Date Mart Dimension:
- Business key: DayKey
- terms: descriptive terms of a day, a month, a quarter, and a year
- Dataflow composed by the entities Date_Day, Date_Month, Date_Quarter, and Date_Year with the correct Join Rules: