Wednesday, October 23, 2013

Simple (as possible) TSQL Pivot / Unpivot Samples

Pivoting in TSQL is difficult to understand.  It seems most samples are complex.  Every time I need to pivot I end up going back to one of these samples.  They start with the simplest possible example and build from there keeping it as simple as possible along the way.

------------  simplest pivot  --------------------
declare @bobo table (itemId int, quantity int)

                -- itemId qty
insert @bobo values (1, 1)
insert @bobo values (2, 2)
insert @bobo values (2, 2)
insert @bobo values (3, 3)

-- regular aggregate query (sum - only item 2 has more than one row to sum)
select itemID, sum(quantity) as 'total count' from @bobo group by itemID

-- pivoted view of same data.  Notice how the column headers become (really) a row to be read
-- left to right.  The items in the first column are really the column headers pivoted.
select [1],[2],[3] from
    -- source set
    (select itemID, quantity from @bobo) as alias_required
                            pivot
    -- pivoted set (aggregates: must use aggregate function)
    ( sum(quantity) for itemID in ([1],[2],[3]) ) as pivot_alias_required


------------------- nuther pivot ------------------------
declare @ExamAnswers Table
(
   StudentID varchar(12) NOT NULL,
   QuestionID int NOT NULL,
   Answer int
)

insert @ExamAnswers values('1', 1, 10) --Numbers in brackets below correspond to middle column here
insert @ExamAnswers values('1', 2, 2-- (thus these values are static - known)
insert @ExamAnswers values('1', 3, 3)
insert @ExamAnswers values('1', 4, 4)

-- normal view
select max(StudentID) as 'StudentId', max(QuestionID) as 'QuestionId', SUM(Answer) as 'Answer_sum'
from @ExamAnswers group by QuestionID

-- pivoted view
SELECT StudentID, [1] as Q1_sum, [2] as Q2_sum, [3] as Q3_sum, [4] as Q4_sum
FROM
(
    SELECT StudentID, QuestionID, Answer FROM @ExamAnswers
) AS piv
PIVOT
(
    -- aggregate function required (note: char columns can use min/max)
    sum(Answer) FOR QuestionID IN ([1], [2], [3], [4])
) AS chld
------------------------------------------------------------------------
--------------------------- same as above with double pivot --------------------
declare @ExamAnswers Table
(
   StudentID varchar(12) NOT NULL,
   QuestionID int NOT NULL,
   Answer int,
   SomethingElse int
)

insert @ExamAnswers values('1', 1, 10, 11) -- Numbers in brackets below correspond to middle column here
insert @ExamAnswers values('1', 2, 2, 2)   -- (thus these values are static - known)
insert @ExamAnswers values('1', 3, 3, 3)
insert @ExamAnswers values('1', 4, 4, 4)

-- normal view
select max(StudentID) as 'StudentId', max(QuestionID) as 'QuestionId', SUM(Answer) as 'Answer_sum', SUM(SomethingElse) as 'SomethingElse_sum'
from @ExamAnswers group by QuestionID;

with pivoted_view as
(
    SELECT
        StudentID, [1] as Q1_sum, [2] as Q2_sum, [3] as Q3_sum, [4] as Q4_sum,
        [S1] as S1_sum, [S2] as S2_sum, [S3] as S3_sum, [S4] as S4_sum
    FROM
    (
        SELECT
            StudentID,
            Answer as 'Answer',
            SomethingElse as 'SomethingElse',
            QuestionID as 'QuestionId',  -- used as number (hence column names) for first pivot
            'S' + convert(char(1), row_number() over (order by StudentId)) as 'SomethingElseNumber'  -- used as number for second pivot
        FROM @ExamAnswers
    ) main
    PIVOT
    (
        -- aggregate function required (note: char columns can use min/max)
        sum(Answer) FOR QuestionID IN ([1], [2], [3], [4])
    ) pivot_answer
    pivot
    (
        sum(SomethingElse) FOR SomethingElseNumber IN ([S1], [S2], [S3], [S4])
    ) pivot_something_else
)

-- Final result: studentID, Answer_sum pivoted, SomethingElse_sum pivoted
select StudentID,
                SUM(Q1_sum) A1_sum, SUM(Q2_sum) A2_sum, SUM(Q3_sum) A3_sum, SUM(Q4_sum) A4_sum,
             SUM(S1_sum) S1_sum, SUM(S2_sum) S2_sum, SUM(S3_sum) S3_sum, SUM(S4_sum) S4_sum
from pivoted_view
group by StudentID
-----------------------------------------------------------------------------
------------  unpivot  --------------------
declare @orig table ( fips bit, awd bit, inst bit )
insert @orig values (1,0,1)
select * from @orig
select IsEmpty, ColumnName from
    @orig
    unpivot -- cols must be same type
    (IsEmpty for ColumnName in (fips,awd,inst)) as alias_required;

-- Unpivoting multiple columns: http://mangalpardeshi.blogspot.com/2009/04/unpivot-multiple-columns.html    
-------------------- understanding the grouping that takes place ------------------------
declare @bobo2 table (id int, itemId int, quantity varchar(15))

                    --id, itemId qty
insert @bobo2 values (1, 1, 'a')

insert @bobo2 values (2, 1, 'c')
insert @bobo2 values (2, 2, 'd')

insert @bobo2 values (3, 1, 'e')
insert @bobo2 values (3, 2, 'f')
insert @bobo2 values (3, 2, 'g')

--select * from @bobo2

-- source set doesn't include id (one group out of all)
select [1],[2] from
    (select itemID, quantity from @bobo2) as alias_required
                            pivot
    ( min(quantity) for itemID in ([1],[2]) ) as pivot_alias_required

-- source set includes id (aggregate works on implicit group by the column that's not in the pivoted set)
select [1],[2] from
    (select id, itemID, quantity from @bobo2) as alias_required
                            pivot
    ( min(quantity) for itemID in ([1],[2]) ) as pivot_alias_required

--============================== Using row number to generate proper values to pivot on ==========================================
-- Parent / Child tables
declare @poppy table( id int primary key, name varchar(25) not null )
declare @child table( poppyId int not null, name varchar(25) not null )

insert @poppy values( 1, 'Hank' )
insert @child values( 1, 'HankChild1' )
insert @child values( 1, 'HankChild2' )

insert @poppy values( 2, 'Fred' )
insert @child values( 2, 'FredChild1' )
insert @child values( 2, 'FredChild2' )
insert @child values( 2, 'FredChild3' );
insert @child values( 2, 'FredChild4' );

with peep_data as
(
    select id, poppyId, p.name 'Poppy', c.name 'Child'
    from @poppy p
        inner join @child c on p.id = c.poppyId
),

-- We want to show parent with flattened children (up to a max of 4)
pivoted_data as
(
    select Poppy, Child1, Child2, Child3, Child4
    from
    (
        select Poppy, Child,
        -- Generate the values for each parent's (potential) children that are statically referenced in the pivot clause below
        'Child' + convert(char(1), ROW_NUMBER() over (partition by Id order by Id)) 'child_num'
        from peep_data
    ) src
            pivot
    ( max(Child) for child_num in (Child1, Child2, Child3, Child4) ) as useless_required_alias
)

select * from pivoted_data