tcp-ds數(shù)據(jù)集sql查詢

TPC-DS是與真實(shí)場(chǎng)景非常接近的一個(gè)測(cè)試集,它包含7張事實(shí)表,17張緯度表,平均每張表含有18列。用這個(gè)數(shù)據(jù)集能夠很好的模擬企業(yè)數(shù)據(jù)查詢、報(bào)表生成以及數(shù)據(jù)挖掘等復(fù)雜應(yīng)用。

下列是7個(gè)事實(shí)表與17個(gè)維度表之間的ER圖。

1.jpg
2.png
3.png
4.png
5.png
6.png
7.png
-- 1.符合某條件的每各客戶單次消費(fèi)額總額大于900的客戶表
-- 條件:
-- 在1999-2001年期間,
-- 每月的1-3或25-28日的,
-- 來(lái)自williamson county的客戶。

with sales_record(ss_ticket_number,ss_customer_sk,sales)as
(
    select
        ss_ticket_number,
        ss_customer_sk,
        sum(ss_sales_price)as sales
    from
        store_sales ss,
        date_dim dd,
        store s,
        household_demographics hd
    where
        ss.ss_sold_date_sk=dd.d_date_sk
        and ss.ss_store_sk=s.s_store_sk
        and ss.ss_hdemo_sk=hd.hd_demo_sk
        and(dd.d_dom between 1 and 3 or dd.d_dom between 25 and 28)
        and dd.d_year in (1999,2000,2001)
        and s.s_county in('williamson county')
    group by
        ss_ticket_number,
        ss_cutomer_sk
)
select
    c_last_name,
    c_first_name,
    c_salutation,
    c_preferred_cust_flag,
    sales,
    count(*) as cnt
from
    sales_record,
    customer
where
    ss_customer_sk=c_customer_sk
    and sales>900
group by
    c_last_name,
    c_first_name,
    c_salutation,
    c_preferred_cust_flag,
    sales 
order by
    c_last_name,
    c_first_name,
    c_salutation,
    c_preferred_cust_flag desc;


-- 2.4號(hào)店利潤(rùn)率最好最差的top10名產(chǎn)品名稱


with ascending as
(
    select
        item_sk,
        rank()over(order by avg_net_profit asc)as rn
    from
    (
    select
            ss_item_sk as item_sk,
            avg(ss_net_paid-ss_ext_wholesale_cost)as avg_net_profit
        from
            store_sales
        where
            ss_store_sk=4
        group by
            ss_item_sk
        having
            avg(ss_net_paid-ss_ext_discount_amt)>0.8*
            (
                select
                    avg(ss_net_paid-ss_ext_wholesale_cost)
                from
                    store_sales
                where
                    ss_store_sk=4
                group by
                ss_store_sk
                )
        
        
        )s
),
descending as -- 小于平均水平的0.8
(
    select
        item_sk,
        rank()over(order by avg_net_profit desc)as rn
    from
    (select
        ss_item_sk as item_sk,
        avg(ss_net_paid-ss_ext_wholesale_cost)as avg_net_profit
    from
        store_sales
    where
        ss_store_sk=4
    group by
        ss_item_sk
    having
        avg(ss_net_paid-ss_ext_wholesale_cost)<0.2*
        (select
            avg(ss_net_paid-ss_ext_wholesale_cost)
        from
            store_sales
        where
            ss_store_sk=4
        group by
            ss_store_sk)
    )s
)

select
    ascending.rn,
    i1.i_product_name as best_performing,
    i2.i_product_name as worst_performing
from
    item i1,
    item i2,
    ascending,
    descending
where
    ascending.rn=descending.rn
    and ascending.rn<=10
    and descending.rn<=10
    and i1.i_item_sk=ascending.item_sk
    and i2.i_item_sk=descending.item_sk
order by
    ascending.rn


-- 3.符合某條件的客戶的消費(fèi)總額,按。。排序

select
    c_last_name,
    c_first_name,
    c_salutation,
    c_preferred_cust_flag,
    sales,
    count(*)as cnt
from
    (
    select
        ss_ticket_number,
        ss_customer_sk,
        sum(ss_sales_price)as sales
    from
        store_sales ss,
        date_dim dd,
        store s,
        household_demographics hd
    where
        ss_sold_date_sk=d_date_sk
        and ss_store_sk=s_store_sk
        and ss_hdemo_sk=hd_demo_sk
        and(d_dom between 1 and 3 or d_dom between 25 and 28)
        and hd_buy_potential='>10000'
        and d_year in (1999,2000,2001)
        and s_county in ('williamson county')
    group by
        ss_ticket_number,
        ss_customer_sk)as sales_record(ss_ticket_number,ss_customer_sk,sales),
    customer
where
    ss_customer_sk=c_customer_sk
    and sales>900
group by
    c_last_name,
    c_first_name,
    c_salutation,
    c_preferred_cust_flag,
    sales
order by
    c_last_name,
    c_first_name,
    c_salutation,
    c_preferred_cust_flag desc;
        
        

-- 4.人群畫(huà)像統(tǒng)計(jì)
-- 在2001年4-6月內(nèi),在store,沒(méi)在web、catalog上買的用戶a

SELECT 
    cd_gender,
    cd_marital_status,
    cd_education_status,
    cd_purchase_estimate,
    cd_credit_rating,
    COUNT(DISTINCT c_customer_sk) AS cust_num
FROM
    customer c,
    customer_address ca,
    customer_demographics cd
WHERE
    c_current_addr_sk = ca_address_sk
        AND ca_state IN ('nm' , 'ky', 'ga')
        AND cd_demo_sk = c_current_cdemo_sk
        -- 在xx時(shí)間內(nèi) 沒(méi)在web catalog 在store
        AND EXISTS( SELECT 
            *
        FROM
            store_sales,
            date_dim
        WHERE
            c_customer_sk = ss_customer_sk
                AND ss_sold_date_sk = d_date_sk
                AND d_year = 2001
                AND d_moy BETWEEN 4 AND 4 + 2)
        AND NOT EXISTS( SELECT 
            *
        FROM
            web_sales,
            date_dim
        WHERE
            c_customer_sk = ws_bill_customer_sk
                AND ws_sold_date_sk = d_date_sk
                AND d_year = 2001
                AND d_moy BETWEEN 4 AND 4 + 2)
        AND NOT EXISTS( SELECT 
            *
        FROM
            catalog_sales,
            date_dim
        WHERE
            c_customer_sk = cs_ship_customer_sk
                AND cs_sold_date_sk = d_date_sk
                AND d_year = 2001
                AND d_moy BETWEEN 4 AND 4 + 2)
GROUP BY cd_gender , cd_marital_status  , cd_education_status, cd_purchase_estimate , cd_credit_rating
ORDER BY cd_gender , cd_marital_status , cd_education_status , cd_purchase_estimate , cd_credit_rating;

    

-- 5月銷售額與年平均銷售額差10%以上

with v1 as
(   select
        i_category,
        i_brand,
        cc_name,
        d_year,
        d_moy,
        sum(cs_sales_price)as current_sum_sales,
        avg(sum(cs_sales_price))over(partition by i_category,i_brand,cc_name,d_year)as avg_year_sales,
        sum(sum(cs_sales_price))over(partition by i_category,i_brand,cc_name,d_year)as sum_year_sales,
        sum(sum(cs_sales_price))over(partition by i_category,i_brand,cc_name,d_year order by d_moy rows 
            between unbounded preceding and current row)as accumulated_sum_year_sales,
        rank()over(partition by i_category,i_brand,cc_name order by d_year,d_moy)as rn
    from
        item,
        catalog_sales,
        date_dim,
        call_center
    where
        cs_item_sk=i_item_sk
        and cs_sold_date_sk=d_date_sk
        and cc_call_center_sk=cs_call_center_sk
        and(d_year=2001 or(d_year=2001-1 and d_moy=12)or (d_year=2001+1 and d_moy=1))
    group by
        i_category,
        i_brand,
        cc_name,
        d_year,
        d_moy
),
v2 as
(
    select 
        v1.i_category,
        v1.i_brand,
        v1.cc_name,
        v1.d_year,
        v1.d_moy,
        v1.avg_year_sales,
        v1_lag.current_sum_sales as previous_sum_sales,
        v1.current_sum_sales,
        v1_lead.current_sum_sales as next_sum_sales,
        v1.sum_year_sales,
        v1.accumulated_sum_year_sales
    from
        v1,
        v1 v1_lag,
        v1 v1_lead
    where
        v1.i_category=v1_lag.i_category
        and v1.i_category=v1_lead.i_category
        and v1.i_brand=v1_lag.i_brand
        and v1.i_brand=v1_lead.i_brand
        and v1.cc_name=v1_lag.cc_name
        and v1.cc_name=v1_lead.cc_name
        and v1.rn=v1_lag.rn+1
        and v1.rn=v1_lead.rn-1
)
select
    *
from
    v2
where
    d_year=2001
    and avg_year_sales>0
    and(case when avg_year_sales>0 then abs(current_sum_sales-avg_year_sales)/avg_year_sales else null end)>0.1
    order by
        current_sum_sales-avg_year_sales

-- 2000年store網(wǎng)中,tn州退貨比較大的用戶,按金額降序排列
-- 退款金額是store網(wǎng)用戶平均退貨金額的1.5倍
with
customer_total_return as
(
    select
        sr_customer_sk,
        sr_store_sk,
        sum(sr_fee)as total_return
    from 
        store_returns,
        date_dim
    where
        sr_returned_date_sk=d_date_sk
        and d_year=2000
    group by
        sr_customer_sk,
        sr_store_sk
)

select
    c_customer_id,
    total_return
from
    customer_total_return ctr,
    store s,
    customer c
where
    ctr.total_return>
    (select
        avg(total_return)*1.5
    from
        customer_total_return)
    and s.s_store_sk=ctr.sr_store_sk
    and ctr.sr_customer_sk=c.c_customer_sk
    and s.s_state='tn'
order by
    total_return desc;

--2002年四個(gè)季度web和catalog銷售同比情況。

with s1 as 
(
    select 
        ws_sold_date_sk as sold_date_sk,
        ws_ext_sales_price as sales_price
    from 
        web_sales
    union all
    select
        cs_sold_date_sk as sold_date_sk,
        cs_ext_sales_price as sales_price
    from
        catalog_sales
),
s2 as
(
    select
        sum(case when (d_year=2001 and d_qoy=1) then sales_price else 0 end) as sales_2001_q1,
        sum(case when (d_year=2001 and d_qoy=2) then sales_price else 0 end) as sales_2001_q2,
        sum(case when (d_year=2001 and d_qoy=3) then sales_price else 0 end) as sales_2001_q3,
        sum(case when (d_year=2001 and d_qoy=4) then sales_price else 0 end) as sales_2001_q4,
        sum(case when (d_year=2002 and d_qoy=1) then sales_price else 0 end) as sales_2002_q1,
        sum(case when (d_year=2002 and d_qoy=2) then sales_price else 0 end) as sales_2002_q2,
        sum(case when (d_year=2002 and d_qoy=3) then sales_price else 0 end) as sales_2002_q3,
        sum(case when (d_year=2002 and d_qoy=4) then sales_price else 0 end) as sales_2002_q4
    from
        s1,
        date_dim
    where
        d_date_sk=sold_date_sk
)
select 
    round((sales_2002_q1-sales_2001_q1)/sales_2001_q1,2)as q1_sales_ratio,
    round((sales_2002_q2-sales_2001_q2)/sales_2001_q1,2)as q2_sales_ratio,
    round((sales_2002_q3-sales_2001_q3)/sales_2001_q1,2)as q3_sales_ratio,
    round((sales_2002_q4-sales_2001_q4)/sales_2001_q1,2)as q4_sales_ratio
from
    s2
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡(jiǎn)書(shū)系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容