코딩

Pandas, 데이터 컨트롤링 예시 코드

Bullseye 불새 2022. 8. 2. 17:27

판다스의 멋진 기능중의 하나

 

stack

unstack

groupby

 

def excel_accumulate_page(self):
    dc = self.dc
    df = dc.request_accumulate_data_by_site(self.siteCode, self.month)
    # columns = ['ㄴㄴ', 'ㄴㄴ', 'ㄴㄴ', 'ㄴㄴㄴㄴ']
    df = df.astype({'ㄴㄴ': float, 'ㄴㄴ': float})

    df['구분'] = df['업체명'].apply(lambda x: 'ㄴ' if 'ㄴ' in x else 'ㄴ')
    if 'ㄴ' not in df['ㄴ'].unique():
        df_temp = pd.DataFrame([['ㄴ', '', self.month, 0, 0]], columns=['ㄴ', 'ㄴ', 'ㄴ', 'ㄴ', 'ㄴ'])
        df = pd.concat([df, df_temp])
    if 'ㄴ' not in df['ㄴ'].unique():
        df_temp = pd.DataFrame([['ㄴ', 'ㄴ(주)', self.month, 0, 0]], columns=['ㄴ', 'ㄴ', 'ㄴ', 'ㄴ', 'ㄴ'])
        df = pd.concat([df, df_temp])

    # 자료가 없는 달의 컬럼 생성
    M0 = self.month
    M1 = (datetime.strptime(M0, '%Y-%m') - relativedelta(months=1)).strftime('%Y-%m')
    M2 = (datetime.strptime(M0, '%Y-%m') - relativedelta(months=2)).strftime('%Y-%m')
    M3 = (datetime.strptime(M0, '%Y-%m') - relativedelta(months=3)).strftime('%Y-%m')

    for m in [M0, M1, M2, M3]:
        if m not in df['분석월'].unique():
            df_temp = df.drop_duplicates('업체명').copy()
            df_temp.loc[:, '분석월'] = m
            df_temp.loc[:, 'ㄴ'] = 0
            df_temp.loc[:, 'ㄴ'] = 0
            df = pd.concat([df, df_temp])

    df = df[['ㄴ', 'ㄴ', 'ㄴ', 'ㄴ', 'ㄴ']]
    df = df.sort_values(['ㄴ', 'ㄴ', 'ㄴ'], ascending=[True, True, True])
    df = df.set_index(['ㄴ', 'ㄴ', 'ㄴ'])
    df = df.stack(-1)
    df = df.unstack(-2)
    df = df.fillna(0)

    temp = [d for k, d in df.groupby(level=0)]
    df_hd, df_subcon = temp

    # ㄴ 누계
    df_hd_cumsum = df_hd.sum(level=-1).cumsum(axis=1)
    df_hd_cumsum = df_hd_cumsum.rename({'ㄴ': ('ㄴ', '누 계', 'ㄴ'), 'ㄴ': ('ㄴ', '누 계', 'ㄴ')})
    df_hd = pd.concat([df_hd, df_hd_cumsum])

    # ㄴ 소계 및 누계
    df_subcon_sum = df_subcon.sum(level=-1)
    df_subcon_sum = df_subcon_sum.rename({'ㄴ': ('ㄴ', '소 계', 'ㄴ'), 'ㄴ': ('ㄴ', '소 계', 'ㄴ')})
    df_subcon_cumsum = df_subcon.sum(level=-1).cumsum(axis=1)
    df_subcon_cumsum = df_subcon_cumsum.rename({'ㄴ': ('ㄴ', '누 계', 'ㄴ'), 'ㄴ': ('ㄴ', '누 계', 'ㄴ')})
    df_subcon = pd.concat([df_subcon, df_subcon_sum])
    df_subcon = pd.concat([df_subcon, df_subcon_cumsum])

    # 합치기
    df_result = pd.concat([df_hd, df_subcon])

    # 전체 합계 및 누계
    df_sum = df.sum(level=-1)
    df_sum = df_sum.rename({'ㄴ': ('ㄴ', '', 'ㄴ'), 'ㄴ': ('ㄴ', '', 'ㄴ')})
    df_cumsum = df.sum(level=-1).cumsum(axis=1)
    df_cumsum = df_cumsum.rename({'ㄴ': ('ㄴ', '', 'ㄴ'), 'ㄴ': ('ㄴ', '', 'ㄴ')})
    df_result = pd.concat([df_result, df_sum])
    df_result = pd.concat([df_result, df_cumsum])

    # 비고 삽입
    df_result.loc[:, '비 고'] = ''
    return df_result