我就廢話不多說了,大家還是直接看代碼吧!
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
|
# encoding=utf-8 import numpy as np import pandas as pd # 長(zhǎng)寬格式的轉(zhuǎn)換 # 1 data = pd.read_csv( 'd:data/macrodata.csv' ) print 'data:=\n' , data print 'data.to_records():=\n' , data.to_records() print 'data.year:=\n' , data.year print 'data.quarter:=\n' , data.quarter periods = pd.PeriodIndex(year = data.year, quarter = data.quarter, name = 'date' ) print 'periods:=\n' , periods data = pd.DataFrame(data.to_records(), columns = pd.Index([ 'realgdp' , 'infl' , 'unemp' ], name = 'item' ), index = periods.to_timestamp( 'D' , 'end' )) print 'data:=\n' , data ldata = data.stack().reset_index().rename(columns = { 0 : 'value' }) # print 'ldata:=\n', ldata print 'ldata.get(\'realgdp\'):=\n' , ldata.get( 'realgdp' ) print 'ldata.get(\'unemp\'):=\n' , ldata.get( 'unemp' ) wdata = ldata.pivot( 'date' , 'item' , 'value' ) print 'ldata:=\n' , ldata print 'wdata:=\n' , wdata # 2 print 'ldata[:10]:=\n' , ldata[: 10 ] pivoted = ldata.pivot( 'date' , 'item' , 'value' ) print 'pivoted:=\n' , pivoted print 'pivoted.head():=\n' , pivoted.head() print 'ldata:=\n' , ldata ldata[ 'value2' ] = np.random.randn( len (ldata)) print 'ldata[\'value2\']:=\n' , ldata[ 'value2' ] print 'ldata[:10]:=\n' , ldata[: 10 ] pivoted = ldata.pivot( 'date' , 'item' ) print 'pivoted:=\n' , pivoted print pivoted[: 5 ] print 'pivoted[\'value\'][:5]:=\n' , pivoted[ 'value' ][: 5 ] print 'ldata:=\n' , ldata unstacked = ldata.set_index([ 'date' , 'item' ]).unstack( 'item' ) print 'unstacked:=\n' , unstacked print 'test' |
補充知識:python使用_pandas_用stack和unstack進行行列重塑(key-value變寬表)
數據結構的重塑(reshape)
與數據庫交互時常遇到堆疊格式(key-value)和寬表形式(dataframe)的轉換,如:
堆疊格式:
寬表形式dataframe:
下面是相互轉換的示例代碼:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
import pandas as pd
import numpy as np

# Tutorial script: reshaping between a wide table and a stacked key-value
# table with stack / unstack / pivot. The expected console output is kept
# as comments after each step.

# A typical tabular (wide) structure: one row per id, one column per attribute.
df = pd.DataFrame(
    np.arange(6).reshape((2, 3)),
    index=['id1', 'id2'],
    columns=['attr1', 'attr2', 'attr3'],
)
print(df)
# out:
#      attr1  attr2  attr3
# id1      0      1      2
# id2      3      4      5

# Wide table (DataFrame) -> stacked key-value form.
# Databases often store data in this form.
df_key_value = df.stack().reset_index()
df_key_value.columns = ['id', 'attr', 'value']
print(df_key_value)
# out:
#     id   attr  value
# 0  id1  attr1      0
# 1  id1  attr2      1
# 2  id1  attr3      2
# 3  id2  attr1      3
# 4  id2  attr2      4
# 5  id2  attr3      5

# Stacked form -> wide table.
# set_index builds a hierarchical index, then unstack reshapes it.
# The values of the variable used as the pivot axis (here 'attr') are spread
# out as columns.
df_unstacked = df_key_value.set_index(['id', 'attr']).unstack('attr')
print(df_unstacked)
# out:
#      value
# attr attr1 attr2 attr3
# id
# id1      0     1     2
# id2      3     4     5

# Multi-level columns -> flat wide table: select the 'value' level and turn
# the 'id' index back into an ordinary column.
df_long = df_key_value.set_index(['id', 'attr']).unstack('attr')['value'].reset_index()
print(df_long)
# out:
# attr   id  attr1  attr2  attr3
# 0     id1      0      1      2
# 1     id2      3      4      5

# Shortcut for stacked -> wide: pivot. Keyword arguments are used because
# pandas 2.0+ no longer accepts index/columns/values positionally.
df_pivoted = df_key_value.pivot(index='id', columns='attr', values='value')
print(df_pivoted)
# out:
# attr  attr1  attr2  attr3
# id
# id1       0      1      2
# id2       3      4      5
以上這篇python 數據分析實現長寬格式的轉換就是小編分享給大家的全部內容了,希望能給大家一個參考,也希望大家多多支持服務器之家。
原文鏈接:https://blog.csdn.net/u013043762/article/details/79180374