DataFrame 是一個表格型的數據結構,包含一組有序的列,其列的值類型可以是數值、字符串、布爾值等。DataFrame 中的數據以一個或多個二維塊存放,而不是列表、字典或一維數組結構。
# DataFrame 數據結構
# DataFrame 是一個表格型的數據結構,即"帶有標籤的二維數組"
# DataFrame 帶有 index(行標籤)和 columns(列標籤)
import numpy
import pandas
# Build a small DataFrame from a dict of equal-length lists, then inspect it.
data = {
    "name": ["高橋李依", "小原好美"],
    "age": [20, 21],
    "gender": ["女", "女"],
}
frame = pandas.DataFrame(data)

# Printed one per line, in this order:
#   frame          -> the whole table (a DataFrame)
#   frame.index    -> the row labels
#   frame.columns  -> the column labels
#   frame.values   -> the cell values as a numpy ndarray
print(frame, frame.index, frame.columns, frame.values, sep="\n")
import numpy
import pandas
# Creating a DataFrame from a dict of lists / arrays.
# Every value in the dict must have the same length!
data1 = {"a": [1, 2, 3], "b": [3, 4, 5]}
data2 = {key: numpy.random.rand(3) for key in ("one", "two")}
print(data1, data2, sep="\n")

# Constructor: pandas.DataFrame()
# The dict keys become the columns; the index defaults to 0..n-1.
d1 = pandas.DataFrame(data1)
d2 = pandas.DataFrame(data2)
print(d1, d2, sep="\n")

# columns= (a list) reorders the columns; a name that is not present in the
# data (here "d") produces a column filled with NaN.
df1 = pandas.DataFrame(data1, columns=["b", "a", "d"])
print(df1)

# columns= may also list fewer columns than the data contains.
df2 = pandas.DataFrame(data2, columns=["two"])
print(df2)

# index= (a list) sets the row labels; its length must match the data.
df2 = pandas.DataFrame(data2, index=["f1", "f2", "f3"])
print(df2)
import numpy
import pandas
# Creating a DataFrame from a dict of Series: the dict keys become the
# columns and the Series labels become the index.
# A Series built without an explicit index gets the default integer labels.

# Series without an explicit index
data1 = {
    "one": pandas.Series(numpy.random.rand(2)),
    "two": pandas.Series(numpy.random.rand(3)),
}

# Series with an explicit index
data2 = {
    "one": pandas.Series(numpy.random.rand(2), index=["a", "b"]),
    "two": pandas.Series(numpy.random.rand(3), index=["a", "b", "c"]),
}
print(data1, data2, sep="\n")

# The Series may differ in length; the missing cells show up as NaN.
df1 = pandas.DataFrame(data1)
df2 = pandas.DataFrame(data2)
print(df1, df2, sep="\n")
import numpy
import pandas
# Creating a DataFrame directly from a 2-D array: the result has the same
# shape as the array.
ar = numpy.random.rand(9).reshape(3, 3)
print(ar)

# Without index= / columns=, both default to integer labels.
df1 = pandas.DataFrame(ar)

# When given, index= and columns= must match the array's dimensions.
df2 = pandas.DataFrame(
    ar,
    index=["a", "b", "c"],
    columns=["one", "two", "three"],
)
print(df1, df2, sep="\n")
import numpy
import pandas
# Creating a DataFrame from a list of dicts: each dict is one row, the dict
# keys become the columns, and the index defaults to integer labels.
data = [
    {"one": 1, "two": 2},
    {"one": 5, "two": 10, "three": 20},
]
print(data)

df1 = pandas.DataFrame(data)
# index= / columns= set the row labels and the columns respectively.
df2 = pandas.DataFrame(data, index=["a", "b"])
df3 = pandas.DataFrame(data, columns=["one", "two"])
print(df1, df2, df3, sep="\n")
import numpy
import pandas
# Creating a DataFrame from a dict of dicts: the outer keys become the
# columns and the inner keys become the index.
data = {
    "Jack": {"math": 90, "english": 89, "art": 78},
    "Marry": {"math": 82, "english": 95, "art": 92},
    "Tom": {"math": 78, "english": 67},
}
df1 = pandas.DataFrame(data)
print(df1)

# columns= can add or drop columns; a column that is not in the data
# (here "Bob") is filled with NaN.
df2 = pandas.DataFrame(data, columns=["Jack", "Tom", "Bob"])

# Unlike the earlier examples, index= does not relabel the existing rows
# here; labels that are not inner-dict keys produce NaN rows.
df3 = pandas.DataFrame(data, index=["a", "b", "c"])
print(df2, df3, sep="\n")
Python learning notes shared today: