A DataFrame is a tabular data structure that contains an ordered set of columns; each column can hold a different value type (numeric, string, Boolean, etc.). The data in a DataFrame is stored in one or more two-dimensional blocks, rather than in a list, a dictionary, or some other one-dimensional array structure.
# DataFrame data structure
# A DataFrame is a tabular data structure: a "2D array with labels".
# A DataFrame has an index (row labels) and columns (column labels).
import numpy
import pandas
# Column-oriented input: each key becomes a column label and each list
# supplies that column's values.
data = dict(
    name=[" Takahashi Li Yi ", " Xiaoyuan is so beautiful "],
    age=[20, 21],
    gender=[" Woman ", " Woman "],
)
frame = pandas.DataFrame(data)

# Print, in order: the table itself (a DataFrame), its row labels (.index),
# its column labels (.columns) and its raw values (.values, an ndarray).
for view in (frame, frame.index, frame.columns, frame.values):
    print(view)
import numpy
import pandas
# Dict-of-sequences input: every value must have the same length.
data1 = dict(a=[1, 2, 3], b=[3, 4, 5])
data2 = {key: numpy.random.rand(3) for key in ("one", "two")}
print(data1, data2, sep="\n")

# Constructor: pandas.DataFrame(). Given a dict of arrays/lists, the dict
# keys become the column labels and the index is the default numeric one.
d1, d2 = pandas.DataFrame(data1), pandas.DataFrame(data2)
print(d1, d2, sep="\n")

# columns= (a list) selects and orders the columns; a name that is absent
# from the data (here "d") produces a column of NaN.
df1 = pandas.DataFrame(data=data1, columns=["b", "a", "d"])
print(df1)

# columns= may also name fewer columns than the data contains.
df2 = pandas.DataFrame(data=data2, columns=["two"])
print(df2)

# index= (a list) sets the row labels; its length must match the data.
row_labels = ["f%d" % i for i in (1, 2, 3)]
df2 = pandas.DataFrame(data=data2, index=row_labels)
print(df2)
import numpy
import pandas
# Building a DataFrame from a dict of Series: the dict keys become the
# column labels and the Series labels become the index. A Series created
# without an explicit index carries the default numeric labels.
series_lengths = (("one", 2), ("two", 3))

# Series without an explicit index.
data1 = {
    key: pandas.Series(numpy.random.rand(size))
    for key, size in series_lengths
}

# Series with an explicit index ('a', 'b' and 'a', 'b', 'c').
data2 = {
    key: pandas.Series(numpy.random.rand(size), index=list("abc")[:size])
    for key, size in series_lengths
}
print(data1, data2, sep="\n")

# The Series may differ in length; the missing cells of the resulting
# DataFrame are filled with NaN.
df1, df2 = pandas.DataFrame(data1), pandas.DataFrame(data2)
print(df1, df2, sep="\n")
import numpy
import pandas
# Building a DataFrame straight from a two-dimensional array gives a table
# of the same shape. When index and columns are not given, both fall back
# to the default numeric labels.
ar = numpy.random.rand(9).reshape((3, 3))
print(ar)

# When given, index and columns must match the array's dimensions.
row_names = list("abc")
column_names = ["one", "two", "three"]
df1 = pandas.DataFrame(ar)
df2 = pandas.DataFrame(ar, index=row_names, columns=column_names)
print(df1, df2, sep="\n")
import numpy
import pandas
# Row-oriented input: one dict per row.
data = [
    dict(one=1, two=2),
    dict(one=5, two=10, three=20),
]
print(data)

# Building a DataFrame from a list of dicts: the dict keys become the
# column labels and, unless index= is given, the rows get the default
# numeric labels. index= and columns= set the row and column labels.
df1 = pandas.DataFrame(data)
df2 = pandas.DataFrame(data, index=list("ab"))
df3 = pandas.DataFrame(data, columns=["one", "two"])
for table in (df1, df2, df3):
    print(table)
import numpy
import pandas
# Nested input: outer keys are students, inner keys are subjects.
data = dict(
    Jack=dict(math=90, english=89, art=78),
    Marry=dict(math=82, english=95, art=92),
    Tom=dict(math=78, english=67),
)

# Building a DataFrame from a dict of dicts: the outer keys become the
# column labels and the inner (sub-dictionary) keys become the index.
df1 = pandas.DataFrame(data)
print(df1)

# columns= can add or drop columns; a column that is not in the data
# (here 'Bob') is filled with NaN.
wanted_columns = ["Jack", "Tom", "Bob"]
df2 = pandas.DataFrame(data, columns=wanted_columns)

# index= behaves differently for this input: it does not relabel the
# existing rows, and labels that are not sub-dictionary keys give NaN.
wanted_rows = list("abc")
df3 = pandas.DataFrame(data, index=wanted_rows)
print(df2, df3, sep="\n")
explain : This is a practical
Installation steps Directory :