[TOC]
# datetime构造时间
~~~
import datetime

# Build a naive datetime for 2017-11-24 10:30 (seconds default to 0).
# Arguments are positional: year, month, day, hour, minute.
dt = datetime.datetime(2017, 11, 24, 10, 30)
print(dt)
~~~
输出
~~~
2017-11-24 10:30:00
~~~
# pandas构造时间
~~~
import pandas as pd

# A date-only string yields a Timestamp at midnight of that day.
date_text = '2017-11-24'
ts = pd.Timestamp(date_text)
print(ts)
~~~
输出
`2017-11-24 00:00:00`
或者to_datetime这种
~~~
import pandas as pd

# pd.to_datetime parses a date string into a pandas Timestamp.
# The results get descriptive names instead of `datetime`, which would
# rebind the standard-library module name if the snippets of this tutorial
# are run in one interpreter session.
iso_date = pd.to_datetime('2017-11-24')
# Slash-separated input is read month-first here: 11/23/2017 -> 2017-11-23.
us_style_date = pd.to_datetime('11/23/2017')
print(iso_date)
print(us_style_date)
~~~
输出
~~~
2017-11-24 00:00:00
2017-11-23 00:00:00
~~~
# 获取月份,天数
~~~
import pandas as pd

# With '/' as the separator, mind the field order: this string is written
# day/month/year and is read as 24 November 2017.
slash_date = '24/11/2017'
ts = pd.Timestamp(slash_date)
# A Timestamp built from a '-' separated string offers the same attributes.
# Month of the date, then day of the month.
print(ts.month)
print(ts.day)
~~~
输出
~~~
11
24
~~~
# 增加天数
~~~
import pandas as pd

timestamp = pd.Timestamp('2018-05-01')
# Mind the spelling: the class is Timedelta (lower-case "d").
five_days = pd.Timedelta('5 days')
# Adding a Timedelta to a Timestamp shifts it forward by that duration.
rel = timestamp + five_days
print(rel)
~~~
输出
`2018-05-06 00:00:00`
# 构造一个Series结构
~~~
import pandas as pd

# Three consecutive days written as plain strings; the Series stores them
# as text, not as datetime values.
raw_times = [
    '2017-11-24 00:00:00',
    '2017-11-25 00:00:00',
    '2017-11-26 00:00:00',
]
s = pd.Series(raw_times)
print(s)
~~~
输出
~~~
0 2017-11-24 00:00:00
1 2017-11-25 00:00:00
2 2017-11-26 00:00:00
dtype: object
~~~
把它们转换成datetime格式
~~~
import pandas as pd

s = pd.Series(['2017-11-24 00:00:00', '2017-11-25 00:00:00', '2017-11-26 00:00:00'])
# Convert the string Series into a datetime64 Series.
# The result is named `dates` rather than `datetime` so it does not rebind
# the standard-library module name when snippets share one session.
dates = pd.to_datetime(s)
print(dates)
~~~
输出
~~~
0 2017-11-24
1 2017-11-25
2 2017-11-26
dtype: datetime64[ns]
~~~
获取它们的小时和星期几(weekday)
~~~
import pandas as pd

s = pd.Series(['2017-11-24 00:00:00', '2017-11-25 00:00:00', '2017-11-26 00:00:00'])
# Named `dates` rather than `datetime` so the standard-library module name
# is not rebound when snippets share one session.
dates = pd.to_datetime(s)
# The .dt accessor exposes datetime fields element-wise; every value here
# is at midnight, so the hour is 0.
print(dates.dt.hour)
print('-'*30)
# weekday is zero-based and starts on Monday (Monday=0 ... Sunday=6),
# so Friday 2017-11-24 is 4, Saturday is 5 and Sunday is 6.
print(dates.dt.weekday)
~~~
输出
~~~
0 0
1 0
2 0
dtype: int64
------------------------------
0 4
1 5
2 6
dtype: int64
~~~
# 构造Series数据
~~~
import pandas as pd

# Starting at 2017-11-24, generate 3 timestamps spaced 12 hours apart.
# The keyword is `freq` (the original `frep` raised TypeError). The
# lower-case 'h' alias is used because newer pandas releases deprecate
# the upper-case 'H' spelling.
series = pd.Series(pd.date_range(start='2017-11-24', periods=3, freq='12h'))
print(series)
~~~
输出
~~~
0 2017-11-24 00:00:00
1 2017-11-24 12:00:00
2 2017-11-25 00:00:00
dtype: datetime64[ns]
~~~
# 用pandas分析csv的日期
csv结构
![](https://box.kancloud.cn/6ab0f4f20dab3d79a63182f02ca8fe17_1462x360.png)
~~~
import pandas as pd

# Load the whole CSV, then show only its first five rows.
csv_path = './flowdata.csv'
data = pd.read_csv(csv_path)
head = data.head()
print(head)
~~~
输出
~~~
Time L06_347 LS06_347 LS06_348
0 2009-01-01 00:00:00 0.137417 0.097500 0.016833
1 2009-01-01 03:00:00 0.131250 0.088833 0.016417
2 2009-01-01 06:00:00 0.113500 0.091250 0.016750
3 2009-01-01 09:00:00 0.135750 0.091500 0.016250
4 2009-01-01 12:00:00 0.140917 0.096167 0.017000
~~~
## 读取后设置索引
~~~
import pandas as pd

data = pd.read_csv('./flowdata.csv').head()
# Parse the 'Time' column into datetime values.
parsed_time = pd.to_datetime(data['Time'])
data['Time'] = parsed_time
# Make the parsed 'Time' column the index of the frame.
data = data.set_index('Time')
print(data)
separator = '-' * 30
print(separator)
# Show the index itself.
print(data.index)
~~~
输出
~~~
L06_347 LS06_347 LS06_348
Time
2009-01-01 00:00:00 0.137417 0.097500 0.016833
2009-01-01 03:00:00 0.131250 0.088833 0.016417
2009-01-01 06:00:00 0.113500 0.091250 0.016750
2009-01-01 09:00:00 0.135750 0.091500 0.016250
2009-01-01 12:00:00 0.140917 0.096167 0.017000
------------------------------
DatetimeIndex(['2009-01-01 00:00:00', '2009-01-01 03:00:00',
'2009-01-01 06:00:00', '2009-01-01 09:00:00',
'2009-01-01 12:00:00'],
dtype='datetime64[ns]', name='Time', freq=None)
~~~
## 读取时设置索引列,并格式化
~~~
import pandas as pd

# Use the first column as the index and parse it as dates while reading,
# then keep only the first five rows.
csv_path = './flowdata.csv'
data = pd.read_csv(csv_path, index_col=0, parse_dates=True)
data = data.head()
print(data)
~~~
输出
~~~
L06_347 LS06_347 LS06_348
Time
2009-01-01 00:00:00 0.137417 0.097500 0.016833
2009-01-01 03:00:00 0.131250 0.088833 0.016417
2009-01-01 06:00:00 0.113500 0.091250 0.016750
2009-01-01 09:00:00 0.135750 0.091500 0.016250
2009-01-01 12:00:00 0.140917 0.096167 0.017000
~~~
## 分片获取数据
~~~
import pandas as pd

# Use the first column as the index and parse it as dates while reading.
data = pd.read_csv('./flowdata.csv', index_col=0, parse_dates=True)
# Select rows by slicing the datetime index with two bounds.
# Plain strings work as bounds too:
#   data[('2012-01-01 09:00'):('2012-01-01 19:00')]
# and so do coarser dates, e.g. data['2012-01':'2012-03'].
window_start = pd.Timestamp('2012-01-01 09:00')
window_end = pd.Timestamp('2012-01-01 19:00')
dt = data[window_start:window_end]
print(dt)
~~~
输出
~~~
L06_347 LS06_347 LS06_348
Time
2012-01-01 09:00:00 0.330750 0.293583 0.029750
2012-01-01 12:00:00 0.295000 0.285167 0.031750
2012-01-01 15:00:00 0.301417 0.287750 0.031417
2012-01-01 18:00:00 0.322083 0.304167 0.038083
~~~
## 获取倒数10个数据
~~~
import pandas as pd

# Use the first column as the index and parse it as dates while reading,
# then keep only the last 10 rows.
full_frame = pd.read_csv('./flowdata.csv', index_col=0, parse_dates=True)
data = full_frame.tail(10)
print(data)
~~~
## 获取某一年的数据
~~~
import pandas as pd

# Use the first column as the index and parse it as dates while reading.
data = pd.read_csv('./flowdata.csv', index_col=0, parse_dates=True)
# Select every row from 2013 by partial-string indexing on the datetime index.
# Go through .loc: plain data['2013'] is a column lookup, and using it to
# select rows was deprecated and then removed in pandas 2.0.
print(data.loc['2013'])
~~~
输出
~~~
L06_347 LS06_347 LS06_348
Time
2013-01-01 00:00:00 1.688333 1.688333 0.207333
2013-01-01 03:00:00 2.693333 2.693333 0.201500
2013-01-01 06:00:00 2.220833 2.220833 0.166917
~~~
## 获取都是某个月份的数据
~~~
import pandas as pd

# Use the first column as the index and parse it as dates while reading.
data = pd.read_csv('./flowdata.csv', index_col=0, parse_dates=True)
# Boolean mask that is True for every row dated in January, in any year.
is_january = data.index.month == 1
dt = data[is_january]
print(dt.head())
~~~
输出
~~~
L06_347 LS06_347 LS06_348
Time
2009-01-01 00:00:00 0.137417 0.097500 0.016833
2009-01-01 03:00:00 0.131250 0.088833 0.016417
2009-01-01 06:00:00 0.113500 0.091250 0.016750
2009-01-01 09:00:00 0.135750 0.091500 0.016250
2009-01-01 12:00:00 0.140917 0.096167 0.017000
~~~
# 获取指定时间内的数据
~~~
import pandas as pd

# Use the first column as the index and parse it as dates while reading.
data = pd.read_csv('./flowdata.csv', index_col=0, parse_dates=True)
# Keep rows whose hour lies strictly between 8 and 12, i.e. hours 9, 10, 11.
# data.between_time('08:00', '12:00') is similar but NOT identical: by
# default it includes both end points, so rows stamped 08:00 and 12:00
# would be returned as well.
hours = data.index.hour
dt = data[(hours > 8) & (hours < 12)]
print(dt.head())
~~~
输出
~~~
L06_347 LS06_347 LS06_348
Time
2009-01-01 09:00:00 0.135750 0.091500 0.016250
2009-01-02 09:00:00 0.141917 0.097083 0.016417
2009-01-03 09:00:00 0.124583 0.084417 0.015833
2009-01-04 09:00:00 0.109000 0.105167 0.018000
2009-01-05 09:00:00 0.161500 0.114583 0.021583
~~~
# 重采样
按天重采样求均值
~~~
import pandas as pd

# Use the first column as the index and parse it as dates while reading.
data = pd.read_csv('./flowdata.csv', index_col=0, parse_dates=True)
# Other resampling variants:
#   every 3 days:   data.resample('3D').mean().head()
#   every month:    data.resample('M').mean().head()
#   daily maximum:  data.resample('D').max().head()
# Here: group the rows by calendar day and average each column.
daily_mean = data.resample('D').mean()
dt = daily_mean.head()
print(dt)
~~~
输出
~~~
L06_347 LS06_347 LS06_348
Time
2009-01-01 0.125010 0.092281 0.016635
2009-01-02 0.124146 0.095781 0.016406
2009-01-03 0.113562 0.085542 0.016094
2009-01-04 0.140198 0.102708 0.017323
2009-01-05 0.128812 0.104490 0.018167
~~~