读写磁盘
# Source: the book "Python金融大数据分析", p. 156
# Directory that the pickle file data.pkl is read from further down.
path = '/home/yaoleo/readsqldata/'
import pickle
from random import gauss

import numpy as np
# Build a list of 100,000 pseudo-random numbers drawn with gauss(1.5, 2)
# (mu=1.5, sigma=2). The list only needs to be generated once.
a = [gauss(1.5, 2) for i in range(100000)]
# Serialize the list to data.pkl under `path`. The file has to be opened and
# written before it can be closed; binary mode is what pickle requires on
# Python 3 and is harmless on Python 2. The with-block closes the file even
# if the dump fails.
with open(path + 'data.pkl', 'wb') as pkl_file:
    pickle.dump(a, pkl_file)
ll $path* #100万浮点数大概有20M大小
# 通过pickle.load 可以将他读入内存
pkl_file = open(path + 'data.pkl', 'r')
%time b = pickle.load(pkl_file)
# NumPy can check that the reloaded list matches the original:
# element-wise equality within floating-point tolerance ...
np.allclose(np.asarray(a), np.asarray(b))
# ... or, alternatively, the sum of the element-wise differences.
(np.asarray(a) - np.asarray(b)).sum()
读取数据库
import MySQLdb
# Connect to the local MySQL database "fog" as root, run the query and keep
# every returned row in `sqldata`.
# The password is no longer embedded in the source: it is taken from the
# MYSQL_PASSWORD environment variable, and if that is unset or empty the user
# is prompted for it.
# NOTE(review): the literal password that used to be on this line has been
# committed with the file and should be treated as exposed (rotate it).
import os
from getpass import getpass

db_password = os.environ.get('MYSQL_PASSWORD') or getpass('MySQL password for root: ')
db = MySQLdb.connect(user='root', db='fog', passwd=db_password, host='localhost', charset='utf8')
cursor = db.cursor()
query = "select * from city_date_air where city = '北京'"
cursor.execute(query)
sqldata = cursor.fetchall()
!! 但是这样读出来的数据是元组
# Turn the rows returned by fetchall() into a NumPy array.
sdata = np.asarray(sqldata)
或者用 pandas 的 read_sql!!读出来直接就是 DataFrame 格式
import pandas.io.sql as pds
%time data = pds.read_sql("select * from city_date_air where city='北京'", db)
data.head()
data['quality2'] = data['quality']
data.loc[data['quality'] != u' 优', 'quality2'] = 0
data.loc[data['quality'] == u' 优', 'quality2'] = 1
data.loc[data['quality'] == u' 良', 'quality2'] = 1
data.tail()
DataFrame replace
![pandas-sql.png](pandas-sql.png)
import pandas as pd
# Build a new frame that holds only the 'date' and 'quality2' columns of
# `data`, then preview its first rows.
bbdata = pd.DataFrame(data=data, columns=['date', 'quality2'])
bbdata.head()
!!这里把这两列单独拿出来;要是不拿出来,后面分组求和时 quality2 就会丢失。原因还不确定,可能是因为 quality2 是从字符串列复制来的,dtype 是 object,求和时和其他非数值列一起被丢弃了。
按日期汇总信息
import pandas as pd
# Replace the 'date' column with its pd.to_datetime conversion, then preview.
bbdata['date'] = pd.to_datetime(arg=bbdata['date'])
bbdata.head()
!!日期必须先转换成规范的 datetime 格式
# Use the 'date' column as the row index (resample below operates on the
# index), then preview the result.
bbdata = bbdata.set_index(keys='date')
bbdata.head()
!!时间做index
# Group the rows into month-end bins ('M') and sum each bin, then show the
# first ten months.
# NOTE(review): recent pandas releases deprecate the 'M' alias in favour of
# 'ME'; the alias is kept here because older pandas does not accept 'ME'.
bbdata = bbdata.resample(rule='M').sum()
bbdata.head(n=10)
!!按月求和
还有一些其他参数在这里 http://www.cnblogs.com/stream886/p/6022279.html#q5