Pandas
1.数据唯一性查找
#day,title,longitude,latitude,num,day_timestramp
#2017/01/01,,,,0,1483200000.0
#2017/01/01,,,,0,1483286400.0
#2017/01/03,,,,0,1483372800.0
#2017/01/04,,,,0,1483459200.0
# Count the non-null values of every column for each distinct "day".
d_count = df.groupby("day").count()
# Show only the days whose "num" count is greater than 1 (duplicated days).
d_count[d_count["num"] > 1]
title longitude latitude num day_timestramp
day
2017/01/01 0 0 0 2 2
2017/3/3 23:16 2 2 2 2 2
Datetime 减一天操作
import datetime

# Input timestamp string; previously `y` was printed without ever being defined,
# which raised NameError.
y = '2019/08/20 15:02'
dt = datetime.datetime.strptime(y, '%Y/%m/%d %H:%M')
# Step back exactly one day and format the result the same way as the input.
out_date = (dt - datetime.timedelta(days=1)).strftime('%Y/%m/%d %H:%M')
print(y, out_date)
多条件筛选
import pandas as pd

info_path = '../info.csv'
m = pd.read_csv(info_path, header=0, sep=',')
# Build each condition as its own boolean mask, then combine them with `|` (OR).
longitude_missing = m['longitude'].isna()
# NOTE(review): this compares against the string '0'; it only matches if the
# column holds strings -- confirm against the real CSV.
longitude_zero = m['longitude'] == '0'
# Select the 'day_date' values of the rows where either condition holds.
day_date = m.loc[longitude_missing | longitude_zero, 'day_date']
2.多层级index转化为 json
进行下列转换
{
"CompanyA": [{
"productName": " Orange ",
"price": 3000
}],
"CompanyB": [{
"productName": " Apple ",
"price": 2000
}, {
"productName": " Grapes ",
"price": 1000
}]
}
import json

df = pd.DataFrame(
    [
        ['CompanyA', ' Orange ', 3000],
        ['CompanyB', ' Apple ', 2000],
        ['CompanyB', ' Grapes ', 1000],
    ],
    columns=["company", "productName", "price"],
)
# Group by company and turn each group's rows into a list of record dicts.
# Columns are selected with a list (double brackets) and the orient is spelled
# out as 'records': tuple-style selection and the 'r' abbreviation were
# deprecated and no longer work on pandas 2.x.
m_series = df.groupby(['company'])[['productName', 'price']].apply(
    lambda x: x.to_dict('records')
)
# m_series is a Series indexed by company; serialize it to a JSON object
# keyed by company.
m_series.to_json(orient="columns")
m_series.to_frame() 的样子
进阶 3层转化
import json

df = pd.DataFrame(
    [
        ['PnL', ' Company A', ' Orange ', 3000],
        ['PnL', ' Company B', ' Apple ', 2000],
        ['PnL', ' Company B', ' Grapes ', 1000],
        ['Tax', ' Company A', ' Orange ', 3000],
        ['Tax', ' Company B', ' Apple ', 2000],
        ['Tax', ' Company B', ' Grapes ', 1000],
    ],
    columns=["type", "company", "productName", "price"],
)
# Two-stage grouping:
#   1. per (type, company): collect the product rows as a list of record dicts
#      and store it in a 'data' column;
#   2. per type: map each company to its 'data' list, giving one dict per type.
# Column selections use lists (double brackets) and the orient is spelled out
# as 'records'; the tuple-style selection and the 'r' abbreviation were
# deprecated and no longer work on pandas 2.x.
d = (
    df.groupby(['type', 'company'])[['productName', 'price']]
    .apply(lambda x: x.to_dict('records'))
    .reset_index(name='data')
    .groupby('type')[['company', 'data']]
    .apply(lambda x: x.set_index('company')['data'].to_dict())
    .to_json()
)
到
{
"PnL": {
" Company A": [{
"productName": " Orange ",
"price": 3000
}],
" Company B": [{
"productName": " Apple ",
"price": 2000
}, {
"productName": " Grapes ",
"price": 1000
}]
},
"Tax": {
" Company A": [{
"productName": " Orange ",
"price": 3000
}],
" Company B": [{
"productName": " Apple ",
"price": 2000
}, {
"productName": " Grapes ",
"price": 1000
}]
}
}
中间过程1
# Intermediate step 1: for every (type, company) pair, collect the product rows
# as a list of record dicts and expose it as a 'data' column.
# Uses list-style column selection and the full 'records' orient; the tuple
# selection and the 'r' abbreviation no longer work on pandas 2.x.
s1 = (
    df.groupby(['type', 'company'])[['productName', 'price']]
    .apply(lambda x: x.to_dict('records'))
    .reset_index(name='data')
)
s1
中间过程2
# Intermediate step 2: within each type, index the 'data' lists by company.
# Columns are selected with a list (double brackets); the tuple-style selection
# no longer works on pandas 2.x.
s2 = s1.groupby('type')[['company', 'data']].apply(
    lambda x: x.set_index('company')['data']
)
s2
结果
# Serialize the intermediate result s2 to a JSON string. The recorded output
# below is keyed by company first and then by type.
s2.to_json()
'{" Company A":{"PnL":[{"productName":" Orange ","price":3000}],"Tax":[{"productName":" Orange ","price":3000}]}," Company B":{"PnL":[{"productName":" Apple ","price":2000},{"productName":" Grapes ","price":1000}],"Tax":[{"productName":" Apple ","price":2000},{"productName":" Grapes ","price":1000}]}}'