In [5]:
import pandas as pd
import numpy as np
import sqlalchemy
import oursql
import matplotlib.pyplot as plt
import seaborn as sns

#to share the notebook
%matplotlib inline
In [6]:
def load_from_mysql():
    """
    Read the data from mysql
    remark: python3/mysql/sqlalchemy connection is a pain, 
    I had to install a fork of "oursql" to get this to work
    """
    conn = sqlalchemy.create_engine("mysql+oursql://steve:zissou@localhost/torque")
    df = pd.read_sql('raw_logs',conn)
    return df

def load_torque_keys(keyf='/home/aahu/chinar/ryancompton.net/assets/torque/torque_keys.csv'):
    """
    Read the mapping between human-readable variable names and what Torque uses
    """
    d = {}
    with open(keyf,'r') as fin:
        for line in fin:
            d[line.split(',')[0]] = line.split(',')[1].replace('\n','')
    return d
        
def load_from_file():
    df = pd.read_csv('/home/aahu/Desktop/torque_data.tsv',
                      sep='\t')
    df = df.rename(columns=load_torque_keys())

    return df
In [7]:
df = load_from_file()
In [8]:
"""
Draw the histogram of my observed mpg data
compare against 24–26 city / 28–32 hwy
"""

dfmpg = df[df['Miles Per Gallon(Instant)'] > 0]
dfmpg = dfmpg[dfmpg['Miles Per Gallon(Instant)'] < 200]  # outliers..

mpg = dfmpg['Miles Per Gallon(Instant)']
mpg.hist(bins=100, label='observed mpg')

plt.axvspan(24, 26, alpha=0.5, color='red', label='24–26 (advertised city mpg)')
plt.axvspan(28, 32, alpha=0.5, color="orange", label='28-32 (advertised hwy mpg)')

plt.xlabel('Miles per gallon')
plt.ylabel('Frequency')
plt.title('Histogram of instantaneous mpg readouts\n\
2003 Suzuki Aerio SX 5sp manual\n\
573 miles traveled. Data collected in Los Angeles using Open Torque Viewer.')
plt.legend()

plt.savefig('/home/aahu/chinar/ryancompton.net/assets/torque/mpg_hist.png')
plt.show()
In [9]:
"""
restrict to speed data that has GPS measurements
and convert to freedom units
"""
dfgps = df[df['Speed (GPS)'] > 0]
df_spd = dfgps[['Speed (GPS)','Speed (OBD)']]*0.621 #conver kph to mph
spd_discrep = abs(df_spd['Speed (GPS)'] - df_spd['Speed (OBD)'])
In [10]:
#plot 1D speed histogram
sns.distplot(spd_discrep,bins=200)

plt.xlim([0,10])
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
mu = spd_discrep.mean()
median = spd_discrep.median()
sigma = spd_discrep.std()
textstr = '$\mu=%.2f$\n$\mathrm{median}=%.2f$\n$\sigma=%.2f$'%(mu, median, sigma)
plt.text(8.12,.29,textstr, bbox=props)

plt.xlabel('Discrepancy (mph)')
plt.ylabel('Normalized frequency')
plt.title('Discrepancies between GPS-measured speed and my speedometer')
plt.savefig('/home/aahu/chinar/ryancompton.net/assets/torque/speed_hist.png')
plt.show()
In [11]:
#plot 2D speed histogram
sns.jointplot('Speed (GPS)', 'Speed (OBD)', df_spd, joint_kws={'alpha':0.25})
plt.savefig('/home/aahu/chinar/ryancompton.net/assets/torque/speed_joint.png')
plt.show()
In [12]:
#conejo grade study

#box that defines when I'm on the grade
lat0 = 34.209165
lng0 = -118.99
lat1 = 34.195597
lng1 = -118.950455

df_conejo = df[(lng0 < df['GPS Longitude']) &( df['GPS Longitude'] < lng1) & (lat1 < df['GPS Latitude']) & (df['GPS Latitude'] < lat0)]
df_conejo = df_conejo[df_conejo['Miles Per Gallon(Instant)'] > 0]
In [13]:
"""
2D plot
"""
g = sns.JointGrid('Miles Per Gallon(Instant)','Throttle Position(Manifold)', df_conejo, space=0)
g.plot_marginals(sns.distplot, bins=20)#, shade=True)
g.plot_joint(sns.kdeplot, shade=True, n_levels=20, alpha=.8)
g.plot_joint(plt.scatter, alpha=.5)

plt.xlim([0,190])
plt.ylim([0,70])

props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
textstr = 'Downhill'
plt.text(130,20, textstr, bbox=props)
textstr = 'Uphill'
plt.text(40,35, textstr, bbox=props)

#seaborn jointplot/jointgrid can't do titles
g.fig.suptitle('Throttle posistion vs. mpg while driving the Conejo Grade in Ventura', y=.995)

plt.savefig('/home/aahu/chinar/ryancompton.net/assets/torque/conejo_joint.png')
plt.show()
In [14]:
"""
1D plot
"""
dfmpg = df_conejo[df_conejo['Miles Per Gallon(Instant)'] > 0]
dfmpg = dfmpg[dfmpg['Miles Per Gallon(Instant)'] < 200]

mpg = dfmpg['Miles Per Gallon(Instant)']
mpg.hist(bins=100, label='observed mpg')

plt.axvspan(24, 26, alpha=0.5, color='red', label='24–26 (advertised city mpg)')
plt.axvspan(28, 32, alpha=0.5, color="orange", label='28-32 (advertised hwy mpg)')

plt.xlabel('Miles per gallon')
plt.ylabel('Frequency')
plt.title('Histogram of instantaneous mpg readouts while driving the Conejo Grade')
plt.legend()

props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
textstr = 'Downhill'
plt.text(160,25, textstr, bbox=props)
textstr = 'Uphill'
plt.text(1,25, textstr, bbox=props)

plt.savefig('/home/aahu/chinar/ryancompton.net/assets/torque/mpg_hist_conejo.png')
plt.show()
In [15]:
"""
I could not figure from the table how much distance I've traveled in total??
Figure distance traveled by integrating speed...
"""

from scipy import integrate
import pandas as pd
import numpy as np

def integrate_method(self, how='trapz', unit='s'):
    '''Numerically integrate the time series.

    @param how: the method to use (trapz by default)
    @return 

    Available methods:
     * trapz - trapezoidal
     * cumtrapz - cumulative trapezoidal
     * simps - Simpson's rule
     * romb - Romberger's rule

    See http://docs.scipy.org/doc/scipy/reference/integrate.html for the method details.
    or the source code
    https://github.com/scipy/scipy/blob/master/scipy/integrate/quadrature.py
    '''
    available_rules = set(['trapz', 'cumtrapz', 'simps', 'romb'])
    if how in available_rules:
        rule = integrate.__getattribute__(how)
    else:
        print('Unsupported integration rule: %s' % (how))
        print('Expecting one of these sample-based integration rules: %s' % (str(list(available_rules))))
        raise AttributeError
    
    result = rule(self.values, self.index.astype(np.int64) / 10**9)
    #result = rule(self.values)
    return result

pd.TimeSeries.integrate = integrate_method


#integrate to get total distance...
ts = df[['Timestamp','Speed (OBD)']]

import datetime
df2 = df[['Timestamp', 'Speed (OBD)']]
df2['Timestamp'] = df2['Timestamp'].map(lambda x: datetime.datetime.fromtimestamp(x//1000))

ts = df2.set_index('Timestamp').resample('s')['Speed (OBD)'].fillna(0)
ts = ts/(60*60)
ts.integrate('simps') # total miles traveled
/usr/local/lib/python3.4/dist-packages/IPython/kernel/__main__.py:46: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
Out[15]:
994.63967592592599