
Comparing OLS and RLM

In [1]: import numpy as np
   ...: from scipy import stats

In [2]: import statsmodels.api as sm

In [3]: import matplotlib.pyplot as plt

In [4]: from statsmodels.sandbox.regression.predstd import wls_prediction_std

In [5]: nsample = 50

In [6]: x1 = np.linspace(0, 20, nsample)

In [7]: X = np.c_[x1, (x1-5)**2, np.ones(nsample)]

In [8]: sig = 0.3   # smaller error variance makes OLS<->RLM contrast bigger

In [9]: beta = [0.5, -0.0, 5.]

In [10]: y_true2 = np.dot(X, beta)

In [11]: y2 = y_true2 + sig * np.random.normal(size=nsample)

In [12]: y2[[39,41,43,45,48]] -= 5   # add some outliers (10% of nsample)
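The simulated data depend on np.random.normal, so the exact estimates printed below differ from run to run. A minimal sketch for making the example reproducible (the seed value is arbitrary and not part of the original session; it would have to be set before drawing y2):

np.random.seed(12345)   # any fixed seed makes the draws, and hence the estimates, repeatable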

Example: estimate quadratic function (true is linear)

In [13]: res = sm.OLS(y2, X).fit()

In [14]: print(res.params)
[ 0.52041769 -0.01343655  5.10209124]

Note: the quadratic term mostly captures the effect of the outliers

In [15]: print(res.bse)
[ 0.07137208  0.00631533  0.46229484]

In-sample predictions are also available via res.predict(), which for OLS equals res.fittedvalues.
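To check the note above, one can refit OLS with the shifted observations excluded; this is only an illustrative sketch (the mask is not part of the original session), but the quadratic coefficient should move back toward its true value of 0:

mask = np.ones(nsample, dtype=bool)
mask[[39, 41, 43, 45, 48]] = False           # drop the observations that were shifted by -5
res_clean = sm.OLS(y2[mask], X[mask]).fit()
print(res_clean.params)                      # quadratic coefficient should be close to 0 again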

Compare with the robust estimator:

In [16]: resrlm = sm.RLM(y2, X).fit()

In [17]: print(resrlm.params)
[ 0.50899669 -0.00311252  5.02369257]

In [18]: print(resrlm.bse)
[ 0.0194497   0.001721    0.12598057]

In [19]: plt.figure()
Out[19]: <matplotlib.figure.Figure at 0x7c134d0>

In [20]: plt.plot(x1, y2, 'o', x1, y_true2, 'b-')
Out[20]: 
[<matplotlib.lines.Line2D at 0x7c397d0>,
 <matplotlib.lines.Line2D at 0x7c39d10>]

In [21]: prstd, iv_l, iv_u = wls_prediction_std(res)

In [22]: plt.plot(x1, res.fittedvalues, 'r-')
Out[22]: [<matplotlib.lines.Line2D at 0x7c13bd0>]

In [23]: plt.plot(x1, iv_u, 'r--')
Out[23]: [<matplotlib.lines.Line2D at 0x7c3c690>]

In [24]: plt.plot(x1, iv_l, 'r--')
Out[24]: [<matplotlib.lines.Line2D at 0x7c3cbd0>]

In [25]: plt.plot(x1, resrlm.fittedvalues, 'g.-')
Out[25]: [<matplotlib.lines.Line2D at 0x746e090>]

In [26]: plt.title('blue: true,   red: OLS,   green: RLM')
Out[26]: <matplotlib.text.Text at 0x7c24e50>
[Figure tut_ols_rlm_0: data (dots), true line (blue), OLS fit with prediction interval (red), RLM fit (green)]

Example: estimate linear function (true is linear)

In [27]: X2 = X[:,[0,2]]  # use only linear term and constant

In [28]: res2 = sm.OLS(y2, X2).fit()

In [29]: print(res2.params)
[ 0.38605223  5.64366631]

Note: without the quadratic term, the outliers instead bias the OLS slope and intercept

In [30]: print(res2.bse)
[ 0.03445102  0.3998306 ]

As before, res2.predict() returns the in-sample predictions.

In [31]: prstd, iv_l, iv_u = wls_prediction_std(res2)

Compare with the robust estimator:

In [32]: resrlm2 = sm.RLM(y2, X2).fit()

In [33]: print(resrlm2.params)
[ 0.48344129  5.12227932]

In [34]: print(resrlm2.bse)
[ 0.0094077   0.10918363]

In [35]: plt.figure()
Out[35]: <matplotlib.figure.Figure at 0x7def5d0>

In [36]: plt.plot(x1, y2, 'o', x1, y_true2, 'b-')
Out[36]: 
[<matplotlib.lines.Line2D at 0x7e138d0>,
 <matplotlib.lines.Line2D at 0x7e13e10>]

In [37]: plt.plot(x1, res2.fittedvalues, 'r-')
Out[37]: [<matplotlib.lines.Line2D at 0x7defd50>]

In [38]: plt.plot(x1, iv_u, 'r--')
Out[38]: [<matplotlib.lines.Line2D at 0x7defcd0>]

In [39]: plt.plot(x1, iv_l, 'r--')
Out[39]: [<matplotlib.lines.Line2D at 0x7e18a10>]

In [40]: plt.plot(x1, resrlm2.fittedvalues, 'g.-')
Out[40]: [<matplotlib.lines.Line2D at 0x7e1a090>]

In [41]: plt.title('blue: true,   red: OLS,   green: RLM')
Out[41]: <matplotlib.text.Text at 0x7c49f90>
[Figure tut_ols_rlm_1: data (dots), true line (blue), OLS fit with prediction interval (red), RLM fit (green)]

See also help(sm.RLM.fit) for more fitting options, and the module sm.robust.scale for the available scale estimators.

In [42]: plt.show()
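As an illustration of those options (not part of the original session), RLM accepts a robust norm via the M argument and a scale estimator via scale_est in fit(); a minimal sketch using Tukey's biweight norm and Huber's proposal 2 scale:

resrlm3 = sm.RLM(y2, X2, M=sm.robust.norms.TukeyBiweight()).fit(
    scale_est=sm.robust.scale.HuberScale())   # defaults are Huber's T norm and MAD scale
print(resrlm3.params)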
