In [1]: import numpy as np
In [2]: import statsmodels.api as sm
Create some data
In [3]: nsample = 50  # number of observations
In [4]: sig = 0.25  # scale applied to the Gaussian noise below
In [5]: x1 = np.linspace(0, 20, nsample)
In [6]: X = np.column_stack((x1, np.sin(x1), (x1 - 5)**2, np.ones(nsample)))  # last column of ones acts as the intercept
In [7]: beta = [0.5, 0.5, -0.02, 5.0]  # true coefficients, one per column of X
In [8]: y_true = X.dot(beta)
In [9]: y = y_true + np.random.normal(size=nsample) * sig  # no seed is set in this session, so the noise differs on every run
Set up and estimate the model
In [10]: olsmod = sm.OLS(y, X)
In [11]: olsres = olsmod.fit()
In [12]: print olsres.params
[ 0.50898072 0.51916098 -0.02158601 4.99563796]
In [13]: print olsres.bse
[ 0.01141961 0.04489185 0.00100265 0.07404521]
In-sample prediction
In [14]: ypred = olsres.predict(X)  # predictions for the same design matrix the model was estimated on
Create a new sample of explanatory variables Xnew, predict and plot
In [15]: x1n = np.linspace(20.5,25, 10)
In [16]: Xnew = np.c_[x1n, np.sin(x1n), (x1n-5)**2, np.ones(10)]
In [17]: ynewpred = olsres.predict(Xnew) # predict out of sample
In [18]: print ypred
[ 4.4559876 4.95431237 5.41158849 5.79952214 6.1000306
6.30821316 6.43315634 6.49644096 6.52859648 6.56408511
6.63563975 6.76888609 6.97813236 7.26401852 7.61341116
8.00156122 8.39617026 8.76269694 9.07003211 9.29560991
9.42911534 9.47417891 9.44778022 9.37745824 9.2967867
9.2398581 9.23568231 9.30342042 9.44923656 9.66528521
9.93100013 10.2164734 10.48736917 10.71056495 10.85959353
10.91899149 10.8868398 10.77507906 10.60754778 10.41606569
10.2352059 10.09661463 10.02381182 10.02832687 10.10780439
10.24639119 10.41734091 10.58740837 10.72231167 10.79236459]
In [19]: import matplotlib.pyplot as plt
In [20]: plt.figure()
Out[20]: <matplotlib.figure.Figure at 0xcfeb32c>
In [21]: plt.plot(x1, y, 'o', x1, y_true, 'b-')
Out[21]:
[<matplotlib.lines.Line2D at 0xcd11f8c>,
<matplotlib.lines.Line2D at 0xcd1734c>]
In [22]: plt.plot(np.hstack((x1, x1n)), np.hstack((ypred, ynewpred)),'r')
Out[22]: [<matplotlib.lines.Line2D at 0xcfeb48c>]
In [23]: plt.title('OLS prediction, blue: true and data, fitted/predicted values:red')
Out[23]: <matplotlib.text.Text at 0xccf316c>