Logo Search packages:      
Sourcecode: octave-econometrics version File versions  Download package

kernel_example.m

# Copyright (C) 2007 Michael Creel <michael.creel@uab.es>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; If not, see <http://www.gnu.org/licenses/>.

# kernel_example: examples of how to use kernel density and regression functions
# requires the optim and plot packages from Octave Forge
#
# usage: kernel_example;

# sample size (default n = 500) - should get better fit (on average)
# as this increases, supposing that you allow the optimal window width
# to be found, by uncommenting the relevant lines
n = 500;

# set this to greater than 0 to try parallel computations (requires MPITB)
compute_nodes = 0;
nodes = compute_nodes + 1; # count master node

close all;
hold off;

############################################################
# kernel regression example
# uniformly spaced data points on [0,2]
x = 1:n;
x = x';
x = 2*x/n;
# generate dependent variable
trueline =  x + (x.^2)/2 - 3.1*(x.^3)/3 + 1.2*(x.^4)/4;
sig = 0.5;
y = trueline + sig*randn(n,1);
tic;
fit = kernel_regression(x, y, x); # use the default bandwidth
t1 = toc;
printf("\n");
printf("########################################################################\n");
printf("time for kernel regression example using %d data points and %d compute nodes: %f\n", n, nodes, t1);
plot(x, fit, ";fit;", x, trueline,";true;");
grid("on");
title("Example 1: Kernel regression fit");

############################################################
# kernel density example: univariate - fit to Chi^2(3) data
data = sumsq(randn(n,3),2);
# evaluation point are on a grid for plotting
stepsize = 0.2;
grid_x = (-1:stepsize:11)';
bandwidth = 0.55;
# get optimal bandwidth (time consuming, uncomment if you want to try it)
# bandwidth = kernel_optimal_bandwidth(data);
# get the fitted density and do a plot
tic;
dens = kernel_density(grid_x, data, bandwidth, "__kernel_normal", false, false, compute_nodes);
t1 = toc;
printf("\n");
printf("########################################################################\n");
printf("time for univariate kernel density example using %d data points and %d compute nodes: %f\n", n, nodes, t1);
printf("A rough integration under the fitted univariate density is %f\n", sum(dens)*stepsize);
figure();
plot(grid_x, dens, ";fitted density;", grid_x, chisquare_pdf(grid_x,3), ";true density;");
title("Example 2: Kernel density fit: Univariate Chi^2(3) data");

############################################################
# kernel density example: bivariate
# X ~ N(0,1)
# Y ~ Chi squared(3)
# X, Y are dependent
d = randn(n,3);
data = [d(:,1) sumsq(d,2)];
# evaluation points are on a grid for plotting
stepsize = 0.2;
a = (-5:stepsize:5)'; # for the N(0,1)
b = (-1:stepsize:9)';  # for the Chi squared(3)
gridsize = rows(a);
[grid_x, grid_y] = meshgrid(a, b);
eval_points = [vec(grid_x) vec(grid_y)];
bandwidth = 0.85;
# get optimal bandwidth (time consuming, uncomment if you want to try it)
# bandwidth = kernel_optimal_bandwidth(data);
# get the fitted density and do a plot
tic;
dens = kernel_density(eval_points, data, bandwidth, "__kernel_normal", false, false, compute_nodes);
t1 = toc;
printf("\n");
printf("########################################################################\n");
printf("time for multivariate kernel density example using %d data points and %d compute nodes: %f\n", n, nodes, t1);
dens = reshape(dens, gridsize, gridsize);
printf("A rough integration under the fitted bivariate density is %f\n", sum(sum(dens))*stepsize^2);
figure();
surf(grid_x, grid_y, dens);
title("Example 3: Kernel density fit: dependent bivariate data");
xlabel("true marginal density is N(0,1)");
ylabel("true marginal density is Chi^2(3)");
# more extensive test of parallel
if compute_nodes > 0 # only try this if parallel is available
      ns =[4000; 8000; 10000; 12000; 16000; 20000];
      printf("\n");
      printf("########################################################################\n");
      printf("kernel regression example with several sample sizes serial/parallel timings\n");
      figure();
      clg;
      title("Compute time versus nodes, kernel regression with different sample sizes");
      xlabel("nodes");
      ylabel("time (sec)");
      hold on;
      ts = zeros(rows(ns),4);
      for i = 1:rows(ns)
            for compute_nodes = 0:3
                  nodes = compute_nodes + 1;
                  n = ns(i,:);
                  x = 1:n;
                  x = x';
                  x = 2*x/n;
                  # generate dependent variable
                  trueline =  x + (x.^2)/2 - 3.1*(x.^3)/3 + 1.2*(x.^4)/4;
                  sig = 0.5;
                  y = trueline + sig*randn(n,1);
                  bandwidth = 0.45;
                  tic;
                  fit = kernel_regression(x, y, x, bandwidth, "__kernel_normal", false, false, compute_nodes);
                  t1 = toc;
                  ts(i, nodes) = t1;
                  plot(nodes, t1, "*");
                  printf(" %d data points and %d compute nodes: %f\n", n, nodes, t1);
            endfor
            plot(ts(i,:)');
      endfor
      hold off;
endif

Generated by  Doxygen 1.6.0   Back to index