>> data = randn(1,100); % 100 data points >> n = length(data); % the number of data points >> >> >> figure;hist(data) >> >> >> help randsample RANDSAMPLE Random sample, with or without replacement. Y = RANDSAMPLE(N,K) returns Y as a vector of values sampled uniformly at random, without replacement, from the integers 1:N. Y = RANDSAMPLE(POPULATION,K) returns K values sampled uniformly at random, without replacement, from the values in the vector POPULATION. Y = RANDSAMPLE(...,REPLACE) returns a sample taken with replacement if REPLACE is true, or without replacement if REPLACE is false (the default). Y = RANDSAMPLE(...,true,W) returns a weighted sample, using positive weights W, taken with replacement. W is often a vector of probabilities. This function does not support weighted sampling without replacement. Example: Generate a random sequence of the characters ACGT, with replacement, according to specified probabilities. R = randsample('ACGT',48,true,[0.15 0.35 0.35 0.15]) See also RAND, RANDPERM. >> >> >> >> >> p=1 p = 1 >> >> >> bootix = ceil(n*rand(1,n)); % indices to use for the bootstrap sample >> >> >> bootix bootix = Columns 1 through 17 82 54 36 94 88 56 63 59 21 31 48 24 85 20 23 18 23 Columns 18 through 34 44 32 93 44 19 91 98 44 12 26 41 60 27 61 72 23 12 Columns 35 through 51 30 32 43 51 9 27 81 3 93 74 49 58 24 46 97 55 53 Columns 52 through 68 24 49 63 68 40 37 99 4 89 92 80 10 27 34 68 14 73 Columns 69 through 85 11 66 50 78 72 91 90 34 70 20 4 75 51 48 91 61 62 Columns 86 through 100 86 81 58 19 24 89 3 49 17 98 72 51 48 6 69 >> union([],bootix) ans = Columns 1 through 17 3 4 6 9 10 11 12 14 17 18 19 20 21 23 24 26 27 Columns 18 through 34 30 31 32 34 36 37 40 41 43 44 46 48 49 50 51 53 54 Columns 35 through 51 55 56 58 59 60 61 62 63 66 68 69 70 72 73 74 75 78 Columns 52 through 66 80 81 82 85 86 88 89 90 91 92 93 94 97 98 99 >> bootix bootix = Columns 1 through 17 82 54 36 94 88 56 63 59 21 31 48 24 85 20 23 18 23 Columns 18 through 34 44 32 93 44 19 91 98 44 12 26 41 60 27 61 72 23 12 Columns 35 through 51 30 32 43 51 9 27 81 3 93 74 49 58 24 46 97 55 53 Columns 52 through 68 24 49 63 68 40 37 99 4 89 92 80 10 27 34 68 14 73 Columns 69 through 85 11 66 50 78 72 91 90 34 70 20 4 75 51 48 91 61 62 Columns 86 through 100 86 81 58 19 24 89 3 49 17 98 72 51 48 6 69 >> size(data) ans = 1 100 >> data(10) ans = 0.1746 >> data(bootix) ans = Columns 1 through 10 0.7990 1.6924 0.6686 -0.5596 -0.7420 -0.6436 1.0950 -0.0195 0.2944 -0.3999 Columns 11 through 20 -0.9219 1.6236 0.2120 -0.8323 0.7143 0.0593 0.7143 1.4151 0.6900 -0.6355 Columns 21 through 30 1.4151 -0.0956 0.3899 0.5690 1.4151 0.7258 0.8580 -1.6041 -0.0482 1.2540 Columns 31 through 40 0.0000 -0.2556 0.7143 0.7258 0.5711 0.6900 -1.0565 -1.0106 0.3273 1.2540 Columns 41 through 50 0.6232 0.1253 -0.6355 -0.2959 -2.1707 -1.0091 1.6236 0.5287 0.7812 0.5913 Columns 51 through 60 0.5077 1.6236 -2.1707 1.0950 0.5779 -0.1567 1.1908 -0.8217 0.2877 1.0823 Columns 61 through 70 0.0880 -0.3510 0.1746 1.2540 0.7119 0.5779 2.1832 -0.3775 -0.1867 0.8956 Columns 71 through 80 -0.0592 0.3148 -0.2556 0.3899 -0.1315 0.7119 0.6771 -0.8323 0.2877 -1.4751 Columns 81 through 90 -1.0106 -0.9219 0.3899 0.0000 -0.3179 0.2379 0.6232 -1.0091 -0.0956 1.6236 Columns 91 through 100 1.0823 0.1253 -2.1707 1.0668 0.5690 -0.2556 -1.0106 -0.9219 1.1909 0.0403 >> data(bootix) >> >> >> >> numboots = 1000; % how many bootstraps to perform >> for p=1:numboots bootix = ceil(n*rand(1,n)); % indices to use for the bootstrap sample % data(bootix) gives the bootstrap sample. end >> p=1 p = 1 >> >> >> trainix = setdiff(1:n,p); % indices to use for training >> trainix trainix = Columns 1 through 17 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 Columns 18 through 34 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 Columns 35 through 51 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 Columns 52 through 68 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 Columns 69 through 85 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 Columns 86 through 99 87 88 89 90 91 92 93 94 95 96 97 98 99 100 >> >> >> trainix trainix = Columns 1 through 17 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 Columns 18 through 34 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 Columns 35 through 51 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 Columns 52 through 68 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 Columns 69 through 85 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 Columns 86 through 99 87 88 89 90 91 92 93 94 95 96 97 98 99 100 >> trainix >> >> >> setdiff([5 10 15],[5]) ans = 10 15 >> setdiff([5 10 15],[5 10]) ans = 15 >> setdiff([5 10 15],[5 10 2 3]) ans = 15 >> setdiff([5 10 15],[]) ans = 5 10 15 >> >> testix = p; % indices to use for testing >> >> testix testix = 1 >> size(data) ans = 1 100 >> data(trainix) ans = Columns 1 through 10 -1.6656 0.1253 0.2877 -1.1465 1.1909 1.1892 -0.0376 0.3273 0.1746 -0.1867 Columns 11 through 20 0.7258 -0.5883 2.1832 -0.1364 0.1139 1.0668 0.0593 -0.0956 -0.8323 0.2944 Columns 21 through 30 -1.3362 0.7143 1.6236 -0.6918 0.8580 1.2540 -1.5937 -1.4410 0.5711 -0.3999 Columns 31 through 40 0.6900 0.8156 0.7119 1.2902 0.6686 1.1908 -1.2025 -0.0198 -0.1567 -1.6041 Columns 41 through 50 0.2573 -1.0565 1.4151 -0.8051 0.5287 0.2193 -0.9219 -2.1707 -0.0592 -1.0106 Columns 51 through 60 0.6145 0.5077 1.6924 0.5913 -0.6436 0.3803 -1.0091 -0.0195 -0.0482 0.0000 Columns 61 through 70 -0.3179 1.0950 -1.8740 0.4282 0.8956 0.7310 0.5779 0.0403 0.6771 0.5689 Columns 71 through 80 -0.2556 -0.3775 -0.2959 -1.4751 -0.2340 0.1184 0.3148 1.4435 -0.3510 0.6232 Columns 81 through 90 0.7990 0.9409 -0.9921 0.2120 0.2379 -1.0078 -0.7420 1.0823 -0.1315 0.3899 Columns 91 through 99 0.0880 -0.6355 -0.5596 0.4437 -0.9499 0.7812 0.5690 -0.8217 -0.2656 >> data(testix) ans = -0.4326 >> k = 8; >> >> >> allix = randperm(n); % all data indices, randomly ordered >> numineach = ceil(n/k); % at least one part must have this many data points >> % add NaNs and reshape data indices into a 2D matrix. >> % this process ensures that the numbers of data points in each part >> % are as evenly balanced as possible. >> allix = reshape([allix NaN(1,k*numineach-n)],k,numineach); >> >> >> allix allix = 46 76 45 75 73 74 70 33 6 98 20 19 28 17 8 66 13 3 87 26 5 97 79 37 11 89 58 72 15 31 49 68 80 35 16 2 54 56 43 25 77 88 99 83 93 18 71 67 59 92 30 42 52 21 47 22 12 14 62 95 36 64 23 9 NaN 1 50 10 91 29 90 94 82 100 7 63 51 NaN 38 84 61 34 24 69 57 55 40 60 86 96 NaN 65 39 4 27 81 41 48 85 44 53 78 32 NaN >> union(allix(:),[]) ans = 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 NaN NaN NaN NaN >> allix allix = 46 76 45 75 73 74 70 33 6 98 20 19 28 17 8 66 13 3 87 26 5 97 79 37 11 89 58 72 15 31 49 68 80 35 16 2 54 56 43 25 77 88 99 83 93 18 71 67 59 92 30 42 52 21 47 22 12 14 62 95 36 64 23 9 NaN 1 50 10 91 29 90 94 82 100 7 63 51 NaN 38 84 61 34 24 69 57 55 40 60 86 96 NaN 65 39 4 27 81 41 48 85 44 53 78 32 NaN >> allix = randperm(n); % all data indices, randomly ordered >> allix allix = Columns 1 through 17 32 12 94 55 46 24 69 48 85 100 84 35 44 64 21 53 37 Columns 18 through 34 92 22 28 93 65 19 78 72 77 82 96 6 57 23 87 75 62 Columns 35 through 51 34 66 8 14 99 76 50 83 54 31 13 80 38 5 18 43 3 Columns 52 through 68 89 88 95 49 42 67 81 16 26 52 1 40 45 36 61 25 4 Columns 69 through 85 86 27 90 2 17 30 58 60 7 47 20 11 79 74 56 91 98 Columns 86 through 100 73 33 9 51 41 15 68 10 59 63 71 29 97 39 70 >> p=1 p = 1 >> testix = allix(p,:); % indices to use for testing >> testix testix = Columns 1 through 17 32 12 94 55 46 24 69 48 85 100 84 35 44 64 21 53 37 Columns 18 through 34 92 22 28 93 65 19 78 72 77 82 96 6 57 23 87 75 62 Columns 35 through 51 34 66 8 14 99 76 50 83 54 31 13 80 38 5 18 43 3 Columns 52 through 68 89 88 95 49 42 67 81 16 26 52 1 40 45 36 61 25 4 Columns 69 through 85 86 27 90 2 17 30 58 60 7 47 20 11 79 74 56 91 98 Columns 86 through 100 73 33 9 51 41 15 68 10 59 63 71 29 97 39 70 >> >> testix testix = Columns 1 through 17 32 12 94 55 46 24 69 48 85 100 84 35 44 64 21 53 37 Columns 18 through 34 92 22 28 93 65 19 78 72 77 82 96 6 57 23 87 75 62 Columns 35 through 51 34 66 8 14 99 76 50 83 54 31 13 80 38 5 18 43 3 Columns 52 through 68 89 88 95 49 42 67 81 16 26 52 1 40 45 36 61 25 4 Columns 69 through 85 86 27 90 2 17 30 58 60 7 47 20 11 79 74 56 91 98 Columns 86 through 100 73 33 9 51 41 15 68 10 59 63 71 29 97 39 70 >> >> >> p=7 p = 7 >> testix = allix(p,:); % indices to use for testing ??? Attempted to access allix(7,:); index out of bounds because size(allix)=[1,100]. >> k = 8; % how many parts to use >> allix = randperm(n); % all data indices, randomly ordered >> numineach = ceil(n/k); % at least one part must have this many data points >> % add NaNs and reshape data indices into a 2D matrix. >> % this process ensures that the numbers of data points in each part >> % are as evenly balanced as possible. >> allix = reshape([allix NaN(1,k*numineach-n)],k,numineach); >> >> >> >> >> p=7 p = 7 >> testix = allix(p,:); % indices to use for testing >> >> testix testix = 75 26 79 46 91 5 4 100 19 3 93 47 NaN >> data(testix) ??? Subscript indices must either be real positive integers or logicals. >> testix(isnan(testix)) = []; % remove NaNs if necessary >> >> >> testix testix = 75 26 79 46 91 5 4 100 19 3 93 47 >> trainix = setdiff(1:n,testix); % indices to use for training >> >> >> data(trainix) ans = Columns 1 through 10 -0.4326 -1.6656 1.1909 1.1892 -0.0376 0.3273 0.1746 -0.1867 0.7258 -0.5883 Columns 11 through 20 2.1832 -0.1364 0.1139 1.0668 0.0593 -0.8323 0.2944 -1.3362 0.7143 1.6236 Columns 21 through 30 -0.6918 1.2540 -1.5937 -1.4410 0.5711 -0.3999 0.6900 0.8156 0.7119 1.2902 Columns 31 through 40 0.6686 1.1908 -1.2025 -0.0198 -0.1567 -1.6041 0.2573 -1.0565 1.4151 -0.8051 Columns 41 through 50 -0.9219 -2.1707 -0.0592 -1.0106 0.6145 0.5077 1.6924 0.5913 -0.6436 0.3803 Columns 51 through 60 -1.0091 -0.0195 -0.0482 0.0000 -0.3179 1.0950 -1.8740 0.4282 0.8956 0.7310 Columns 61 through 70 0.5779 0.0403 0.6771 0.5689 -0.2556 -0.3775 -0.2959 -0.2340 0.1184 0.3148 Columns 71 through 80 -0.3510 0.6232 0.7990 0.9409 -0.9921 0.2120 0.2379 -1.0078 -0.7420 1.0823 Columns 81 through 88 -0.1315 0.0880 -0.5596 0.4437 -0.9499 0.7812 0.5690 -0.8217 >> data(testix) ans = Columns 1 through 10 -1.4751 0.8580 1.4435 0.5287 0.3899 -1.1465 0.2877 -0.2656 -0.0956 0.1253 Columns 11 through 12 -0.6355 0.2193 >> % generate some data >> n = 100; >> x = randn(1,n); >> y = 10*x + 2 + 4*randn(1,n); >> >> >> >> figure;scatter(x,y) >> numboots = 10000; % number of bootstraps to perform >> xvals = -3:.5:3; % x-values to evaluate the model at >> xvals xvals = Columns 1 through 10 -3.0000 -2.5000 -2.0000 -1.5000 -1.0000 -0.5000 0 0.5000 1.0000 1.5000 Columns 11 through 13 2.0000 2.5000 3.0000 >> >> >> modelfit = zeros(numboots,length(xvals)); >> params = zeros(numboots,2); >> >> >> p=1 p = 1 >> ix = ceil(n*rand(1,n)); >> ix ix = Columns 1 through 17 54 38 48 70 15 59 96 85 94 34 49 15 22 50 81 48 39 Columns 18 through 34 29 97 6 68 100 18 100 36 53 10 73 73 61 40 16 51 35 Columns 35 through 51 91 52 83 20 98 66 69 89 16 51 86 16 20 25 48 85 85 Columns 52 through 68 40 45 18 41 63 97 64 87 14 65 22 59 67 90 2 42 16 Columns 69 through 85 94 85 46 82 37 51 52 89 47 71 75 8 19 73 85 55 6 Columns 86 through 100 70 42 12 100 43 74 71 22 90 46 82 22 37 10 65 >> X = [x(ix)' ones(n,1)]; >> size(X) ans = 100 2 >> h = inv(X'*X)*X'*y(ix)'; >> size(h) ans = 2 1 >> h h = 9.6280 1.5065 >> params(boot,:) = h; ??? Undefined function or variable 'boot'. >> boot=1 boot = 1 >> params(boot,:) = h; >> size(xvals) ans = 1 13 >> >> >> % perform the bootstraps >> modelfit = zeros(numboots,length(xvals)); >> params = zeros(numboots,2); >> for boot=1:numboots % prepare data indices ix = ceil(n*rand(1,n)); % construct regressor matrix X = [x(ix)' ones(n,1)]; % estimate parameters h = inv(X'*X)*X'*y(ix)'; % evaluate the model modelfit(boot,:) = xvals*h(1) + h(2); % record the parameters params(boot,:) = h; end >> >> >> >> >> figure; >> hold on; >> h1 = scatter(x,y,'k.'); >> size(modelfit) ans = 10000 13 >> modelfitP = prctile(modelfit,[2.5 50 97.5],1); >> size(modelfitP) ans = 3 13 >> h2 = patch([xvals fliplr(xvals)],[modelfitP(1,:) fliplr(modelfitP(3,:))],[1 .7 .7]); >> set(h2,'EdgeColor','none'); >> h3 = plot(xvals,modelfitP(2,:),'r-','LineWidth',2); >> uistack(h1,'top'); % make sure data points are visible by bringing them to the top >> xlabel('x'); >> ylabel('y'); >> >> >> size(params) ans = 10000 2 >> paramsP = prctile(params,[2.5 97.5],1); >> paramsP paramsP = 9.1490 0.9187 10.8947 2.4708 >> title(sprintf('y=ax+b; 95%% confidence intervals: a=[%.1f %.1f], b=[%.1f %.1f]', ... paramsP(1,1),paramsP(2,1),paramsP(1,2),paramsP(2,2))); >> >> predictions = zeros(1,n); % this will hold the prediction for each data point >> >> p=1 p = 1 >> trainix = setdiff(1:n,p); >> testix = p; >> n n = 100 >> X = [x(trainix)' ones(length(trainix),1)]; % construct regressor matrix >> h = inv(X'*X)*X'*y(trainix)'; % estimate parameters >> h h = 10.0496 1.6643 >> p p = 1 >> predictions(p) = [x(testix)' ones(length(testix),1)]*h; >> predictions predictions = Columns 1 through 10 -10.2724 0 0 0 0 0 0 0 0 0 Columns 11 through 20 0 0 0 0 0 0 0 0 0 0 Columns 21 through 30 0 0 0 0 0 0 0 0 0 0 Columns 31 through 40 0 0 0 0 0 0 0 0 0 0 Columns 41 through 50 0 0 0 0 0 0 0 0 0 0 Columns 51 through 60 0 0 0 0 0 0 0 0 0 0 Columns 61 through 70 0 0 0 0 0 0 0 0 0 0 Columns 71 through 80 0 0 0 0 0 0 0 0 0 0 Columns 81 through 90 0 0 0 0 0 0 0 0 0 0 Columns 91 through 100 0 0 0 0 0 0 0 0 0 0 >> >> >> % perfom leave-one-out cross-validation >> predictions = zeros(1,n); % this will hold the prediction for each data point >> for p=1:n % figure out indices trainix = setdiff(1:n,p); testix = p; % train the model X = [x(trainix)' ones(length(trainix),1)]; % construct regressor matrix h = inv(X'*X)*X'*y(trainix)'; % estimate parameters % test the model by computing the prediction for the left-out data point predictions(p) = [x(testix)' ones(length(testix),1)]*h; end >> >> >> >> predictions predictions = Columns 1 through 10 -10.2724 -20.2368 11.5051 -3.4561 4.9594 4.1226 1.9213 -8.4061 -7.7787 -2.1174 Columns 11 through 20 -10.1357 -8.7040 16.6230 2.1999 -10.4985 1.2577 -9.7000 -11.5975 -0.9022 11.2974 Columns 21 through 30 3.0183 8.2706 -10.1517 -2.9253 -0.9929 -10.4010 -11.5337 11.0804 1.7743 -4.7625 Columns 31 through 40 9.7130 3.9742 -8.2228 14.9673 4.4728 16.4021 13.0786 -5.2164 -11.2631 1.0006 Columns 41 through 50 -1.6454 -6.7641 6.6857 16.3733 -3.8778 -6.7621 -0.7852 8.3798 -6.9145 -10.1256 Columns 51 through 60 0.4456 1.0232 6.5296 -4.3397 0.1983 -2.6622 0.9430 17.3367 -4.3525 -11.8439 Columns 61 through 70 6.4162 -7.2737 2.0006 -4.5623 7.0469 7.2213 -0.3437 -18.9893 3.0248 17.9309 Columns 71 through 80 11.9852 -14.1833 0.9492 -5.1226 -8.5040 -10.5652 4.6271 -2.5956 2.2374 -1.9932 Columns 81 through 90 -3.0195 5.4129 9.0899 22.8182 -11.9204 -8.5825 12.0227 -2.2736 -12.0901 4.8324 Columns 91 through 100 17.2335 8.8150 20.7358 6.7525 20.6721 -1.7924 -9.7609 -0.3827 13.5239 -9.6053 >> R2 = 100 * (1 - sum((y-predictions).^2) / sum((y-mean(y)).^2)); >> fprintf('Cross-validated R^2 = %.2f%%\n',R2); Cross-validated R^2 = 85.29% >>