summaryrefslogtreecommitdiffstats
path: root/SD-VBS/benchmarks/texture_synthesis/src/matlabPyrTools/TUTORIALS/pyramids.m
diff options
context:
space:
mode:
Diffstat (limited to 'SD-VBS/benchmarks/texture_synthesis/src/matlabPyrTools/TUTORIALS/pyramids.m')
-rwxr-xr-xSD-VBS/benchmarks/texture_synthesis/src/matlabPyrTools/TUTORIALS/pyramids.m903
1 files changed, 0 insertions, 903 deletions
diff --git a/SD-VBS/benchmarks/texture_synthesis/src/matlabPyrTools/TUTORIALS/pyramids.m b/SD-VBS/benchmarks/texture_synthesis/src/matlabPyrTools/TUTORIALS/pyramids.m
deleted file mode 100755
index 2123c69..0000000
--- a/SD-VBS/benchmarks/texture_synthesis/src/matlabPyrTools/TUTORIALS/pyramids.m
+++ /dev/null
@@ -1,903 +0,0 @@
1%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2%%% IMAGE PYRAMID TUTORIAL
3%%%
4%%% A brief introduction to multi-scale pyramid decompositions for image
5%%% processing. You should go through this, reading the comments, and
6%%% executing the corresponding MatLab instructions. This file assumes
7%%% a basic familiarity with matrix algebra, with linear systems and Fourier
8%%% theory, and with MatLab. If you don't understand a particular
9%%% function call, execute "help <functionName>" to see documentation.
10%%%
11%%% EPS, 6/96.
12%%% Based on the original OBVIUS tutorial.
13%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
14
15%% Determine a subsampling factor for images, based on machine speed:
16oim = pgmRead('einstein.pgm');
17tic; corrDn(oim,[1 1; 1 1]/4,'reflect1',[2 2]); time = toc;
18imSubSample = min(max(floor(log2(time)/2+3),0),2);
19im = blurDn(oim, imSubSample,'qmf9');
20clear oim;
21clf; showIm(im, 'auto2', 'auto', 'im');
22
23%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
24
25%%% LAPLACIAN PYRAMIDS:
26
27%% Images may be decomposed into information at different scales.
28%% Blurring eliminates the fine scales (detail):
29
30binom5 = binomialFilter(5);
31lo_filt = binom5*binom5';
32blurred = rconv2(im,lo_filt);
33subplot(1,2,1); showIm(im, 'auto2', 'auto', 'im');
34subplot(1,2,2); showIm(blurred, 'auto2', 'auto', 'blurred');
35
36%% Subtracting the blurred image from the original leaves ONLY the
37%% fine scale detail:
38fine0 = im - blurred;
39subplot(1,2,1); showIm(fine0, 'auto2', 'auto', 'fine0');
40
41%% The blurred and fine images contain all the information found in
42%% the original image. Trivially, adding the blurred image to the
43%% fine scale detail will reconstruct the original. We can compare
44%% the original image to the sum of blurred and fine using the
45%% "imStats" function, which reports on the statistics of the
46%% difference between it's arguments:
47imStats(im, blurred+fine0);
48
49%% Since the filter is a lowpass filter, we might want to subsample
50%% the blurred image. This may cause some aliasing (depends on the
51%% filter), but the decomposition structure given above will still be
52%% possible. The corrDn function correlates (same as convolution, but
53%% flipped filter) and downsamples in a single operation (for
54%% efficiency). The string 'reflect1' tells the function to handle
55%% boundaries by reflecting the image about the edge pixels. Notice
56%% that the blurred1 image is half the size (in each dimension) of the
57%% original image.
58lo_filt = 2*binom5*binom5'; %construct a separable 2D filter
59blurred1 = corrDn(im,lo_filt,'reflect1',[2 2]);
60subplot(1,2,2); showIm(blurred1,'auto2','auto','blurred1');
61
62%% Now, to extract fine scale detail, we must interpolate the image
63%% back up to full size before subtracting it from the original. The
64%% upConv function does upsampling (padding with zeros between
65%% samples) followed by convolution. This can be done using the
66%% lowpass filter that was applied before subsampling or it can be
67%% done with a different filter.
68fine1 = im - upConv(blurred1,lo_filt,'reflect1',[2 2],[1 1],size(im));
69subplot(1,2,1); showIm(fine1,'auto2','auto','fine1');
70
71%% We now have a technique that takes an image, computes two new
72%% images (blurred1 and fine1) containing the coarse scale information
73%% and the fine scale information. We can also (trivially)
74%% reconstruct the original from these two (even if the subsampling of
75%% the blurred1 image caused aliasing):
76
77recon = fine1 + upConv(blurred1,lo_filt,'reflect1',[2 2],[1 1],size(im));
78imStats(im, recon);
79
80%% Thus, we have described an INVERTIBLE linear transform that maps an
81%% input image to the two images blurred1 and fine1. The inverse
82%% transformation maps blurred1 and fine1 to the result. This is
83%% depicted graphically with a system diagram:
84%%
85%% IM --> blur/down2 ---------> BLURRED1 --> up2/blur --> add --> RECON
86%% | | ^
87%% | | |
88%% | V |
89%% | up2/blur |
90%% | | |
91%% | | |
92%% | V |
93%% --------------> subtract --> FINE1 -------------------
94%%
95%% Note that the number of samples in the representation (i.e., total
96%% samples in BLURRED1 and FINE1) is 1.5 times the number of samples
97%% in the original IM. Thus, this representation is OVERCOMPLETE.
98
99%% Often, we will want further subdivisions of scale. We can
100%% decompose the (coarse-scale) BLURRED1 image into medium coarse and
101%% very coarse images by applying the same splitting technique:
102blurred2 = corrDn(blurred1,lo_filt,'reflect1',[2 2]);
103showIm(blurred2)
104
105fine2 = blurred1 - upConv(blurred2,lo_filt,'reflect1',[2 2],[1 1],size(blurred1));
106showIm(fine2)
107
108%% Since blurred2 and fine2 can be used to reconstruct blurred1, and
109%% blurred1 and fine1 can be used to reconstruct the original image,
110%% the set of THREE images (also known as "subbands") {blurred2,
111%% fine2, fine1} constitute a complete representation of the original
112%% image. Note that the three subbands are displayed at the same size,
113%% but they are actually three different sizes.
114
115subplot(1,3,1); showIm(fine1,'auto2',2^(imSubSample-1),'fine1');
116subplot(1,3,2); showIm(fine2,'auto2',2^(imSubSample),'fine2');
117subplot(1,3,3); showIm(blurred2,'auto2',2^(imSubSample+1),'blurred2');
118
119%% It is useful to consider exactly what information is stored in each
120%% of the pyramid subbands. The reconstruction process involves
121%% recursively interpolating these images and then adding them to the
122%% image at the next finer scale. To see the contribution of ONE of
123%% the representation images (say blurred2) to the reconstruction, we
124%% imagine filling all the other subbands with zeros and then
125%% following our reconstruction procedure. For the blurred2 subband,
126%% this is equivalent to simply calling upConv twice:
127blurred2_full = upConv(upConv(blurred2,lo_filt,'reflect1',[2 2],[1 1],size(blurred1)),...
128 lo_filt,'reflect1',[2 2],[1 1],size(im));
129subplot(1,3,3); showIm(blurred2_full,'auto2',2^(imSubSample-1),'blurred2-full');
130
131%% For the fine2 subband, this is equivalent to calling upConv once:
132fine2_full = upConv(fine2,lo_filt,'reflect1',[2 2],[1 1],size(im));
133subplot(1,3,2); showIm(fine2_full,'auto2',2^(imSubSample-1),'fine2-full');
134
135%% If we did everything correctly, we should be able to add together
136%% these three full-size images to reconstruct the original image:
137recon = blurred2_full + fine2_full + fine1;
138imStats(im, recon)
139
140%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
141
142%%% FUNCTIONS for CONSTRUCTING/MANIPULATING LAPLACIAN PYRAMIDS
143
144%% We can continue this process, recursively splitting off finer and
145%% finer details from the blurred image (like peeling off the outer
146%% layers of an onion). The resulting data structure is known as a
147%% "Laplacian Pyramid". To make things easier, we have written a
148%% MatLab function called buildLpyr to construct this object. The
149%% function returns two items: a long vector containing the subbands
150%% of the pyramid, and an index matrix that is used to access these
151%% subbands. The display routine showLpyr shows all the subbands of the
152%% pyramid, at the their correct relative sizes. It should now be
153%% clearer why these data structures are called "pyramids".
154[pyr,pind] = buildLpyr(im,5-imSubSample);
155showLpyr(pyr,pind);
156
157%% There are also "accessor" functions for pulling out a single subband:
158showIm(pyrBand(pyr,pind,2));
159
160%% The reconLpyr function allows you to reconstruct from a laplacian pyramid.
161%% The third (optional) arg allows you to select any subset of pyramid bands
162%% (default is to use ALL of them).
163clf; showIm(reconLpyr(pyr,pind,[1 3]),'auto2','auto','bands 1 and 3 only');
164
165fullres = reconLpyr(pyr,pind);
166showIm(fullres,'auto2','auto','Full reconstruction');
167imStats(im,fullres);
168
169%% buildLpyr uses 5-tap filters by default for building Laplacian
170%% pyramids. You can specify other filters:
171namedFilter('binom3')
172[pyr3,pind3] = buildLpyr(im,5-imSubSample,'binom3');
173showLpyr(pyr3,pind3);
174fullres3 = reconLpyr(pyr3,pind3,'all','binom3');
175imStats(im,fullres3);
176
177%% Here we build a "Laplacian" pyramid using random filters. filt1 is
178%% used with the downsampling operations and filt2 is used with the
179%% upsampling operations. We normalize the filters for display
180%% purposes. Of course, these filters are (almost certainly) not very
181%% "Gaussian", and the subbands of such a pyramid will be garbage!
182%% Nevertheless, it is a simple property of the Laplacian pyramid that
183%% we can use ANY filters and we will still be able to reconstruct
184%% perfectly.
185
186filt1 = rand(1,5); filt1 = sqrt(2)*filt1/sum(filt1)
187filt2 = rand(1,3); filt2 = sqrt(2)*filt2/sum(filt2)
188[pyrr,pindr] = buildLpyr(im,5-imSubSample,filt1,filt2);
189showLpyr(pyrr,pindr);
190fullresr = reconLpyr(pyrr,pindr,'all',filt2);
191imStats(im,fullresr);
192
193%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
194
195%%% ALIASING in the Gaussian and Laplacian pyramids:
196
197%% Unless one is careful, the subsampling operations will introduce aliasing
198%% artifacts in these pyramid transforms. This is true even though the
199%% Laplacian pyramid can be used to reconstruct the original image perfectly.
200%% When reconstructing, the pyramid is designed in such a way that these
201%% aliasing artifacts cancel out. So it's not a problem if the only thing we
202%% want to do is reconstruct. However, it can be a serious problem if we
203%% intend to process each of the subbands independently.
204
205%% One way to see the consequences of the aliasing artifacts is by
206%% examining variations that occur when the input is shifted. We
207%% choose an image and shift it by some number of pixels. Then blur
208%% (filter-downsample-upsample-filter) the original image and blur the
209%% shifted image. If there's no aliasing, then the blur and shift
210%% operations should commute (i.e.,
211%% shift-filter-downsample-upsample-filter is the same as
212%% filter-downsample-upsample-filter-shift). Try this for 2 different
213%% filters (by replacing 'binom3' with 'binom5' or 'binom7' below),
214%% and you'll see that the aliasing is much worse for the 3 tap
215%% filter.
216
217sig = 100*randn([1 16]);
218sh = [0 7]; %shift amount
219lev = 2; % level of pyramid to look at
220flt = 'binom3'; %filter to use:
221
222shiftIm = shift(sig,sh);
223[pyr,pind] = buildLpyr(shiftIm, lev, flt, flt, 'circular');
224shiftBlur = reconLpyr(pyr, pind, lev, flt, 'circular');
225
226[pyr,pind] = buildLpyr(sig, lev, flt, flt, 'circular');
227res = reconLpyr(pyr, pind, lev, flt, 'circular');
228blurShift = shift(res,sh);
229
230subplot(2,1,1); r = showIm(shiftBlur,'auto2','auto','shiftBlur');
231subplot(2,1,2); showIm(blurShift,r,'auto','blurShift');
232imStats(blurShift,shiftBlur);
233
234%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
235
236%%% PROJECTION and BASIS functions:
237
238%% An invertible, linear transform can be characterized in terms
239%% of a set of PROJECTION and BASIS functions. In matlab matrix
240%% notation:
241%
242%% c = P' * x
243%% x = B * c
244%
245%% where x is an input, c are the transform coefficients, P and B
246%% are matrices. The columns of P are the projection functions (the
247%% input is projected onto the the columns of P to get each successive
248%% transform coefficient). The columns of B are the basis
249%% functions (x is a linear combination of the columns of B).
250
251%% Since the Laplacian pyramid is a linear transform, we can ask: what
252%% are its BASIS functions? We consider these in one dimension for
253%% simplicity. The BASIS function corresponding to a given
254%% coefficient tells us how much that coefficient contributes to each
255%% pixel in the reconstructed image. We can construct a single basis
256%% function by setting one sample of one subband equal to 1.0 (and all
257%% others to zero) and reconstructing. To build the entire matrix, we
258%% have to do this for every sample of every subband:
259sz = min(round(48/(sqrt(2)^imSubSample)),36);
260sig = zeros(sz,1);
261[pyr,pind] = buildLpyr(sig);
262basis = zeros(sz,size(pyr,1));
263for n=1:size(pyr,1)
264 pyr = zeros(size(pyr));
265 pyr(n) = 1;
266 basis(:,n) = reconLpyr(pyr,pind);
267end
268clf; showIm(basis)
269
270%% The columns of the basis matrix are the basis functions. The
271%% matrix is short and fat, corresponding to the fact that the
272%% representation is OVERCOMPLETE. Below, we plot the middle one from
273%% each subband, starting with the finest scale. Note that all of
274%% these basis functions are lowpass (Gaussian-like) functions.
275locations = round(sz * (2 - 3./2.^[1:max(4,size(pind,1))]))+1;
276for lev=1:size(locations,2)
277 subplot(2,2,lev);
278 showIm(basis(:,locations(lev)));
279 axis([0 sz 0 1.1]);
280end
281
282%% Now, we'd also like see the inverse (we'll them PROJECTION)
283%% functions. We need to ask how much of each sample of the input
284%% image contributes to a given pyramid coefficient. Thus, the matrix
285%% is constructed by building pyramids on the set of images with
286%% impulses at each possible location. The rows of this matrix are
287%% the projection functions.
288projection = zeros(size(pyr,1),sz);
289for pos=1:sz
290 [pyr,pind] = buildLpyr(mkImpulse([1 sz], [1 pos]));
291 projection(:,pos) = pyr;
292end
293clf; showIm(projection);
294
295%% Building a pyramid corresponds to multiplication by the projection
296%% matrix. Reconstructing from this pyramid corresponds to
297%% multiplication by the basis matrix. Thus, the product of the two
298%% matrices (in this order) should be the identity matrix:
299showIm(basis*projection);
300
301%% We can plot a few example projection functions at different scales.
302%% Note that all of the projection functions are bandpass functions,
303%% except for the coarsest subband which is lowpass.
304for lev=1:size(locations,2)
305 subplot(2,2,lev);
306 showIm(projection(locations(lev),:));
307 axis([0 sz -0.3 0.8]);
308end
309
310%% Now consider the frequency response of these functions, plotted over the
311%% range [-pi,pi]:
312for lev=1:size(locations,2)
313 subplot(2,2,lev);
314 proj = projection(locations(lev),:);
315 plot(pi*[-32:31]/32,fftshift(abs(fft(proj',64))));
316 axis([-pi pi -0.1 3]);
317end
318
319%% The first projection function is highpass, and the second is bandpass. Both
320%% of these look something like the Laplacian (2nd derivative) of a Gaussian.
321%% The last is lowpass, as are the basis functions. Thus, the basic operation
322%% used to create each level of the pyramid involves a simple highpass/lowpass
323%% split.
324
325%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
326
327%%% QMF/WAVELET PYRAMIDS.
328
329%% Two things about Laplacian pyramids are a bit unsatisfactory.
330%% First, there are more pixels (coefficients) in the representation
331%% than in the original image. Specifically, the 1-dimensional
332%% transform is overcomplete by a factor of 4/3, and the 2-dimensional
333%% transform is overcomplete by a factor of 2. Secondly, the
334%% "bandpass" images (fineN) do not segregate information according to
335%% orientation.
336
337%% There are other varieties of pyramid. One type that arose in the
338%% speech coding community is based on a particular pairs of filters
339%% known as a "Quadrature Mirror Filters" or QMFs. These are closely
340%% related to Wavelets (essentially, they are approximate wavelet
341%% filters).
342
343%% Recall that the Laplacian pyramid is formed by simple hi/low
344%% splitting at each level. The lowpass band is subsampled by a
345%% factor of 2, but the highpass band is NOT subsampled. In the QMF
346%% pyramid, we apply two filters (hi- and lo- pass) and subsample BOTH
347%% by a factor of 2, thus eliminating the excess coefficients of the
348%% Laplacian pyramid.
349
350%% The two filters must have a specific relationship to each
351%% other. In particular, let n be an index for the filter samples.
352%% The highpass filter may be constructed from the lowpass filter by
353%% (1) modulating (multiplying) by (-1)^n (equivalent to shifting by
354%% pi in the Fourier domain), (2) flipping (i.e., reversing the order
355%% of the taps), (3) spatially shifting by one sample. Try to
356%% convince yourself that the resulting filters will always be
357%% orthogonal to each other (i.e., their inner products will be zero)
358%% when shifted by any multiple of two.
359
360%% The function modulateFlip performs the first two of these operations. The
361%% third (spatial shifting) step is built into the convolution code.
362flo = namedFilter('qmf9')';
363fhi = modulateFlip(flo)';
364subplot(2,1,1); lplot(flo); axis([0 10 -0.5 1.0]); title('lowpass');
365subplot(2,1,2); lplot(fhi); axis([0 10 -0.5 1.0]); title('highpass');
366
367%% In the Fourier domain, these filters are (approximately)
368%% "power-complementary": the sum of their squared power spectra is
369%% (approximately) a constant. But note that neither is a perfect
370%% bandlimiter (i.e., a sinc function), and thus subsampling by a
371%% factor of 2 will cause aliasing in each of the subbands. See below
372%% for a discussion of the effect of this aliasing.
373
374%% Plot the two frequency responses:
375freq = pi*[-32:31]/32;
376subplot(2,1,1);
377plot(freq,fftshift(abs(fft(flo,64))),'--',freq,fftshift(abs(fft(fhi,64))),'-');
378axis([-pi pi 0 1.5]); title('FFT magnitudes');
379subplot(2,1,2);
380plot(freq,fftshift(abs(fft(flo,64)).^2)+fftshift(abs(fft(fhi,64)).^2));
381axis([-pi pi 0 2.2]); title('Sum of squared magnitudes');
382
383%% We can split an input signal into two bands as follows:
384sig = mkFract([1,64],1.6);
385subplot(2,1,1); showIm(sig,'auto1','auto','sig');
386lo1 = corrDn(sig,flo,'reflect1',[1 2],[1 1]);
387hi1 = corrDn(sig,fhi,'reflect1',[1 2],[1 2]);
388subplot(2,1,2);
389showIm(lo1,'auto1','auto','low and high bands'); hold on; plot(hi1,'--r'); hold off;
390
391%% Notice that the two subbands are half the size of the original
392%% image, due to the subsampling by a factor of 2. One subtle point:
393%% the highpass and lowpass bands are subsampled on different
394%% lattices: the lowpass band retains the odd-numbered samples and the
395%% highpass band retains the even-numbered samples. This was the
396%% 1-sample shift relating the high and lowpass kernels (mentioned
397%% above). We've used the 'reflect1' to handle boundaries, which
398%% works properly for symmetric odd-length QMFs.
399
400%% We can reconstruct the original image by interpolating these two subbands
401%% USING THE SAME FILTERS:
402reconlo = upConv(lo1,flo,'reflect1',[1 2]);
403reconhi = upConv(hi1,fhi,'reflect1',[1 2],[1 2]);
404subplot(2,1,2); showIm(reconlo+reconhi,'auto1','auto','reconstructed');
405imStats(sig,reconlo+reconhi);
406
407%% We have described an INVERTIBLE linear transform that maps an input
408%% image to the two images lo1 and hi1. The inverse transformation
409%% maps these two images to the result. This is depicted graphically
410%% with a system diagram:
411%%
412%% IM ---> flo/down2 --> LO1 --> up2/flo --> add --> RECON
413%% | ^
414%% | |
415%% | |
416%% -> fhi/down2 --> HI1 --> up2/fhi -----
417%%
418%% Note that the number of samples in the representation (i.e., total
419%% samples in LO1 and HI1) is equal to the number of samples in the
420%% original IM. Thus, this representation is exactly COMPLETE, or
421%% "critically sampled".
422
423%% So we've fixed one of the problems that we had with Laplacian
424%% pyramid. But the system diagram above places strong constraints on
425%% the filters. In particular, for these filters the reconstruction
426%% is no longer perfect. Turns out there are NO
427%% perfect-reconstruction symmetric filters that are
428%% power-complementary, except for the trivial case [1] and the
429%% nearly-trivial case [1 1]/sqrt(2).
430
431%% Let's consider the projection functions of this 2-band splitting
432%% operation. We can construct these by applying the transform to
433%% impulse input signals, for all possible impulse locations. The
434%% rows of the following matrix are the projection functions for each
435%% coefficient in the transform.
436M = [corrDn(eye(32),flo','circular',[1 2]), ...
437 corrDn(eye(32),fhi','circular',[1 2],[1 2])]';
438clf; showIm(M,'auto1','auto','M');
439
440%% The transform matrix is composed of two sub-matrices. The top half
441%% contains the lowpass kernel, shifted by increments of 2 samples.
442%% The bottom half contains the highpass. Now we compute the inverse
443%% of this matrix:
444M_inv = inv(M);
445showIm(M_inv,'auto1','auto','M_inv');
446
447%% The inverse is (very close to) the transpose of the original
448%% matrix! In other words, the transform is orthonormal.
449imStats(M_inv',M);
450
451%% This also points out a nice relationship between the corrDn and
452%% upConv functions, and the matrix representation. corrDn is
453%% equivalent to multiplication by a matrix with copies of the filter
454%% on the ROWS, translated in multiples of the downsampling factor.
455%% upConv is equivalent to multiplication by a matrix with copies of
456%% the filter on the COLUMNS, translated by the upsampling factor.
457
458%% As in the Laplacian pyramid, we can recursively apply this QMF
459%% band-splitting operation to the lowpass band:
460lo2 = corrDn(lo1,flo,'reflect1',[1 2]);
461hi2 = corrDn(lo1,fhi,'reflect1',[1 2],[1 2]);
462
463%% The representation of the original signal is now comprised of the
464%% three subbands {hi1, hi2, lo2} (we don't hold onto lo1, because it
465%% can be reconstructed from lo2 and hi2). Note that hi1 is at 1/2
466%% resolution, and hi2 and lo2 are at 1/4 resolution: The total number
467%% of samples in these three subbands is thus equal to the number of
468%% samples in the original signal.
469imnames=['hi1'; 'hi2'; 'lo2'];
470for bnum=1:3
471 band = eval(imnames(bnum,:));
472 subplot(3,1,bnum); showIm(band); ylabel(imnames(bnum,:));
473 axis([1 size(band,2) 1.1*min(lo2) 1.1*max(lo2)]);
474end
475
476%% Reconstruction proceeds as with the Laplacian pyramid: combine lo2 and hi2
477%% to reconstruct lo1, which is then combined with hi1 to reconstruct the
478%% original signal:
479recon_lo1 = upConv(hi2,fhi,'reflect1',[1 2],[1 2]) + ...
480 upConv(lo2,flo,'reflect1',[1 2],[1 1]);
481reconstructed = upConv(hi1,fhi,'reflect1',[1 2],[1 2]) + ...
482 upConv(recon_lo1,flo,'reflect1',[1 2],[1 1]);
483imStats(sig,reconstructed);
484
485%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
486
487%%% FUNCTIONS for CONSTRUCTING/MANIPULATING QMF/Wavelet PYRAMIDS
488
489%% To make things easier, we have bundled these qmf operations and
490%% data structures into an object in MATLAB.
491
492sig = mkFract([1 64], 1.5);
493[pyr,pind] = buildWpyr(sig);
494showWpyr(pyr,pind);
495
496nbands = size(pind,1);
497for b = 1:nbands
498 subplot(nbands,1,b); lplot(pyrBand(pyr,pind,b));
499end
500
501res = reconWpyr(pyr,pind);
502imStats(sig,res);
503
504%% Now for 2D, we use separable filters. There are 4 ways to apply the two
505%% filters to the input image (followed by the relavent subsampling operation):
506%% (1) lowpass in both x and y
507%% (2) lowpass in x and highpass in y
508%% (3) lowpass in y and highpass in x
509%% (4) highpass in both x and y.
510%% The pyramid is built by recursively subdividing the first of these bands
511%% into four new subbands.
512
513%% First, we'll take a look at some of the basis functions.
514sz = 40;
515zim = zeros(sz);
516flo = 'qmf9'; edges = 'reflect1';
517[pyr,pind] = buildWpyr(zim);
518
519% Put an impulse into the middle of each band:
520for lev=1:size(pind,1)
521 mid = sum(prod(pind(1:lev-1,:)'));
522 mid = mid + floor(pind(lev,2)/2)*pind(lev,1) + floor(pind(lev,1)/2) + 1;
523 pyr(mid,1) = 1;
524end
525
526% And take a look at the reconstruction of each band:
527for lnum=1:wpyrHt(pind)+1
528 for bnum=1:3
529 subplot(wpyrHt(pind)+1,3,(wpyrHt(pind)+1-lnum)*3+bnum);
530 showIm(reconWpyr(pyr, pind, flo, edges, lnum, bnum),'auto1',2,0);
531 end
532end
533
534%% Note that the first column contains horizontally oriented basis functions at
535%% different scales. The second contains vertically oriented basis functions.
536%% The third contains both diagonals (a checkerboard pattern). The bottom row
537%% shows (3 identical images of) a lowpass basis function.
538
539%% Now look at the corresponding Fourier transform magnitudes (these
540%% are plotted over the frequency range [-pi, pi] ):
541nextFig(2,1);
542freq = 2 * pi * [-sz/2:(sz/2-1)]/sz;
543for lnum=1:wpyrHt(pind)+1
544 for bnum=1:3
545 subplot(wpyrHt(pind)+1,3,(wpyrHt(pind)+1-lnum)*3+bnum);
546 basisFn = reconWpyr(pyr, pind, flo, edges, lnum, bnum);
547 basisFmag = fftshift(abs(fft2(basisFn,sz,sz)));
548 imagesc(freq,freq,basisFmag);
549 axis('square'); axis('xy'); colormap('gray');
550 end
551end
552nextFig(2,-1);
553
554%% The filters at a given scale sum to a squarish annular region:
555sumSpectra = zeros(sz);
556lnum = 2;
557for bnum=1:3
558 basisFn = reconWpyr(pyr, pind, flo, edges, lnum, bnum);
559 basisFmag = fftshift(abs(fft2(basisFn,sz,sz)));
560 sumSpectra = basisFmag.^2 + sumSpectra;
561end
562clf; imagesc(freq,freq,sumSpectra); axis('square'); axis('xy'); title('one scale');
563
564%% Now decompose an image:
565[pyr,pind] = buildWpyr(im);
566
567%% View all of the subbands (except lowpass), scaled to be the same size
568%% (requires a big figure window):
569nlevs = wpyrHt(pind);
570for lnum=1:nlevs
571 for bnum=1:3
572 subplot(nlevs,3,(lnum-1)*3+bnum);
573 showIm(wpyrBand(pyr,pind,lnum,bnum), 'auto2', 2^(lnum+imSubSample-2));
574 end
575end
576
577%% In addition to the bands shown above, there's a lowpass residual:
578nextFig(2,1);
579clf; showIm(pyrLow(pyr,pind));
580nextFig(2,-1);
581
582% Alternatively, display the pyramid with the subbands shown at their
583% correct relative sizes:
584clf; showWpyr(pyr, pind);
585
586%% The reconWpyr function can be used to reconstruct the entire pyramid:
587reconstructed = reconWpyr(pyr,pind);
588imStats(im,reconstructed);
589
590%% As with Laplacian pyramids, you can specify sub-levels and subbands
591%% to be included in the reconstruction. For example:
592clf
593showIm(reconWpyr(pyr,pind,'qmf9','reflect1',[1:wpyrHt(pind)],[1])); %Horizontal only
594showIm(reconWpyr(pyr,pind,'qmf9','reflect1',[2,3])); %two middle scales
595
596%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
597
598%%% PERFECT RECONSTRUCTION: HAAR AND DEBAUCHIES WAVELETS
599
600%% The symmetric QMF filters used above are not perfectly orthogonal.
601%% In fact, it's impossible to construct a symmetric filter of size
602%% greater than 2 that is perfectly orthogonal to shifted copies
603%% (shifted by multiples of 2) of itself. For example, consider a
604%% symmetric kernel of length 3. Shift by two and the right end of
605%% the original kernel is aligned with the left end of the shifted
606%% one. Thus, the inner product of these two will be the square of
607%% the end tap, which will be non-zero.
608
609%% However, one can easily create wavelet filters of length 2 that
610%% will do the job. This is the oldest known wavelet, known as the
611%% "Haar". The two kernels are [1,1]/sqrt(2) and [1,-1]/sqrt(2).
612%% These are trivially seen to be orthogonal to each other, and shifts
613%% by multiples of two are also trivially orthogonal. The projection
614%% functions of the Haar transform are in the rows of the following
615%% matrix, constructed by applying the transform to impulse input
616%% signals, for all possible impulse locations:
617
618haarLo = namedFilter('haar')
619haarHi = modulateFlip(haarLo)
620subplot(2,1,1); lplot(haarLo); axis([0 3 -1 1]); title('lowpass');
621subplot(2,1,2); lplot(haarHi); axis([0 3 -1 1]); title('highpass');
622
623M = [corrDn(eye(32), haarLo, 'reflect1', [2 1], [2 1]); ...
624 corrDn(eye(32), haarHi, 'reflect1', [2 1], [2 1])];
625clf; showIm(M)
626showIm(M*M') %identity!
627
628%% As before, the filters are power-complementary (although the
629%% frequency isolation is rather poor, and thus the subbands will be
630%% heavily aliased):
631plot(pi*[-32:31]/32,abs(fft(haarLo,64)).^2,'--',...
632 pi*[-32:31]/32,abs(fft(haarHi,64)).^2,'-');
633
634sig = mkFract([1,64],0.5);
635[pyr,pind] = buildWpyr(sig,4,'haar','reflect1');
636showWpyr(pyr,pind);
637
638%% check perfect reconstruction:
639res = reconWpyr(pyr,pind, 'haar', 'reflect1');
640imStats(sig,res)
641
642%% If you want perfect reconstruction, but don't like the Haar
643%% transform, there's another option: drop the symmetry requirement.
644%% Ingrid Daubechies developed one of the earliest sets of such
645%% perfect-reconstruction wavelets. The simplest of these is of
646%% length 4:
647
648daub_lo = namedFilter('daub2');
649daub_hi = modulateFlip(daub_lo);
650
651%% The daub_lo filter is constructed to be orthogonal to 2shifted
652%% copy of itself. For example:
653[daub_lo;0;0]'*[0;0;daub_lo]
654
655M = [corrDn(eye(32), daub_lo, 'circular', [2 1], [2 1]); ...
656 corrDn(eye(32), daub_hi, 'circular', [2 1], [2 1])];
657clf; showIm(M)
658showIm(M*M') % identity!
659
660%% Again, they're power complementary:
661plot(pi*[-32:31]/32,abs(fft(daub_lo,64)).^2,'--',...
662 pi*[-32:31]/32,abs(fft(daub_hi,64)).^2,'-');
663
664%% The sum of the power spectra is again flat
665plot(pi*[-32:31]/32,...
666 fftshift(abs(fft(daub_lo,64)).^2)+fftshift(abs(fft(daub_hi,64)).^2));
667
668%% Make a pyramid using the same code as before (except that we can't
669%% use reflected boundaries with asymmetric filters):
670[pyr,pind] = buildWpyr(sig, maxPyrHt(size(sig),size(daub_lo)), daub_lo, 'circular');
671showWpyr(pyr,pind,'indep1');
672
673res = reconWpyr(pyr,pind, daub_lo,'circular');
674imStats(sig,res);
675
676%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
677
678%%% ALIASING IN WAVELET TRANSFORMS
679
680%% All of these orthonormal pyramid/wavelet transforms have a lot
681%% of aliasing in the subbands. You can see that in the frequency
682%% response plots since the frequency response of each filter
683%% covers well more than half the frequency domain. The aliasing
684%% can have serious consequences...
685
686%% Get one of the basis functions of the 2D Daubechies wavelet transform:
687[pyr,pind] = buildWpyr(zeros(1,64),4,daub_lo,'circular');
688lev = 3;
689pyr(1+sum(pind(1:lev-1,2))+pind(lev,2)/2,1) = 1;
690sig = reconWpyr(pyr,pind, daub_lo,'circular');
691clf; lplot(sig)
692
693%% Since the basis functions are orthonormal, building a pyramid using this
694%% input will yield a single non-zero coefficient.
695[pyr,pind] = buildWpyr(sig, 4, daub_lo, 'circular');
696figure(1);
697nbands = size(pind,1)
698for b=1:nbands
699 subplot(nbands,1,b); lplot(pyrBand(pyr,pind,b));
700 axis([1 size(pyrBand(pyr,pind,b),2) -0.3 1.3]);
701end
702
703%% Now shift the input by one sample and re-build the pyramid.
704shifted_sig = [0,sig(1:size(sig,2)-1)];
705[spyr,spind] = buildWpyr(shifted_sig, 4, daub_lo, 'circular');
706
707%% Plot each band of the unshifted and shifted decomposition
708nextFig(2);
709nbands = size(spind,1)
710for b=1:nbands
711 subplot(nbands,1,b); lplot(pyrBand(spyr,spind,b));
712 axis([1 size(pyrBand(spyr,spind,b),2) -0.3 1.3]);
713end
714nextFig(2,-1);
715
716%% In the third band, we expected the coefficients to move around
717%% because the signal was shifted. But notice that in the original
718%% signal decomposition, the other bands were filled with zeros.
719%% After the shift, they have significant content. Although these
720%% subbands are supposed to represent information at different scales,
721%% their content also depends on the relative POSITION of the input
722%% signal.
723
724%% This problem is not unique to the Daubechies transform. The same
725%% is true for the QMF transform. Try it... In fact, the same kind
726%% of problem occurs for almost any orthogonal pyramid transform (the
727%% only exception is the limiting case in which the filter is a sinc
728%% function).
729
730%% Orthogonal pyramid transforms are not shift-invariant. Although
731%% orthogonality may be an important property for some applications
732%% (e.g., data compression), orthogonal pyramid transforms are
733%% generally not so good for image analysis.
734
735%% The overcompleteness of the Laplacian pyramid turns out to be a
736%% good thing in the end. By using an overcomplete representation
737%% (and by choosing the filters properly to avoid aliasing as much as
738%% possible), you end up with a representation that is useful for
739%% image analysis.
740
741%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
742
743%%% The "STEERABLE PYRAMID"
744
745%% The steerable pyramid is a multi-scale representation that is
746%% translation-invariant, but that also includes representation of
747%% orientation. Furthermore, the representation of orientation is
748%% designed to be rotation-invariant. The basis/projection functions
749%% are oriented (steerable) filters, localized in space and frequency.
750%% It is overcomplete to avoid aliasing. And it is "self-inverting"
751%% (like the QMF/Wavelet transform): the projection functions and
752%% basis functions are identical. The mathematical phrase for a
753%% transform obeying this property is "tight frame".
754
755%% The system diagram for the steerable pyramid (described in the
756%% reference given below) is as follows:
757%
758% IM ---> fhi0 -----------------> H0 ---------------- fhi0 ---> RESULT
759% | |
760% | |
761% |-> flo0 ---> fl1/down2 --> L1 --> up2/fl1 ---> flo0 -|
762% | |
763% |----> fb0 -----> B0 ----> fb0 ---|
764% | |
765% |----> fb1 -----> B1 ----> fb1 ---|
766% . .
767% . .
768% |----> fbK -----> BK ----> fbK ---|
769%
770%% The filters {fhi0,flo0} are used to initially split the image into
771%% a highpass residual band H0 and a lowpass subband. This lowpass
772%% band is then split into a low(er)pass band L1 and K+1 oriented
773%% subbands {B0,B1,...,BK}. The representatation is substantially
774%% overcomplete. The pyramid is built by recursively splitting the
775%% lowpass band (L1) using the inner portion of the diagram (i.e.,
776%% using the filters {fl1,fb0,fb1,...,fbK}). The resulting transform is
777%% overcomplete by a factor of 4k/3.
778
779%% The scale tuning of the filters is constrained by the recursive
780%% system diagram. The orientation tuning is constrained by requiring
781%% the property of steerability. A set of filters form a steerable
782%% basis if they 1) are rotated copies of each other, and 2) a copy of
783%% the filter at any orientation may be computed as a linear
784%% combination of the basis filters. The simplest examples of
785%% steerable filters is a set of N+1 Nth-order directional
786%% derivatives.
787
788%% Choose a filter set (options are 'sp0Filters', 'sp1Filters',
789%% 'sp3Filters', 'sp5Filters'):
790filts = 'sp3Filters';
791[lo0filt,hi0filt,lofilt,bfilts,steermtx,harmonics] = eval(filts);
792fsz = round(sqrt(size(bfilts,1))); fsz = [fsz fsz];
793nfilts = size(bfilts,2);
794nrows = floor(sqrt(nfilts));
795
796%% Look at the oriented bandpass filters:
797for f = 1:nfilts
798 subplot(nrows,ceil(nfilts/nrows),f);
799 showIm(conv2(reshape(bfilts(:,f),fsz),lo0filt));
800end
801
802%% Try "steering" to a new orientation (new_ori in degrees):
803new_ori = 360*rand(1)
804clf; showIm(conv2(reshape(steer(bfilts, new_ori*pi/180 ), fsz), lo0filt));
805
806%% Look at Fourier transform magnitudes:
807lo0 = fftshift(abs(fft2(lo0filt,64,64)));
808fsum = zeros(size(lo0));
809for f = 1:size(bfilts,2)
810 subplot(nrows,ceil(nfilts/nrows),f);
811 flt = reshape(bfilts(:,f),fsz);
812 freq = lo0 .* fftshift(abs(fft2(flt,64,64)));
813 fsum = fsum + freq.^2;
814 showIm(freq);
815end
816
817%% The filters sum to a smooth annular ring:
818clf; showIm(fsum);
819
820%% build a Steerable pyramid:
821[pyr,pind] = buildSpyr(im, 4-imSubSample, filts);
822
823%% Look at first (vertical) bands, different scales:
824for s = 1:min(4,spyrHt(pind))
825 band = spyrBand(pyr,pind,s,1);
826 subplot(2,2,s); showIm(band);
827end
828
829%% look at all orientation bands at one level (scale):
830for b = 1:spyrNumBands(pind)
831 band = spyrBand(pyr,pind,1,b);
832 subplot(nrows,ceil(nfilts/nrows),b);
833 showIm(band);
834end
835
836%% To access the high-pass and low-pass bands:
837low = pyrLow(pyr,pind);
838showIm(low);
839high = spyrHigh(pyr,pind);
840showIm(high);
841
842%% Display the whole pyramid (except for the highpass residual band),
843%% with images shown at proper relative sizes:
844showSpyr(pyr,pind);
845
846%% Spin a level of the pyramid, interpolating (steering to)
847%% intermediate orienations:
848
849[lev,lind] = spyrLev(pyr,pind,2);
850lev2 = reshape(lev,prod(lind(1,:)),size(bfilts,2));
851figure(1); subplot(1,1,1); showIm(spyrBand(pyr,pind,2,1));
852M = moviein(16);
853for frame = 1:16
854 steered_im = steer(lev2, 2*pi*(frame-1)/16, harmonics, steermtx);
855 showIm(reshape(steered_im, lind(1,:)),'auto2');
856 M(:,frame) = getframe;
857end
858
859%% Show the movie 3 times:
860movie(M,3);
861
862%% Reconstruct. Note that the filters are not perfect, although they are good
863%% enough for most applications.
864res = reconSpyr(pyr, pind, filts);
865showIm(im + i * res);
866imStats(im,res);
867
868%% As with previous pyramids, you can select subsets of the levels
869%% and orientation bands to be included in the reconstruction. For example:
870
871%% All levels (including highpass and lowpass residuals), one orientation:
872clf; showIm(reconSpyr(pyr,pind,filts,'reflect1','all', [1]));
873
874%% Without the highpass and lowpass:
875clf; showIm(reconSpyr(pyr,pind,filts,'reflect1',[1:spyrHt(pind)], [1]));
876
877%% We also provide an implementation of the Steerable pyramid in the
878%% Frequency domain. The advantages are perfect-reconstruction
879%% (within floating-point error), and any number of orientation
880%% bands. The disadvantages are that it is typically slower, and the
881%% boundary handling is always circular.
882
883[pyr,pind] = buildSFpyr(im,4,4); % 4 levels, 5 orientation bands
884showSpyr(pyr,pind);
885res = reconSFpyr(pyr,pind);
886imStats(im,res); % nearly perfect
887
888%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
889% The steerable pyramid transform given above is described in:
890%
891% E P Simoncelli and W T Freeman.
892% The Steerable Pyramid: A Flexible Architecture for Multi-Scale
893% Derivative Computation. IEEE Second Int'l Conf on Image Processing.
894% Washington DC, October 1995.
895%
896% Online access:
897% Abstract: http://www.cis.upenn.edu/~eero/ABSTRACTS/simoncelli95b-abstract.html
898% Full (PostScript): ftp://ftp.cis.upenn.edu/pub/eero/simoncelli95b.ps.Z
899%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
900
901%% Local Variables:
902%% buffer-read-only: t
903%% End: