# run in GRID, not ARC
# &run .aml ingrid sd prob bbox
# sd 0.0001 prob 0.05
#
# 9a - limit to 4 steps, see if that has any significant deterioration of smoothing performance. Should fix
# the problem with islands and headlands - turns out also need to remove water except
# for one cell adjacent to land, and give that a higher uncertainty. See cluster_multiscalesmooth9a_clean_216
#
# Version 9:
# Focus on implementing identical algorithm to directsmooth2 using multiscale method i.e. aggregating by factor of 3
# from already aggregated data, rather than from original resolution each time.
#
# Version 8:
# WARNING: have not yet checked that the additional weighting of the gaussian smoothing is not messing with
# the calculations of variance etc.
# Replaced simple 3x3 aggregation with gaussian smoothing
# Kernel is chosen to give appropriate level of smoothing and produce a fairly accurate approximation of the smoothed
# surface by interpolation of the coarser scale smoothed values
# Details in gaussian.xls on john's laptop
#
# Version 7:
# Further reworking of how the final values are selected - a mean is a candidate if its associated variance
# is less than the mean sample uncertainty, and the mean with the lowest variance among the candidates is the chosen one.
# Implement that in the nested sense by taking the lowest group variance divided by the chi^2 value, and its associated mean variance,
# and if that is lower than the data point variance the [sentence truncated in original]
#
# approximate critical value of chi^2/N with N degrees of freedom at 5% level as 1 + 2.45/sqrt(N) + 0.55/N
# for 1% level use 1 + 3.4/sqrt(N) + 2.4/N
#
# Version 6:
# Done from scratch after careful working through of theory.
# ingrid is the (potentially sparse) grid of data to be smoothed and
# interpolated, which can be of different extent to the output
# (resolution is assumed to be the same, could adapt this to relax that)
#
# var can be a constant or a grid
#
# bbox can be either a grid name or the 'xmin ymin xmax ymax' parameters
# for setwindow

# REGETZ NOTES
# - it looks like Ming used sd=0.001 (based on the Arc 'log' file in the
#   topo experimental directory)
library(raster)

#&type NB - using standard deviation as noise specification now, not variance!

multiscalesmooth <- function(ingrid, sd = 0.0001, prob = 0.05, bbox) {
  # Multiscale smoothing and interpolation of a (potentially sparse) raster.
  #
  # Args:
  #   ingrid: RasterLayer of data to be smoothed and interpolated.
  #   sd:     measurement uncertainty expressed as a standard deviation
  #           (NB: not a variance), applied uniformly to all non-NA cells.
  #   prob:   significance level for the chi-squared test that decides
  #           whether an aggregated mean is consistent with measurement
  #           uncertainty.
  #   bbox:   optional extent object used to crop ingrid before smoothing.
  #
  # Returns:
  #   A 2-layer stack with layers "hs" (smoothed values) and "vs" (their
  #   variances), cropped back to the extent of the (possibly cropped) input.

  # set up chisq parameters: used below to approximate the critical value
  # of chi^2/N with N degrees of freedom at significance level `prob`
  # (see the formulas in the file header)
  chisqa <- 2.807 - 0.6422 * log10(prob) - 3.410 * prob^0.3411
  chisqb <- -5.871 - 3.675 * log10(prob) + 4.690 * prob^0.3377
  message("chisq parameters: (", chisqa, ", ", chisqb, ")")

  # subset ingrid if desired
  if (!missing(bbox)) {
    ingrid <- crop(ingrid, bbox)
  }

  # number of levels of aggregation
  NUM.LEVELS <- 4
  # aggregation factor at each level (was hard-coded as literal 3 at each
  # use site; now referenced consistently so the constant is authoritative)
  AGG.FACTOR <- 3

  # naming:
  #   h    - the value being smoothed/interpolated
  #   vg   - total variance of group of data, or of individual measurement
  #   v_bg - variance between groups
  #   v_wg - variance within groups
  #   wa   - weighting for aggregation, based on total variance
  #   vm   - variance of the calculated mean
  #   mv   - mean of finer scale variances
  #   n    - effective number of measurements

  # NB - only calculating sample variances here, not variances of estimated
  # means. Also note that v0_bg is an uncertainty, not a sample variance,
  # and v1_bg is total variance, but both are labelled as "between-group"
  # to simplify the smoothing.

  # expand grid to accommodate an integer number of cells at the coarsest
  # level, by adding a roughly equal number of cells on either side (with
  # one extra on top/right if an odd number of cells needs to be added)
  max.size <- AGG.FACTOR^NUM.LEVELS
  addx <- if (0 < (extra <- ncol(ingrid) %% max.size)) {
    (max.size - extra) / 2
  } else {
    0
  }
  addy <- if (0 < (extra <- nrow(ingrid) %% max.size)) {
    (max.size - extra) / 2
  } else {
    0
  }
  h <- list(expand(ingrid, extent(
    xmin(ingrid) - floor(addx) * xres(ingrid),
    xmax(ingrid) + ceiling(addx) * xres(ingrid),
    ymin(ingrid) - floor(addy) * yres(ingrid),
    ymax(ingrid) + ceiling(addy) * yres(ingrid)
  )))
  # per-cell measurement variance: sd^2 where data exist, NA elsewhere
  v <- list(calc(h[[1]], function(x) ifelse(!is.na(x), sd^2, NA)))

  vg <- v[[1]]
  # inverse-variance weights; zero weight where there is no data
  w <- calc(v[[1]], function(x) ifelse(is.na(x), 0, 1 / x))
  wsq <- w^2
  # data-presence indicator (1 = measurement, 0 = missing)
  n <- calc(h[[1]], function(x) ifelse(!is.na(x), 1, 0))

  # largest observed cell variance; used (inflated, below) as the variance
  # assigned to cells with no information
  bigvar <- cellStats(v[[1]], max)

  # aggregate to broader scales
  # NOTE(review): these aggregations rely on raster::aggregate's default
  # na.rm = TRUE so missing cells do not poison the sums - confirm for the
  # raster version in use
  for (i in 1 + seq.int(NUM.LEVELS)) {

    message("Aggregate from ", i - 1, " to ", i)

    vg.prev <- vg
    w.prev <- w
    wsq.prev <- wsq
    n.prev <- n

    w <- aggregate(w.prev, AGG.FACTOR, sum)
    wsq <- aggregate(wsq.prev, AGG.FACTOR, sum)
    n <- aggregate(n.prev, AGG.FACTOR, sum)
    # effective number of measurements contributing to each coarse cell
    neff <- w^2 / wsq
    # inverse-variance weighted mean of the finer-scale values
    h[[i]] <- aggregate(w.prev * h[[i - 1]], AGG.FACTOR, sum) / w
    # departures of the finer-scale values from the coarse mean
    hdiff <- h[[i - 1]] - disaggregate(h[[i]], AGG.FACTOR)
    # between-group variance (weighted)
    vbg <- aggregate(w.prev * hdiff^2, AGG.FACTOR, sum) / w
    if (i == 2) {
      # within-group variance is zero at the first aggregation level (each
      # group member is a single measurement); computed as n - n so the
      # result inherits window and cell size from n
      vwg <- n - n
    } else {
      # within-group variance: weighted mean of the finer-scale variances
      vwg <- aggregate(w.prev * vg.prev, AGG.FACTOR, sum) / w
    }
    vg <- vbg + vwg
    # variance of the weighted mean
    vm <- 1 / w
    # mean of the finer-scale variances
    mv <- n / w

    # approximate critical value of chi^2/N (see header notes)
    chisq <- 1 + chisqa / sqrt(neff - 1) + chisqb / (neff - 1)
    # where the group variance is consistent with measurement uncertainty
    # (vg/chisq < mv), keep the variance of the mean; otherwise keep the
    # group variance itself
    v[[i]] <- overlay(vg, chisq, mv, vm,
                      fun = function(vg, chisq, mv, vm)
                        ifelse(vg / chisq < mv, vm, vg))

  }

  # inflate so no-data cells carry negligible weight when combined with the
  # smoothed coarser-scale estimate below
  bigvar <- bigvar * 10

  # smooth, refine and combine each layer in turn
  hs <- h
  vs <- v
  # 5x5 approximation of a circular focal window of radius 2
  # TODO(review): confirm this matches Arc's 'circle 2' neighbourhood
  circle2 <- matrix(c(0, 0, 1, 0, 0,
                      0, 1, 1, 1, 0,
                      1, 1, 1, 1, 1,
                      0, 1, 1, 1, 0,
                      0, 0, 1, 0, 0), nrow = 5)
  circle2[circle2 == 0] <- NA

  for (j in 1 + rev(seq.int(NUM.LEVELS))) {

    message("Refine from ", j, " to ", j - 1)

    # create smoothed higher resolution versions of h and v, hopefully with
    # no nulls! [suppressing warnings to avoid .couldBeLonLat complaints]
    suppressWarnings({
      hs.tmp <- disaggregate(focal(hs[[j]], w = circle2, fun = mean,
                                   pad = TRUE, padValue = NA, na.rm = TRUE),
                             AGG.FACTOR)
      vs.tmp <- disaggregate(focal(vs[[j]], w = circle2, fun = mean,
                                   pad = TRUE, padValue = NA, na.rm = TRUE),
                             AGG.FACTOR)
    })

    # create no-null versions of the finer h and v: unknown cells get a
    # zero value with a huge variance, so they contribute essentially
    # nothing to the inverse-variance combination below
    h_c <- calc(h[[j - 1]], function(x) ifelse(is.na(x), 0, x))
    v_c <- calc(v[[j - 1]], function(x) ifelse(is.na(x), bigvar, x))

    # combine the two estimates, each weighted by its inverse variance
    hs[[j - 1]] <- (h_c / v_c + hs.tmp / vs.tmp) / (1 / v_c + 1 / vs.tmp)
    vs[[j - 1]] <- 1 / (1 / v_c + 1 / vs.tmp)

    # todo: mimic all the AML setwindow/setcell stuff???
    # hs[[j-1]] <- expand(hs[[j-1]], 4)
    # vs[[j-1]] <- expand(vs[[j-1]], 4)

  }

  result <- crop(stack(hs[[1]], vs[[1]]), extent(ingrid))
  # layerNames() is defunct in current raster; names() is the documented
  # replacement and yields the same layer names
  names(result) <- c("hs", "vs")
  return(result)
}
|