commit 59c247716c71837b0071fb933f731e7b82c98dd8
Author: Gunnar Beutner <gunnar@beutner.name>
Date:   Mon Jun 18 11:44:34 2012 -0400

    Fix znode corruption when using xattr=sa.

    Using a separate SA handle (rather than zp->z_sa_hdl) to update
    attributes corrupts the znode's mode flags (and possibly other
    attributes as well).

    This patch changes the zfs_sa_get_xattr/zfs_sa_set_xattr functions
    so that they use zp->z_sa_hdl.
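
    In short, the xattr helpers now reuse the znode's existing SA handle
    instead of creating and destroying a private one. A condensed sketch
    of the change in zfs_sa_get_xattr() (taken from the diff below, with
    error handling omitted):

        /* Before: a separate, private SA handle. */
        sa_handle_get(zsb->z_os, zp->z_id, NULL, SA_HDL_PRIVATE, &sa);
        error = sa_lookup(sa, SA_ZPL_DXATTR(zsb), obj, size);
        sa_handle_destroy(sa);

        /* After: the znode's own handle, zp->z_sa_hdl. */
        error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), obj, size);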

    Signed-off-by: Richard Yao <ryao@cs.stonybrook.edu>

diff --git a/module/zfs/zfs_sa.c b/module/zfs/zfs_sa.c
index f35f6f6..7f14706 100644
--- a/module/zfs/zfs_sa.c
+++ b/module/zfs/zfs_sa.c
@@ -188,7 +188,6 @@ int
 zfs_sa_get_xattr(znode_t *zp)
 {
 	zfs_sb_t *zsb = ZTOZSB(zp);
-	sa_handle_t *sa;
 	char *obj;
 	int size;
 	int error;
@@ -197,14 +196,8 @@ zfs_sa_get_xattr(znode_t *zp)
 	ASSERT(!zp->z_xattr_cached);
 	ASSERT(zp->z_is_sa);
 
-	error = sa_handle_get(zsb->z_os, zp->z_id, NULL, SA_HDL_PRIVATE, &sa);
-	if (error)
-		return (error);
-
-	error = sa_size(sa, SA_ZPL_DXATTR(zsb), &size);
+	error = sa_size(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), &size);
 	if (error) {
-		sa_handle_destroy(sa);
-
 		if (error == ENOENT)
 			return nvlist_alloc(&zp->z_xattr_cached,
 			    NV_UNIQUE_NAME, KM_SLEEP);
@@ -212,14 +205,13 @@ zfs_sa_get_xattr(znode_t *zp)
 		return (error);
 	}
 
-	obj = sa_spill_alloc(KM_SLEEP);
+	obj = kmem_alloc(size, KM_SLEEP);
 
-	error = sa_lookup(sa, SA_ZPL_DXATTR(zsb), obj, size);
+	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), obj, size);
 	if (error == 0)
 		error = nvlist_unpack(obj, size, &zp->z_xattr_cached, KM_SLEEP);
 
-	sa_spill_free(obj);
-	sa_handle_destroy(sa);
+	kmem_free(obj, size);
 
 	return (error);
 }
@@ -228,7 +220,6 @@ int
 zfs_sa_set_xattr(znode_t *zp)
 {
 	zfs_sb_t *zsb = ZTOZSB(zp);
-	sa_handle_t *sa;
 	dmu_tx_t *tx;
 	char *obj;
 	size_t size;
@@ -242,44 +233,27 @@ zfs_sa_set_xattr(znode_t *zp)
 	if (error)
 		goto out;
 
-	obj = sa_spill_alloc(KM_SLEEP);
+	obj = kmem_alloc(size, KM_SLEEP);
 
 	error = nvlist_pack(zp->z_xattr_cached, &obj, &size,
 	    NV_ENCODE_XDR, KM_SLEEP);
 	if (error)
-		goto out_free;
-
-	/*
-	 * A private SA handle must be used to ensure we can drop the hold
-	 * on the spill block prior to calling dmu_tx_commit(). If we call
-	 * dmu_tx_commit() before sa_handle_destroy(), then our hold will
-	 * trigger a copy of the buffer at txg sync time. This is done to
-	 * prevent data from leaking in to the syncing txg. As a result
-	 * the original dirty spill block will be remain dirty in the arc
-	 * while the copy is written and laundered.
-	 */
-	error = sa_handle_get(zsb->z_os, zp->z_id, NULL, SA_HDL_PRIVATE, &sa);
-	if (error)
-		goto out_free;
+		goto out;
 
 	tx = dmu_tx_create(zsb->z_os);
 	dmu_tx_hold_sa_create(tx, size);
-	dmu_tx_hold_sa(tx, sa, B_TRUE);
+	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
 
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
-		sa_handle_destroy(sa);
 	} else {
-		error = sa_update(sa, SA_ZPL_DXATTR(zsb), obj, size, tx);
-		sa_handle_destroy(sa);
-		if (error)
-			dmu_tx_abort(tx);
-		else
-			dmu_tx_commit(tx);
+		VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), obj,
+		    size, tx));
+		dmu_tx_commit(tx);
 	}
-out_free:
-	sa_spill_free(obj);
+
+	kmem_free(obj, size);
 out:
 	return (error);
 }

commit 4bbb86db6d6da5c05c452a810f20ffb19d6918de
Author: Richard Yao <ryao@cs.stonybrook.edu>
Date:   Sun Jun 24 20:11:40 2012 -0400

    Make callers responsible for memory allocation in zfs_range_lock()

    zfs_range_lock() is used in zvols, and previously it could deadlock due
    to an allocation using KM_SLEEP. We avoid this by moving responsibility
    for the memory allocation from zfs_range_lock() to the caller. This
    enables callers to use stack allocations, which are more efficient and
    avoid such deadlocks. The contexts in which stack allocations are done
    do not appear to be stack heavy, so we do not risk overflowing the
    stack.
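
    The new calling convention, exercised throughout the diff below (a
    sketch; the caller now owns the rl_t storage, typically on its stack):

        /* Before: zfs_range_lock() allocated the rl_t with KM_SLEEP. */
        rl_t *rl = zfs_range_lock(zp, off, len, RL_WRITER);
        /* ... read or modify the locked range ... */
        zfs_range_unlock(rl);

        /* After: the caller supplies the rl_t. */
        rl_t rl;
        zfs_range_lock(&rl, zp, off, len, RL_WRITER);
        /* ... read or modify the locked range ... */
        zfs_range_unlock(&rl);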

    Signed-off-by: Richard Yao <ryao@cs.stonybrook.edu>

    Conflicts:

    	module/zfs/zvol.c

diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
index 72d511b..c5dd0c2 100644
--- a/cmd/ztest/ztest.c
+++ b/cmd/ztest/ztest.c
@@ -973,12 +973,11 @@ ztest_object_unlock(ztest_ds_t *zd, uint64_t object)
 }
 
 static rl_t *
-ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset,
+ztest_range_lock(rl_t *rl, ztest_ds_t *zd, uint64_t object, uint64_t offset,
     uint64_t size, rl_type_t type)
 {
 	uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1));
 	rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)];
-	rl_t *rl;
 
 	rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL);
 	rl->rl_object = object;
@@ -1389,7 +1388,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
 	dmu_tx_t *tx;
 	dmu_buf_t *db;
 	arc_buf_t *abuf = NULL;
-	rl_t *rl;
+	rl_t rl;
 
 	if (byteswap)
 		byteswap_uint64_array(lr, sizeof (*lr));
@@ -1413,7 +1412,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
 		bt = NULL;
 
 	ztest_object_lock(zd, lr->lr_foid, RL_READER);
-	rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER);
+	ztest_range_lock(&rl, zd, lr->lr_foid, offset, length, RL_WRITER);
 
 	VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
 
@@ -1438,7 +1437,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
 		if (abuf != NULL)
 			dmu_return_arcbuf(abuf);
 		dmu_buf_rele(db, FTAG);
-		ztest_range_unlock(rl);
+		ztest_range_unlock(&rl);
 		ztest_object_unlock(zd, lr->lr_foid);
 		return (ENOSPC);
 	}
@@ -1495,7 +1494,7 @@ ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap)
 
 	dmu_tx_commit(tx);
 
-	ztest_range_unlock(rl);
+	ztest_range_unlock(&rl);
 	ztest_object_unlock(zd, lr->lr_foid);
 
 	return (0);
@@ -1507,13 +1506,13 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)
 	objset_t *os = zd->zd_os;
 	dmu_tx_t *tx;
 	uint64_t txg;
-	rl_t *rl;
+	rl_t rl;
 
 	if (byteswap)
 		byteswap_uint64_array(lr, sizeof (*lr));
 
 	ztest_object_lock(zd, lr->lr_foid, RL_READER);
-	rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length,
+	ztest_range_lock(&rl, zd, lr->lr_foid, lr->lr_offset, lr->lr_length,
 	    RL_WRITER);
 
 	tx = dmu_tx_create(os);
@@ -1522,7 +1521,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)
 
 	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
 	if (txg == 0) {
-		ztest_range_unlock(rl);
+		ztest_range_unlock(&rl);
 		ztest_object_unlock(zd, lr->lr_foid);
 		return (ENOSPC);
 	}
@@ -1534,7 +1533,7 @@ ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap)
 
 	dmu_tx_commit(tx);
 
-	ztest_range_unlock(rl);
+	ztest_range_unlock(&rl);
 	ztest_object_unlock(zd, lr->lr_foid);
 
 	return (0);
@@ -1670,6 +1669,8 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
 	dmu_object_info_t doi;
 	dmu_buf_t *db;
 	zgd_t *zgd;
+	rl_t rl;
+
 	int error;
 
 	ztest_object_lock(zd, object, RL_READER);
@@ -1694,9 +1695,10 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
 	zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL);
 	zgd->zgd_zilog = zd->zd_zilog;
 	zgd->zgd_private = zd;
+	zgd->zgd_rl = &rl;
 
 	if (buf != NULL) {	/* immediate write */
-		zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
+		ztest_range_lock(zgd->zgd_rl, zd, object, offset, size,
 		    RL_READER);
 
 		error = dmu_read(os, object, offset, size, buf,
@@ -1711,7 +1713,7 @@ ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
 			offset = 0;
 		}
 
-		zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
+		ztest_range_lock(zgd->zgd_rl, zd, object, offset, size,
 		    RL_READER);
 
 		error = dmu_buf_hold(os, object, offset, zgd, &db,
@@ -1953,12 +1955,12 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
 	objset_t *os = zd->zd_os;
 	dmu_tx_t *tx;
 	uint64_t txg;
-	rl_t *rl;
+	rl_t rl;
 
 	txg_wait_synced(dmu_objset_pool(os), 0);
 
 	ztest_object_lock(zd, object, RL_READER);
-	rl = ztest_range_lock(zd, object, offset, size, RL_WRITER);
+	ztest_range_lock(&rl, zd, object, offset, size, RL_WRITER);
 
 	tx = dmu_tx_create(os);
 
@@ -1974,7 +1976,7 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
 		(void) dmu_free_long_range(os, object, offset, size);
 	}
 
-	ztest_range_unlock(rl);
+	ztest_range_unlock(&rl);
 	ztest_object_unlock(zd, object);
 }
 
diff --git a/include/sys/zfs_rlock.h b/include/sys/zfs_rlock.h
index da18b1f..85dc16a 100644
--- a/include/sys/zfs_rlock.h
+++ b/include/sys/zfs_rlock.h
@@ -63,7 +63,7 @@ typedef struct rl {
  * is converted to WRITER that specified to lock from the start of the
  * end of file. zfs_range_lock() returns the range lock structure.
  */
-rl_t *zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type);
+rl_t *zfs_range_lock(rl_t *rl, znode_t *zp, uint64_t off, uint64_t len, rl_type_t type);
 
 /*
  * Unlock range and destroy range lock structure.
diff --git a/module/zfs/zfs_rlock.c b/module/zfs/zfs_rlock.c
index f3ada17..6e9afc0 100644
--- a/module/zfs/zfs_rlock.c
+++ b/module/zfs/zfs_rlock.c
@@ -31,9 +31,9 @@
  * Interface
  * ---------
  * Defined in zfs_rlock.h but essentially:
- *	rl = zfs_range_lock(zp, off, len, lock_type);
- *	zfs_range_unlock(rl);
- *	zfs_range_reduce(rl, off, len);
+ *	zfs_range_lock(&rl, zp, off, len, lock_type);
+ *	zfs_range_unlock(&rl);
+ *	zfs_range_reduce(&rl, off, len);
 *
 * AVL tree
 * --------
@@ -420,13 +420,11 @@ got_lock:
  * previously locked as RL_WRITER).
  */
 rl_t *
-zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
+zfs_range_lock(rl_t *new, znode_t *zp, uint64_t off, uint64_t len, rl_type_t type)
 {
-	rl_t *new;
 
 	ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND);
 
-	new = kmem_alloc(sizeof (rl_t), KM_SLEEP);
 	new->r_zp = zp;
 	new->r_off = off;
 	if (len + off < off)	/* overflow */
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 2da5fec..039269a 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -370,7 +370,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 	objset_t *os;
 	ssize_t n, nbytes;
 	int error = 0;
-	rl_t *rl;
+	rl_t rl;
 #ifdef HAVE_UIO_ZEROCOPY
 	xuio_t *xuio = NULL;
 #endif /* HAVE_UIO_ZEROCOPY */
@@ -418,7 +418,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 	/*
 	 * Lock the range against changes.
 	 */
-	rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER);
+	zfs_range_lock(&rl, zp, uio->uio_loffset, uio->uio_resid, RL_READER);
 
 	/*
 	 * If we are reading past end-of-file we can skip
@@ -482,7 +482,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 		n -= nbytes;
 	}
 out:
-	zfs_range_unlock(rl);
+	zfs_range_unlock(&rl);
 
 	ZFS_ACCESSTIME_STAMP(zsb, zp);
 	zfs_inode_update(zp);
@@ -524,7 +524,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 	zilog_t *zilog;
 	offset_t woff;
 	ssize_t n, nbytes;
-	rl_t *rl;
+	rl_t rl;
 	int max_blksz = zsb->z_max_blksz;
 	int error = 0;
 	arc_buf_t *abuf;
@@ -608,9 +608,9 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 		 * Obtain an appending range lock to guarantee file append
 		 * semantics. We reset the write offset once we have the lock.
 		 */
-		rl = zfs_range_lock(zp, 0, n, RL_APPEND);
-		woff = rl->r_off;
-		if (rl->r_len == UINT64_MAX) {
+		zfs_range_lock(&rl, zp, 0, n, RL_APPEND);
+		woff = rl.r_off;
+		if (rl.r_len == UINT64_MAX) {
 			/*
 			 * We overlocked the file because this write will cause
 			 * the file block size to increase.
@@ -625,11 +625,11 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
 		 * this write, then this range lock will lock the entire file
 		 * so that we can re-write the block safely.
 		 */
-		rl = zfs_range_lock(zp, woff, n, RL_WRITER);
+		zfs_range_lock(&rl, zp, woff, n, RL_WRITER);
 	}
 
 	if (woff >= limit) {
-		zfs_range_unlock(rl);
+		zfs_range_unlock(&rl);
 		ZFS_EXIT(zsb);
 		return (EFBIG);
 	}
@@ -719,7 +719,7 @@ again:
 		 * on the first iteration since zfs_range_reduce() will
 		 * shrink down r_len to the appropriate size.
 		 */
-		if (rl->r_len == UINT64_MAX) {
+		if (rl.r_len == UINT64_MAX) {
 			uint64_t new_blksz;
 
 			if (zp->z_blksz > max_blksz) {
@@ -729,7 +729,7 @@ again:
 				new_blksz = MIN(end_size, max_blksz);
 			}
 			zfs_grow_blocksize(zp, new_blksz, tx);
-			zfs_range_reduce(rl, woff, n);
+			zfs_range_reduce(&rl, woff, n);
 		}
 
 		/*
@@ -842,7 +842,7 @@ again:
 			uio_prefaultpages(MIN(n, max_blksz), uio);
 	}
 
-	zfs_range_unlock(rl);
+	zfs_range_unlock(&rl);
 
 	/*
 	 * If we're in replay mode, or we made no progress, return error.
@@ -946,7 +946,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
 	 * we don't have to write the data twice.
 	 */
 	if (buf != NULL) {	/* immediate write */
-		zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER);
+		zfs_range_lock(zgd->zgd_rl, zp, offset, size, RL_READER);
 		/* test for truncation needs to be done while range locked */
 		if (offset >= zp->z_size) {
 			error = ENOENT;
@@ -967,7 +967,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
 			size = zp->z_blksz;
 			blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
 			offset -= blkoff;
-			zgd->zgd_rl = zfs_range_lock(zp, offset, size,
+			zfs_range_lock(zgd->zgd_rl, zp, offset, size,
 			    RL_READER);
 			if (zp->z_blksz == size)
 				break;
diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
index 3a6872f..e363839 100644
--- a/module/zfs/zfs_znode.c
+++ b/module/zfs/zfs_znode.c
@@ -1158,20 +1158,20 @@ zfs_extend(znode_t *zp, uint64_t end)
 {
 	zfs_sb_t *zsb = ZTOZSB(zp);
 	dmu_tx_t *tx;
-	rl_t *rl;
+	rl_t rl;
 	uint64_t newblksz;
 	int error;
 
 	/*
 	 * We will change zp_size, lock the whole file.
 	 */
-	rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
+	zfs_range_lock(&rl, zp, 0, UINT64_MAX, RL_WRITER);
 
 	/*
 	 * Nothing to do if file already at desired length.
 	 */
 	if (end <= zp->z_size) {
-		zfs_range_unlock(rl);
+		zfs_range_unlock(&rl);
 		return (0);
 	}
 top:
@@ -1202,7 +1202,7 @@ top:
 			goto top;
 		}
 		dmu_tx_abort(tx);
-		zfs_range_unlock(rl);
+		zfs_range_unlock(&rl);
 		return (error);
 	}
 
@@ -1214,7 +1214,7 @@ top:
 	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)),
 	    &zp->z_size, sizeof (zp->z_size), tx));
 
-	zfs_range_unlock(rl);
+	zfs_range_unlock(&rl);
 
 	dmu_tx_commit(tx);
 
@@ -1235,19 +1235,19 @@ static int
 zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
 {
 	zfs_sb_t *zsb = ZTOZSB(zp);
-	rl_t *rl;
+	rl_t rl;
 	int error;
 
 	/*
 	 * Lock the range being freed.
 	 */
-	rl = zfs_range_lock(zp, off, len, RL_WRITER);
+	zfs_range_lock(&rl, zp, off, len, RL_WRITER);
 
 	/*
 	 * Nothing to do if file already at desired length.
 	 */
 	if (off >= zp->z_size) {
-		zfs_range_unlock(rl);
+		zfs_range_unlock(&rl);
 		return (0);
 	}
 
@@ -1256,7 +1256,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
 
 	error = dmu_free_long_range(zsb->z_os, zp->z_id, off, len);
 
-	zfs_range_unlock(rl);
+	zfs_range_unlock(&rl);
 
 	return (error);
 }
@@ -1275,7 +1275,7 @@ zfs_trunc(znode_t *zp, uint64_t end)
 {
 	zfs_sb_t *zsb = ZTOZSB(zp);
 	dmu_tx_t *tx;
-	rl_t *rl;
+	rl_t rl;
 	int error;
 	sa_bulk_attr_t bulk[2];
 	int count = 0;
@@ -1283,19 +1283,19 @@ zfs_trunc(znode_t *zp, uint64_t end)
 	/*
 	 * We will change zp_size, lock the whole file.
 	 */
-	rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
+	zfs_range_lock(&rl, zp, 0, UINT64_MAX, RL_WRITER);
 
 	/*
 	 * Nothing to do if file already at desired length.
 	 */
 	if (end >= zp->z_size) {
-		zfs_range_unlock(rl);
+		zfs_range_unlock(&rl);
 		return (0);
 	}
 
 	error = dmu_free_long_range(zsb->z_os, zp->z_id, end, -1);
 	if (error) {
-		zfs_range_unlock(rl);
+		zfs_range_unlock(&rl);
 		return (error);
 	}
top:
@@ -1310,7 +1310,7 @@ top:
 			goto top;
 		}
 		dmu_tx_abort(tx);
-		zfs_range_unlock(rl);
+		zfs_range_unlock(&rl);
 		return (error);
 	}
 
@@ -1327,7 +1327,7 @@ top:
 
 	dmu_tx_commit(tx);
 
-	zfs_range_unlock(rl);
+	zfs_range_unlock(&rl);
 
 	return (0);
 }
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index 125d58d..5cae597 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -537,7 +537,7 @@ zvol_write(void *arg)
 	uint64_t size = blk_rq_bytes(req);
 	int error = 0;
 	dmu_tx_t *tx;
-	rl_t *rl;
+	rl_t rl;
 
 	if (req->cmd_flags & VDEV_REQ_FLUSH)
 		zil_commit(zv->zv_zilog, ZVOL_OBJ);
@@ -550,7 +550,7 @@ zvol_write(void *arg)
 		return;
 	}
 
-	rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER);
+	zfs_range_lock(&rl, &zv->zv_znode, offset, size, RL_WRITER);
 
 	tx = dmu_tx_create(zv->zv_objset);
 	dmu_tx_hold_write(tx, ZVOL_OBJ, offset, size);
@@ -559,7 +559,7 @@ zvol_write(void *arg)
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
-		zfs_range_unlock(rl);
+		zfs_range_unlock(&rl);
 		blk_end_request(req, -error, size);
 		return;
 	}
@@ -570,7 +570,7 @@ zvol_write(void *arg)
 		    req->cmd_flags & VDEV_REQ_FUA);
 
 	dmu_tx_commit(tx);
-	zfs_range_unlock(rl);
+	zfs_range_unlock(&rl);
 
 	if ((req->cmd_flags & VDEV_REQ_FUA) ||
 	    zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)
@@ -589,7 +589,7 @@ zvol_discard(void *arg)
 	uint64_t offset = blk_rq_pos(req) << 9;
 	uint64_t size = blk_rq_bytes(req);
 	int error;
-	rl_t *rl;
+	rl_t rl;
 
 	if (offset + size > zv->zv_volsize) {
 		blk_end_request(req, -EIO, size);
@@ -601,7 +601,7 @@ zvol_discard(void *arg)
 		return;
 	}
 
-	rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER);
+	zfs_range_lock(&rl, &zv->zv_znode, offset, size, RL_WRITER);
 
 	error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, size);
 
@@ -609,7 +609,7 @@ zvol_discard(void *arg)
 	 * TODO: maybe we should add the operation to the log.
 	 */
 
-	zfs_range_unlock(rl);
+	zfs_range_unlock(&rl);
 
 	blk_end_request(req, -error, size);
 }
@@ -630,18 +630,18 @@ zvol_read(void *arg)
 	uint64_t offset = blk_rq_pos(req) << 9;
 	uint64_t size = blk_rq_bytes(req);
 	int error;
-	rl_t *rl;
+	rl_t rl;
 
 	if (size == 0) {
 		blk_end_request(req, 0, size);
 		return;
 	}
 
-	rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);
+	zfs_range_lock(&rl, &zv->zv_znode, offset, size, RL_READER);
 
 	error = dmu_read_req(zv->zv_objset, ZVOL_OBJ, req);
 
-	zfs_range_unlock(rl);
+	zfs_range_unlock(&rl);
 
 	/* convert checksum errors into IO errors */
 	if (error == ECKSUM)
@@ -744,6 +744,7 @@ zvol_get_done(zgd_t *zgd, int error)
 	if (error == 0 && zgd->zgd_bp)
 		zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
 
+	kmem_free(zgd->zgd_rl, sizeof(rl_t));
 	kmem_free(zgd, sizeof (zgd_t));
 }
 
@@ -766,7 +767,8 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
 
 	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
 	zgd->zgd_zilog = zv->zv_zilog;
-	zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);
+	zgd->zgd_rl = kmem_alloc(sizeof (rl_t), KM_SLEEP);
+	zfs_range_lock(zgd->zgd_rl, &zv->zv_znode, offset, size, RL_READER);
 
 	/*
 	 * Write records come in two flavors: immediate and indirect.