/[linux-patches]/genpatches-2.6/tags/2.6.32-15/1000_linux-2.6.32.1.patch
Gentoo

Contents of /genpatches-2.6/tags/2.6.32-15/1000_linux-2.6.32.1.patch

Parent Directory | Revision Log


Revision 1735 - (show annotations) (download)
Wed Aug 4 11:25:09 2010 UTC (3 years, 11 months ago) by mpagano
File size: 61207 byte(s)
2.6.32-15 release
1 diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
2 index 6d94e06..af6885c 100644
3 --- a/Documentation/filesystems/ext4.txt
4 +++ b/Documentation/filesystems/ext4.txt
5 @@ -153,8 +153,8 @@ journal_dev=devnum When the external journal device's major/minor numbers
6 identified through its new major/minor numbers encoded
7 in devnum.
8
9 -noload Don't load the journal on mounting. Note that
10 - if the filesystem was not unmounted cleanly,
11 +norecovery Don't load the journal on mounting. Note that
12 +noload if the filesystem was not unmounted cleanly,
13 skipping the journal replay will lead to the
14 filesystem containing inconsistencies that can
15 lead to any number of problems.
16 @@ -353,6 +353,12 @@ noauto_da_alloc replacing existing files via patterns such as
17 system crashes before the delayed allocation
18 blocks are forced to disk.
19
20 +discard Controls whether ext4 should issue discard/TRIM
21 +nodiscard(*) commands to the underlying block device when
22 + blocks are freed. This is useful for SSD devices
23 + and sparse/thinly-provisioned LUNs, but it is off
24 + by default until sufficient testing has been done.
25 +
26 Data Mode
27 =========
28 There are 3 different data modes:
29 diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
30 index c968cc3..554626e 100644
31 --- a/drivers/scsi/hosts.c
32 +++ b/drivers/scsi/hosts.c
33 @@ -180,14 +180,20 @@ void scsi_remove_host(struct Scsi_Host *shost)
34 EXPORT_SYMBOL(scsi_remove_host);
35
36 /**
37 - * scsi_add_host - add a scsi host
38 + * scsi_add_host_with_dma - add a scsi host with dma device
39 * @shost: scsi host pointer to add
40 * @dev: a struct device of type scsi class
41 + * @dma_dev: dma device for the host
42 + *
43 + * Note: You rarely need to worry about this unless you're in a
44 + * virtualised host environments, so use the simpler scsi_add_host()
45 + * function instead.
46 *
47 * Return value:
48 * 0 on success / != 0 for error
49 **/
50 -int scsi_add_host(struct Scsi_Host *shost, struct device *dev)
51 +int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
52 + struct device *dma_dev)
53 {
54 struct scsi_host_template *sht = shost->hostt;
55 int error = -EINVAL;
56 @@ -207,6 +213,7 @@ int scsi_add_host(struct Scsi_Host *shost, struct device *dev)
57
58 if (!shost->shost_gendev.parent)
59 shost->shost_gendev.parent = dev ? dev : &platform_bus;
60 + shost->dma_dev = dma_dev;
61
62 error = device_add(&shost->shost_gendev);
63 if (error)
64 @@ -262,7 +269,7 @@ int scsi_add_host(struct Scsi_Host *shost, struct device *dev)
65 fail:
66 return error;
67 }
68 -EXPORT_SYMBOL(scsi_add_host);
69 +EXPORT_SYMBOL(scsi_add_host_with_dma);
70
71 static void scsi_host_dev_release(struct device *dev)
72 {
73 diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
74 index 562d8ce..f913f1e 100644
75 --- a/drivers/scsi/lpfc/lpfc_init.c
76 +++ b/drivers/scsi/lpfc/lpfc_init.c
77 @@ -2408,7 +2408,7 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
78 vport->els_tmofunc.function = lpfc_els_timeout;
79 vport->els_tmofunc.data = (unsigned long)vport;
80
81 - error = scsi_add_host(shost, dev);
82 + error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev);
83 if (error)
84 goto out_put_shost;
85
86 diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
87 index a39addc..507ccc6 100644
88 --- a/drivers/scsi/megaraid/megaraid_sas.c
89 +++ b/drivers/scsi/megaraid/megaraid_sas.c
90 @@ -3032,7 +3032,7 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance,
91 int error = 0, i;
92 void *sense = NULL;
93 dma_addr_t sense_handle;
94 - u32 *sense_ptr;
95 + unsigned long *sense_ptr;
96
97 memset(kbuff_arr, 0, sizeof(kbuff_arr));
98
99 @@ -3109,7 +3109,7 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance,
100 }
101
102 sense_ptr =
103 - (u32 *) ((unsigned long)cmd->frame + ioc->sense_off);
104 + (unsigned long *) ((unsigned long)cmd->frame + ioc->sense_off);
105 *sense_ptr = sense_handle;
106 }
107
108 @@ -3140,8 +3140,8 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance,
109 * sense_ptr points to the location that has the user
110 * sense buffer address
111 */
112 - sense_ptr = (u32 *) ((unsigned long)ioc->frame.raw +
113 - ioc->sense_off);
114 + sense_ptr = (unsigned long *) ((unsigned long)ioc->frame.raw +
115 + ioc->sense_off);
116
117 if (copy_to_user((void __user *)((unsigned long)(*sense_ptr)),
118 sense, ioc->sense_len)) {
119 diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
120 index fbcb82a..21e2bc4 100644
121 --- a/drivers/scsi/qla2xxx/qla_attr.c
122 +++ b/drivers/scsi/qla2xxx/qla_attr.c
123 @@ -1654,7 +1654,8 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
124 fc_vport_set_state(fc_vport, FC_VPORT_LINKDOWN);
125 }
126
127 - if (scsi_add_host(vha->host, &fc_vport->dev)) {
128 + if (scsi_add_host_with_dma(vha->host, &fc_vport->dev,
129 + &ha->pdev->dev)) {
130 DEBUG15(printk("scsi(%ld): scsi_add_host failure for VP[%d].\n",
131 vha->host_no, vha->vp_idx));
132 goto vport_create_failed_2;
133 diff --git a/drivers/scsi/scsi_lib_dma.c b/drivers/scsi/scsi_lib_dma.c
134 index ac6855c..dcd1285 100644
135 --- a/drivers/scsi/scsi_lib_dma.c
136 +++ b/drivers/scsi/scsi_lib_dma.c
137 @@ -23,7 +23,7 @@ int scsi_dma_map(struct scsi_cmnd *cmd)
138 int nseg = 0;
139
140 if (scsi_sg_count(cmd)) {
141 - struct device *dev = cmd->device->host->shost_gendev.parent;
142 + struct device *dev = cmd->device->host->dma_dev;
143
144 nseg = dma_map_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
145 cmd->sc_data_direction);
146 @@ -41,7 +41,7 @@ EXPORT_SYMBOL(scsi_dma_map);
147 void scsi_dma_unmap(struct scsi_cmnd *cmd)
148 {
149 if (scsi_sg_count(cmd)) {
150 - struct device *dev = cmd->device->host->shost_gendev.parent;
151 + struct device *dev = cmd->device->host->dma_dev;
152
153 dma_unmap_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
154 cmd->sc_data_direction);
155 diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
156 index 1d04189..f3032c9 100644
157 --- a/fs/ext4/balloc.c
158 +++ b/fs/ext4/balloc.c
159 @@ -761,7 +761,13 @@ static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
160 static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
161 ext4_group_t group)
162 {
163 - return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0;
164 + if (!ext4_bg_has_super(sb, group))
165 + return 0;
166 +
167 + if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG))
168 + return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
169 + else
170 + return EXT4_SB(sb)->s_gdb_count;
171 }
172
173 /**
174 diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
175 index 50784ef..dc79b75 100644
176 --- a/fs/ext4/block_validity.c
177 +++ b/fs/ext4/block_validity.c
178 @@ -160,7 +160,7 @@ int ext4_setup_system_zone(struct super_block *sb)
179 if (ext4_bg_has_super(sb, i) &&
180 ((i < 5) || ((i % flex_size) == 0)))
181 add_system_zone(sbi, ext4_group_first_block_no(sb, i),
182 - sbi->s_gdb_count + 1);
183 + ext4_bg_num_gdb(sb, i) + 1);
184 gdp = ext4_get_group_desc(sb, i, NULL);
185 ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
186 if (ret)
187 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
188 index 8825515..bd2a9dd 100644
189 --- a/fs/ext4/ext4.h
190 +++ b/fs/ext4/ext4.h
191 @@ -703,6 +703,13 @@ struct ext4_inode_info {
192 struct list_head i_aio_dio_complete_list;
193 /* current io_end structure for async DIO write*/
194 ext4_io_end_t *cur_aio_dio;
195 +
196 + /*
197 + * Transactions that contain inode's metadata needed to complete
198 + * fsync and fdatasync, respectively.
199 + */
200 + tid_t i_sync_tid;
201 + tid_t i_datasync_tid;
202 };
203
204 /*
205 @@ -750,6 +757,7 @@ struct ext4_inode_info {
206 #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
207 #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
208 #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
209 +#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
210
211 #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
212 #define set_opt(o, opt) o |= EXT4_MOUNT_##opt
213 diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
214 index a286598..1892a77 100644
215 --- a/fs/ext4/ext4_jbd2.h
216 +++ b/fs/ext4/ext4_jbd2.h
217 @@ -49,7 +49,7 @@
218
219 #define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
220 EXT4_XATTR_TRANS_BLOCKS - 2 + \
221 - 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
222 + EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
223
224 /*
225 * Define the number of metadata blocks we need to account to modify data.
226 @@ -57,7 +57,7 @@
227 * This include super block, inode block, quota blocks and xattr blocks
228 */
229 #define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
230 - 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
231 + EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
232
233 /* Delete operations potentially hit one directory's namespace plus an
234 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
235 @@ -92,6 +92,7 @@
236 * but inode, sb and group updates are done only once */
237 #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
238 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
239 +
240 #define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
241 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
242 #else
243 @@ -99,6 +100,9 @@
244 #define EXT4_QUOTA_INIT_BLOCKS(sb) 0
245 #define EXT4_QUOTA_DEL_BLOCKS(sb) 0
246 #endif
247 +#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
248 +#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
249 +#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
250
251 int
252 ext4_mark_iloc_dirty(handle_t *handle,
253 @@ -254,6 +258,19 @@ static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
254 return 0;
255 }
256
257 +static inline void ext4_update_inode_fsync_trans(handle_t *handle,
258 + struct inode *inode,
259 + int datasync)
260 +{
261 + struct ext4_inode_info *ei = EXT4_I(inode);
262 +
263 + if (ext4_handle_valid(handle)) {
264 + ei->i_sync_tid = handle->h_transaction->t_tid;
265 + if (datasync)
266 + ei->i_datasync_tid = handle->h_transaction->t_tid;
267 + }
268 +}
269 +
270 /* super.c */
271 int ext4_force_commit(struct super_block *sb);
272
273 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
274 index 715264b..8b8bae4 100644
275 --- a/fs/ext4/extents.c
276 +++ b/fs/ext4/extents.c
277 @@ -1761,7 +1761,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
278 while (block < last && block != EXT_MAX_BLOCK) {
279 num = last - block;
280 /* find extent for this block */
281 + down_read(&EXT4_I(inode)->i_data_sem);
282 path = ext4_ext_find_extent(inode, block, path);
283 + up_read(&EXT4_I(inode)->i_data_sem);
284 if (IS_ERR(path)) {
285 err = PTR_ERR(path);
286 path = NULL;
287 @@ -2074,7 +2076,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
288 ext_debug("free last %u blocks starting %llu\n", num, start);
289 for (i = 0; i < num; i++) {
290 bh = sb_find_get_block(inode->i_sb, start + i);
291 - ext4_forget(handle, 0, inode, bh, start + i);
292 + ext4_forget(handle, metadata, inode, bh, start + i);
293 }
294 ext4_free_blocks(handle, inode, start, num, metadata);
295 } else if (from == le32_to_cpu(ex->ee_block)
296 @@ -2167,7 +2169,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
297 correct_index = 1;
298 credits += (ext_depth(inode)) + 1;
299 }
300 - credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
301 + credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
302
303 err = ext4_ext_truncate_extend_restart(handle, inode, credits);
304 if (err)
305 @@ -3064,6 +3066,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
306 if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
307 ret = ext4_convert_unwritten_extents_dio(handle, inode,
308 path);
309 + if (ret >= 0)
310 + ext4_update_inode_fsync_trans(handle, inode, 1);
311 goto out2;
312 }
313 /* buffered IO case */
314 @@ -3091,6 +3095,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
315 ret = ext4_ext_convert_to_initialized(handle, inode,
316 path, iblock,
317 max_blocks);
318 + if (ret >= 0)
319 + ext4_update_inode_fsync_trans(handle, inode, 1);
320 out:
321 if (ret <= 0) {
322 err = ret;
323 @@ -3329,10 +3335,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
324 allocated = ext4_ext_get_actual_len(&newex);
325 set_buffer_new(bh_result);
326
327 - /* Cache only when it is _not_ an uninitialized extent */
328 - if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
329 + /*
330 + * Cache the extent and update transaction to commit on fdatasync only
331 + * when it is _not_ an uninitialized extent.
332 + */
333 + if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
334 ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
335 EXT4_EXT_CACHE_EXTENT);
336 + ext4_update_inode_fsync_trans(handle, inode, 1);
337 + } else
338 + ext4_update_inode_fsync_trans(handle, inode, 0);
339 out:
340 if (allocated > max_blocks)
341 allocated = max_blocks;
342 @@ -3720,10 +3732,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
343 * Walk the extent tree gathering extent information.
344 * ext4_ext_fiemap_cb will push extents back to user.
345 */
346 - down_read(&EXT4_I(inode)->i_data_sem);
347 error = ext4_ext_walk_space(inode, start_blk, len_blks,
348 ext4_ext_fiemap_cb, fieinfo);
349 - up_read(&EXT4_I(inode)->i_data_sem);
350 }
351
352 return error;
353 diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
354 index 2b15312..d6049e4 100644
355 --- a/fs/ext4/fsync.c
356 +++ b/fs/ext4/fsync.c
357 @@ -51,25 +51,30 @@
358 int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
359 {
360 struct inode *inode = dentry->d_inode;
361 + struct ext4_inode_info *ei = EXT4_I(inode);
362 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
363 - int err, ret = 0;
364 + int ret;
365 + tid_t commit_tid;
366
367 J_ASSERT(ext4_journal_current_handle() == NULL);
368
369 trace_ext4_sync_file(file, dentry, datasync);
370
371 + if (inode->i_sb->s_flags & MS_RDONLY)
372 + return 0;
373 +
374 ret = flush_aio_dio_completed_IO(inode);
375 if (ret < 0)
376 - goto out;
377 + return ret;
378 +
379 + if (!journal)
380 + return simple_fsync(file, dentry, datasync);
381 +
382 /*
383 - * data=writeback:
384 + * data=writeback,ordered:
385 * The caller's filemap_fdatawrite()/wait will sync the data.
386 - * sync_inode() will sync the metadata
387 - *
388 - * data=ordered:
389 - * The caller's filemap_fdatawrite() will write the data and
390 - * sync_inode() will write the inode if it is dirty. Then the caller's
391 - * filemap_fdatawait() will wait on the pages.
392 + * Metadata is in the journal, we wait for proper transaction to
393 + * commit here.
394 *
395 * data=journal:
396 * filemap_fdatawrite won't do anything (the buffers are clean).
397 @@ -79,32 +84,13 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
398 * (they were dirtied by commit). But that's OK - the blocks are
399 * safe in-journal, which is all fsync() needs to ensure.
400 */
401 - if (ext4_should_journal_data(inode)) {
402 - ret = ext4_force_commit(inode->i_sb);
403 - goto out;
404 - }
405 + if (ext4_should_journal_data(inode))
406 + return ext4_force_commit(inode->i_sb);
407
408 - if (!journal)
409 - ret = sync_mapping_buffers(inode->i_mapping);
410 -
411 - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
412 - goto out;
413 -
414 - /*
415 - * The VFS has written the file data. If the inode is unaltered
416 - * then we need not start a commit.
417 - */
418 - if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
419 - struct writeback_control wbc = {
420 - .sync_mode = WB_SYNC_ALL,
421 - .nr_to_write = 0, /* sys_fsync did this */
422 - };
423 - err = sync_inode(inode, &wbc);
424 - if (ret == 0)
425 - ret = err;
426 - }
427 -out:
428 - if (journal && (journal->j_flags & JBD2_BARRIER))
429 + commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
430 + if (jbd2_log_start_commit(journal, commit_tid))
431 + jbd2_log_wait_commit(journal, commit_tid);
432 + else if (journal->j_flags & JBD2_BARRIER)
433 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
434 return ret;
435 }
436 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
437 index 2c8caa5..1dae9a4 100644
438 --- a/fs/ext4/inode.c
439 +++ b/fs/ext4/inode.c
440 @@ -1021,10 +1021,12 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
441 if (!err)
442 err = ext4_splice_branch(handle, inode, iblock,
443 partial, indirect_blks, count);
444 - else
445 + if (err)
446 goto cleanup;
447
448 set_buffer_new(bh_result);
449 +
450 + ext4_update_inode_fsync_trans(handle, inode, 1);
451 got_it:
452 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
453 if (count > blocks_to_boundary)
454 @@ -1052,7 +1054,7 @@ qsize_t ext4_get_reserved_space(struct inode *inode)
455 EXT4_I(inode)->i_reserved_meta_blocks;
456 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
457
458 - return total;
459 + return (total << inode->i_blkbits);
460 }
461 /*
462 * Calculate the number of metadata blocks need to reserve
463 @@ -1534,6 +1536,16 @@ static int do_journal_get_write_access(handle_t *handle,
464 return ext4_journal_get_write_access(handle, bh);
465 }
466
467 +/*
468 + * Truncate blocks that were not used by write. We have to truncate the
469 + * pagecache as well so that corresponding buffers get properly unmapped.
470 + */
471 +static void ext4_truncate_failed_write(struct inode *inode)
472 +{
473 + truncate_inode_pages(inode->i_mapping, inode->i_size);
474 + ext4_truncate(inode);
475 +}
476 +
477 static int ext4_write_begin(struct file *file, struct address_space *mapping,
478 loff_t pos, unsigned len, unsigned flags,
479 struct page **pagep, void **fsdata)
480 @@ -1599,7 +1611,7 @@ retry:
481
482 ext4_journal_stop(handle);
483 if (pos + len > inode->i_size) {
484 - ext4_truncate(inode);
485 + ext4_truncate_failed_write(inode);
486 /*
487 * If truncate failed early the inode might
488 * still be on the orphan list; we need to
489 @@ -1709,7 +1721,7 @@ static int ext4_ordered_write_end(struct file *file,
490 ret = ret2;
491
492 if (pos + len > inode->i_size) {
493 - ext4_truncate(inode);
494 + ext4_truncate_failed_write(inode);
495 /*
496 * If truncate failed early the inode might still be
497 * on the orphan list; we need to make sure the inode
498 @@ -1751,7 +1763,7 @@ static int ext4_writeback_write_end(struct file *file,
499 ret = ret2;
500
501 if (pos + len > inode->i_size) {
502 - ext4_truncate(inode);
503 + ext4_truncate_failed_write(inode);
504 /*
505 * If truncate failed early the inode might still be
506 * on the orphan list; we need to make sure the inode
507 @@ -1814,7 +1826,7 @@ static int ext4_journalled_write_end(struct file *file,
508 if (!ret)
509 ret = ret2;
510 if (pos + len > inode->i_size) {
511 - ext4_truncate(inode);
512 + ext4_truncate_failed_write(inode);
513 /*
514 * If truncate failed early the inode might still be
515 * on the orphan list; we need to make sure the inode
516 @@ -2788,7 +2800,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
517 * number of contiguous block. So we will limit
518 * number of contiguous block to a sane value
519 */
520 - if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
521 + if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
522 (max_blocks > EXT4_MAX_TRANS_DATA))
523 max_blocks = EXT4_MAX_TRANS_DATA;
524
525 @@ -3091,7 +3103,7 @@ retry:
526 * i_size_read because we hold i_mutex.
527 */
528 if (pos + len > inode->i_size)
529 - ext4_truncate(inode);
530 + ext4_truncate_failed_write(inode);
531 }
532
533 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
534 @@ -4120,6 +4132,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
535 __le32 *last)
536 {
537 __le32 *p;
538 + int is_metadata = S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode);
539 +
540 if (try_to_extend_transaction(handle, inode)) {
541 if (bh) {
542 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
543 @@ -4150,11 +4164,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
544
545 *p = 0;
546 tbh = sb_find_get_block(inode->i_sb, nr);
547 - ext4_forget(handle, 0, inode, tbh, nr);
548 + ext4_forget(handle, is_metadata, inode, tbh, nr);
549 }
550 }
551
552 - ext4_free_blocks(handle, inode, block_to_free, count, 0);
553 + ext4_free_blocks(handle, inode, block_to_free, count, is_metadata);
554 }
555
556 /**
557 @@ -4781,8 +4795,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
558 struct ext4_iloc iloc;
559 struct ext4_inode *raw_inode;
560 struct ext4_inode_info *ei;
561 - struct buffer_head *bh;
562 struct inode *inode;
563 + journal_t *journal = EXT4_SB(sb)->s_journal;
564 long ret;
565 int block;
566
567 @@ -4793,11 +4807,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
568 return inode;
569
570 ei = EXT4_I(inode);
571 + iloc.bh = 0;
572
573 ret = __ext4_get_inode_loc(inode, &iloc, 0);
574 if (ret < 0)
575 goto bad_inode;
576 - bh = iloc.bh;
577 raw_inode = ext4_raw_inode(&iloc);
578 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
579 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
580 @@ -4820,7 +4834,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
581 if (inode->i_mode == 0 ||
582 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
583 /* this inode is deleted */
584 - brelse(bh);
585 ret = -ESTALE;
586 goto bad_inode;
587 }
588 @@ -4848,11 +4861,35 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
589 ei->i_data[block] = raw_inode->i_block[block];
590 INIT_LIST_HEAD(&ei->i_orphan);
591
592 + /*
593 + * Set transaction id's of transactions that have to be committed
594 + * to finish f[data]sync. We set them to currently running transaction
595 + * as we cannot be sure that the inode or some of its metadata isn't
596 + * part of the transaction - the inode could have been reclaimed and
597 + * now it is reread from disk.
598 + */
599 + if (journal) {
600 + transaction_t *transaction;
601 + tid_t tid;
602 +
603 + spin_lock(&journal->j_state_lock);
604 + if (journal->j_running_transaction)
605 + transaction = journal->j_running_transaction;
606 + else
607 + transaction = journal->j_committing_transaction;
608 + if (transaction)
609 + tid = transaction->t_tid;
610 + else
611 + tid = journal->j_commit_sequence;
612 + spin_unlock(&journal->j_state_lock);
613 + ei->i_sync_tid = tid;
614 + ei->i_datasync_tid = tid;
615 + }
616 +
617 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
618 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
619 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
620 EXT4_INODE_SIZE(inode->i_sb)) {
621 - brelse(bh);
622 ret = -EIO;
623 goto bad_inode;
624 }
625 @@ -4884,10 +4921,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
626
627 ret = 0;
628 if (ei->i_file_acl &&
629 - ((ei->i_file_acl <
630 - (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
631 - EXT4_SB(sb)->s_gdb_count)) ||
632 - (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
633 + !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
634 ext4_error(sb, __func__,
635 "bad extended attribute block %llu in inode #%lu",
636 ei->i_file_acl, inode->i_ino);
637 @@ -4905,10 +4939,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
638 /* Validate block references which are part of inode */
639 ret = ext4_check_inode_blockref(inode);
640 }
641 - if (ret) {
642 - brelse(bh);
643 + if (ret)
644 goto bad_inode;
645 - }
646
647 if (S_ISREG(inode->i_mode)) {
648 inode->i_op = &ext4_file_inode_operations;
649 @@ -4936,7 +4968,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
650 init_special_inode(inode, inode->i_mode,
651 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
652 } else {
653 - brelse(bh);
654 ret = -EIO;
655 ext4_error(inode->i_sb, __func__,
656 "bogus i_mode (%o) for inode=%lu",
657 @@ -4949,6 +4980,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
658 return inode;
659
660 bad_inode:
661 + brelse(iloc.bh);
662 iget_failed(inode);
663 return ERR_PTR(ret);
664 }
665 @@ -5108,6 +5140,7 @@ static int ext4_do_update_inode(handle_t *handle,
666 err = rc;
667 ei->i_state &= ~EXT4_STATE_NEW;
668
669 + ext4_update_inode_fsync_trans(handle, inode, 0);
670 out_brelse:
671 brelse(bh);
672 ext4_std_error(inode->i_sb, err);
673 @@ -5227,8 +5260,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
674
675 /* (user+group)*(old+new) structure, inode write (sb,
676 * inode block, ? - but truncate inode update has it) */
677 - handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+
678 - EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
679 + handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
680 + EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
681 if (IS_ERR(handle)) {
682 error = PTR_ERR(handle);
683 goto err_out;
684 diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
685 index c1cdf61..b63d193 100644
686 --- a/fs/ext4/ioctl.c
687 +++ b/fs/ext4/ioctl.c
688 @@ -221,31 +221,38 @@ setversion_out:
689 struct file *donor_filp;
690 int err;
691
692 + if (!(filp->f_mode & FMODE_READ) ||
693 + !(filp->f_mode & FMODE_WRITE))
694 + return -EBADF;
695 +
696 if (copy_from_user(&me,
697 (struct move_extent __user *)arg, sizeof(me)))
698 return -EFAULT;
699 + me.moved_len = 0;
700
701 donor_filp = fget(me.donor_fd);
702 if (!donor_filp)
703 return -EBADF;
704
705 - if (!capable(CAP_DAC_OVERRIDE)) {
706 - if ((current->real_cred->fsuid != inode->i_uid) ||
707 - !(inode->i_mode & S_IRUSR) ||
708 - !(donor_filp->f_dentry->d_inode->i_mode &
709 - S_IRUSR)) {
710 - fput(donor_filp);
711 - return -EACCES;
712 - }
713 + if (!(donor_filp->f_mode & FMODE_WRITE)) {
714 + err = -EBADF;
715 + goto mext_out;
716 }
717
718 + err = mnt_want_write(filp->f_path.mnt);
719 + if (err)
720 + goto mext_out;
721 +
722 err = ext4_move_extents(filp, donor_filp, me.orig_start,
723 me.donor_start, me.len, &me.moved_len);
724 - fput(donor_filp);
725 + mnt_drop_write(filp->f_path.mnt);
726 + if (me.moved_len > 0)
727 + file_remove_suid(donor_filp);
728
729 if (copy_to_user((struct move_extent *)arg, &me, sizeof(me)))
730 - return -EFAULT;
731 -
732 + err = -EFAULT;
733 +mext_out:
734 + fput(donor_filp);
735 return err;
736 }
737
738 diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
739 index bba1282..7d71148 100644
740 --- a/fs/ext4/mballoc.c
741 +++ b/fs/ext4/mballoc.c
742 @@ -2529,7 +2529,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
743 struct ext4_group_info *db;
744 int err, count = 0, count2 = 0;
745 struct ext4_free_data *entry;
746 - ext4_fsblk_t discard_block;
747 struct list_head *l, *ltmp;
748
749 list_for_each_safe(l, ltmp, &txn->t_private_list) {
750 @@ -2559,13 +2558,19 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
751 page_cache_release(e4b.bd_bitmap_page);
752 }
753 ext4_unlock_group(sb, entry->group);
754 - discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
755 - + entry->start_blk
756 - + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
757 - trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
758 - entry->count);
759 - sb_issue_discard(sb, discard_block, entry->count);
760 -
761 + if (test_opt(sb, DISCARD)) {
762 + ext4_fsblk_t discard_block;
763 + struct ext4_super_block *es = EXT4_SB(sb)->s_es;
764 +
765 + discard_block = (ext4_fsblk_t)entry->group *
766 + EXT4_BLOCKS_PER_GROUP(sb)
767 + + entry->start_blk
768 + + le32_to_cpu(es->s_first_data_block);
769 + trace_ext4_discard_blocks(sb,
770 + (unsigned long long)discard_block,
771 + entry->count);
772 + sb_issue_discard(sb, discard_block, entry->count);
773 + }
774 kmem_cache_free(ext4_free_ext_cachep, entry);
775 ext4_mb_release_desc(&e4b);
776 }
777 @@ -3006,6 +3011,24 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
778 }
779
780 /*
781 + * Called on failure; free up any blocks from the inode PA for this
782 + * context. We don't need this for MB_GROUP_PA because we only change
783 + * pa_free in ext4_mb_release_context(), but on failure, we've already
784 + * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
785 + */
786 +static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
787 +{
788 + struct ext4_prealloc_space *pa = ac->ac_pa;
789 + int len;
790 +
791 + if (pa && pa->pa_type == MB_INODE_PA) {
792 + len = ac->ac_b_ex.fe_len;
793 + pa->pa_free += len;
794 + }
795 +
796 +}
797 +
798 +/*
799 * use blocks preallocated to inode
800 */
801 static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
802 @@ -4290,6 +4313,7 @@ repeat:
803 ac->ac_status = AC_STATUS_CONTINUE;
804 goto repeat;
805 } else if (*errp) {
806 + ext4_discard_allocated_blocks(ac);
807 ac->ac_b_ex.fe_len = 0;
808 ar->len = 0;
809 ext4_mb_show_ac(ac);
810 diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
811 index a93d5b8..8646149 100644
812 --- a/fs/ext4/migrate.c
813 +++ b/fs/ext4/migrate.c
814 @@ -238,7 +238,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
815 * So allocate a credit of 3. We may update
816 * quota (user and group).
817 */
818 - needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
819 + needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
820
821 if (ext4_journal_extend(handle, needed) != 0)
822 retval = ext4_journal_restart(handle, needed);
823 @@ -477,7 +477,7 @@ int ext4_ext_migrate(struct inode *inode)
824 handle = ext4_journal_start(inode,
825 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
826 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
827 - 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
828 + EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
829 + 1);
830 if (IS_ERR(handle)) {
831 retval = PTR_ERR(handle);
832 diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
833 index 25b6b14..f5b03a1 100644
834 --- a/fs/ext4/move_extent.c
835 +++ b/fs/ext4/move_extent.c
836 @@ -77,12 +77,14 @@ static int
837 mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
838 struct ext4_extent **extent)
839 {
840 + struct ext4_extent_header *eh;
841 int ppos, leaf_ppos = path->p_depth;
842
843 ppos = leaf_ppos;
844 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
845 /* leaf block */
846 *extent = ++path[ppos].p_ext;
847 + path[ppos].p_block = ext_pblock(path[ppos].p_ext);
848 return 0;
849 }
850
851 @@ -119,9 +121,18 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
852 ext_block_hdr(path[cur_ppos+1].p_bh);
853 }
854
855 + path[leaf_ppos].p_ext = *extent = NULL;
856 +
857 + eh = path[leaf_ppos].p_hdr;
858 + if (le16_to_cpu(eh->eh_entries) == 0)
859 + /* empty leaf is found */
860 + return -ENODATA;
861 +
862 /* leaf block */
863 path[leaf_ppos].p_ext = *extent =
864 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
865 + path[leaf_ppos].p_block =
866 + ext_pblock(path[leaf_ppos].p_ext);
867 return 0;
868 }
869 }
870 @@ -155,40 +166,15 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2,
871 }
872
873 /**
874 - * mext_double_down_read - Acquire two inodes' read semaphore
875 - *
876 - * @orig_inode: original inode structure
877 - * @donor_inode: donor inode structure
878 - * Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
879 - */
880 -static void
881 -mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
882 -{
883 - struct inode *first = orig_inode, *second = donor_inode;
884 -
885 - /*
886 - * Use the inode number to provide the stable locking order instead
887 - * of its address, because the C language doesn't guarantee you can
888 - * compare pointers that don't come from the same array.
889 - */
890 - if (donor_inode->i_ino < orig_inode->i_ino) {
891 - first = donor_inode;
892 - second = orig_inode;
893 - }
894 -
895 - down_read(&EXT4_I(first)->i_data_sem);
896 - down_read(&EXT4_I(second)->i_data_sem);
897 -}
898 -
899 -/**
900 - * mext_double_down_write - Acquire two inodes' write semaphore
901 + * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
902 *
903 * @orig_inode: original inode structure
904 * @donor_inode: donor inode structure
905 - * Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
906 + * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
907 + * i_ino order.
908 */
909 static void
910 -mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
911 +double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
912 {
913 struct inode *first = orig_inode, *second = donor_inode;
914
915 @@ -203,32 +189,18 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
916 }
917
918 down_write(&EXT4_I(first)->i_data_sem);
919 - down_write(&EXT4_I(second)->i_data_sem);
920 + down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
921 }
922
923 /**
924 - * mext_double_up_read - Release two inodes' read semaphore
925 + * double_up_write_data_sem - Release two inodes' write lock of i_data_sem
926 *
927 * @orig_inode: original inode structure to be released its lock first
928 * @donor_inode: donor inode structure to be released its lock second
929 - * Release read semaphore of two inodes (orig and donor).
930 + * Release write lock of i_data_sem of two inodes (orig and donor).
931 */
932 static void
933 -mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
934 -{
935 - up_read(&EXT4_I(orig_inode)->i_data_sem);
936 - up_read(&EXT4_I(donor_inode)->i_data_sem);
937 -}
938 -
939 -/**
940 - * mext_double_up_write - Release two inodes' write semaphore
941 - *
942 - * @orig_inode: original inode structure to be released its lock first
943 - * @donor_inode: donor inode structure to be released its lock second
944 - * Release write semaphore of two inodes (orig and donor).
945 - */
946 -static void
947 -mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
948 +double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
949 {
950 up_write(&EXT4_I(orig_inode)->i_data_sem);
951 up_write(&EXT4_I(donor_inode)->i_data_sem);
952 @@ -661,6 +633,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
953 * @donor_inode: donor inode
954 * @from: block offset of orig_inode
955 * @count: block count to be replaced
956 + * @err: pointer to save return value
957 *
958 * Replace original inode extents and donor inode extents page by page.
959 * We implement this replacement in the following three steps:
960 @@ -671,33 +644,33 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
961 * 3. Change the block information of donor inode to point at the saved
962 * original inode blocks in the dummy extents.
963 *
964 - * Return 0 on success, or a negative error value on failure.
965 + * Return replaced block count.
966 */
967 static int
968 mext_replace_branches(handle_t *handle, struct inode *orig_inode,
969 struct inode *donor_inode, ext4_lblk_t from,
970 - ext4_lblk_t count)
971 + ext4_lblk_t count, int *err)
972 {
973 struct ext4_ext_path *orig_path = NULL;
974 struct ext4_ext_path *donor_path = NULL;
975 struct ext4_extent *oext, *dext;
976 struct ext4_extent tmp_dext, tmp_oext;
977 ext4_lblk_t orig_off = from, donor_off = from;
978 - int err = 0;
979 int depth;
980 int replaced_count = 0;
981 int dext_alen;
982
983 - mext_double_down_write(orig_inode, donor_inode);
984 + /* Protect extent trees against block allocations via delalloc */
985 + double_down_write_data_sem(orig_inode, donor_inode);
986
987 /* Get the original extent for the block "orig_off" */
988 - err = get_ext_path(orig_inode, orig_off, &orig_path);
989 - if (err)
990 + *err = get_ext_path(orig_inode, orig_off, &orig_path);
991 + if (*err)
992 goto out;
993
994 /* Get the donor extent for the head */
995 - err = get_ext_path(donor_inode, donor_off, &donor_path);
996 - if (err)
997 + *err = get_ext_path(donor_inode, donor_off, &donor_path);
998 + if (*err)
999 goto out;
1000 depth = ext_depth(orig_inode);
1001 oext = orig_path[depth].p_ext;
1002 @@ -707,9 +680,9 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
1003 dext = donor_path[depth].p_ext;
1004 tmp_dext = *dext;
1005
1006 - err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
1007 + *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
1008 donor_off, count);
1009 - if (err)
1010 + if (*err)
1011 goto out;
1012
1013 /* Loop for the donor extents */
1014 @@ -718,7 +691,7 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
1015 if (!dext) {
1016 ext4_error(donor_inode->i_sb, __func__,
1017 "The extent for donor must be found");
1018 - err = -EIO;
1019 + *err = -EIO;
1020 goto out;
1021 } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
1022 ext4_error(donor_inode->i_sb, __func__,
1023 @@ -726,20 +699,20 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
1024 "extent(%u) should be equal",
1025 donor_off,
1026 le32_to_cpu(tmp_dext.ee_block));
1027 - err = -EIO;
1028 + *err = -EIO;
1029 goto out;
1030 }
1031
1032 /* Set donor extent to orig extent */
1033 - err = mext_leaf_block(handle, orig_inode,
1034 + *err = mext_leaf_block(handle, orig_inode,
1035 orig_path, &tmp_dext, &orig_off);
1036 - if (err < 0)
1037 + if (*err)
1038 goto out;
1039
1040 /* Set orig extent to donor extent */
1041 - err = mext_leaf_block(handle, donor_inode,
1042 + *err = mext_leaf_block(handle, donor_inode,
1043 donor_path, &tmp_oext, &donor_off);
1044 - if (err < 0)
1045 + if (*err)
1046 goto out;
1047
1048 dext_alen = ext4_ext_get_actual_len(&tmp_dext);
1049 @@ -753,35 +726,25 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
1050
1051 if (orig_path)
1052 ext4_ext_drop_refs(orig_path);
1053 - err = get_ext_path(orig_inode, orig_off, &orig_path);
1054 - if (err)
1055 + *err = get_ext_path(orig_inode, orig_off, &orig_path);
1056 + if (*err)
1057 goto out;
1058 depth = ext_depth(orig_inode);
1059 oext = orig_path[depth].p_ext;
1060 - if (le32_to_cpu(oext->ee_block) +
1061 - ext4_ext_get_actual_len(oext) <= orig_off) {
1062 - err = 0;
1063 - goto out;
1064 - }
1065 tmp_oext = *oext;
1066
1067 if (donor_path)
1068 ext4_ext_drop_refs(donor_path);
1069 - err = get_ext_path(donor_inode, donor_off, &donor_path);
1070 - if (err)
1071 + *err = get_ext_path(donor_inode, donor_off, &donor_path);
1072 + if (*err)
1073 goto out;
1074 depth = ext_depth(donor_inode);
1075 dext = donor_path[depth].p_ext;
1076 - if (le32_to_cpu(dext->ee_block) +
1077 - ext4_ext_get_actual_len(dext) <= donor_off) {
1078 - err = 0;
1079 - goto out;
1080 - }
1081 tmp_dext = *dext;
1082
1083 - err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
1084 + *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
1085 donor_off, count - replaced_count);
1086 - if (err)
1087 + if (*err)
1088 goto out;
1089 }
1090
1091 @@ -795,8 +758,12 @@ out:
1092 kfree(donor_path);
1093 }
1094
1095 - mext_double_up_write(orig_inode, donor_inode);
1096 - return err;
1097 + ext4_ext_invalidate_cache(orig_inode);
1098 + ext4_ext_invalidate_cache(donor_inode);
1099 +
1100 + double_up_write_data_sem(orig_inode, donor_inode);
1101 +
1102 + return replaced_count;
1103 }
1104
1105 /**
1106 @@ -808,16 +775,17 @@ out:
1107 * @data_offset_in_page: block index where data swapping starts
1108 * @block_len_in_page: the number of blocks to be swapped
1109 * @uninit: orig extent is uninitialized or not
1110 + * @err: pointer to save return value
1111 *
1112 * Save the data in original inode blocks and replace original inode extents
1113 * with donor inode extents by calling mext_replace_branches().
1114 - * Finally, write out the saved data in new original inode blocks. Return 0
1115 - * on success, or a negative error value on failure.
1116 + * Finally, write out the saved data in new original inode blocks. Return
1117 + * replaced block count.
1118 */
1119 static int
1120 move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
1121 pgoff_t orig_page_offset, int data_offset_in_page,
1122 - int block_len_in_page, int uninit)
1123 + int block_len_in_page, int uninit, int *err)
1124 {
1125 struct inode *orig_inode = o_filp->f_dentry->d_inode;
1126 struct address_space *mapping = orig_inode->i_mapping;
1127 @@ -829,9 +797,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
1128 long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
1129 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
1130 unsigned int w_flags = 0;
1131 - unsigned int tmp_data_len, data_len;
1132 + unsigned int tmp_data_size, data_size, replaced_size;
1133 void *fsdata;
1134 - int ret, i, jblocks;
1135 + int i, jblocks;
1136 + int err2 = 0;
1137 + int replaced_count = 0;
1138 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
1139
1140 /*
1141 @@ -841,8 +811,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
1142 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
1143 handle = ext4_journal_start(orig_inode, jblocks);
1144 if (IS_ERR(handle)) {
1145 - ret = PTR_ERR(handle);
1146 - return ret;
1147 + *err = PTR_ERR(handle);
1148 + return 0;
1149 }
1150
1151 if (segment_eq(get_fs(), KERNEL_DS))
1152 @@ -858,39 +828,36 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
1153 * Just swap data blocks between orig and donor.
1154 */
1155 if (uninit) {
1156 - ret = mext_replace_branches(handle, orig_inode,
1157 - donor_inode, orig_blk_offset,
1158 - block_len_in_page);
1159 -
1160 - /* Clear the inode cache not to refer to the old data */
1161 - ext4_ext_invalidate_cache(orig_inode);
1162 - ext4_ext_invalidate_cache(donor_inode);
1163 + replaced_count = mext_replace_branches(handle, orig_inode,
1164 + donor_inode, orig_blk_offset,
1165 + block_len_in_page, err);
1166 goto out2;
1167 }
1168
1169 offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
1170
1171 - /* Calculate data_len */
1172 + /* Calculate data_size */
1173 if ((orig_blk_offset + block_len_in_page - 1) ==
1174 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
1175 /* Replace the last block */
1176 - tmp_data_len = orig_inode->i_size & (blocksize - 1);
1177 + tmp_data_size = orig_inode->i_size & (blocksize - 1);
1178 /*
1179 - * If data_len equal zero, it shows data_len is multiples of
1180 + * If data_size equal zero, it shows data_size is multiples of
1181 * blocksize. So we set appropriate value.
1182 */
1183 - if (tmp_data_len == 0)
1184 - tmp_data_len = blocksize;
1185 + if (tmp_data_size == 0)
1186 + tmp_data_size = blocksize;
1187
1188 - data_len = tmp_data_len +
1189 + data_size = tmp_data_size +
1190 ((block_len_in_page - 1) << orig_inode->i_blkbits);
1191 - } else {
1192 - data_len = block_len_in_page << orig_inode->i_blkbits;
1193 - }
1194 + } else
1195 + data_size = block_len_in_page << orig_inode->i_blkbits;
1196 +
1197 + replaced_size = data_size;
1198
1199 - ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags,
1200 + *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags,
1201 &page, &fsdata);
1202 - if (unlikely(ret < 0))
1203 + if (unlikely(*err < 0))
1204 goto out;
1205
1206 if (!PageUptodate(page)) {
1207 @@ -911,14 +878,17 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
1208 /* Release old bh and drop refs */
1209 try_to_release_page(page, 0);
1210
1211 - ret = mext_replace_branches(handle, orig_inode, donor_inode,
1212 - orig_blk_offset, block_len_in_page);
1213 - if (ret < 0)
1214 - goto out;
1215 -
1216 - /* Clear the inode cache not to refer to the old data */
1217 - ext4_ext_invalidate_cache(orig_inode);
1218 - ext4_ext_invalidate_cache(donor_inode);
1219 + replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
1220 + orig_blk_offset, block_len_in_page,
1221 + &err2);
1222 + if (err2) {
1223 + if (replaced_count) {
1224 + block_len_in_page = replaced_count;
1225 + replaced_size =
1226 + block_len_in_page << orig_inode->i_blkbits;
1227 + } else
1228 + goto out;
1229 + }
1230
1231 if (!page_has_buffers(page))
1232 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
1233 @@ -928,16 +898,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
1234 bh = bh->b_this_page;
1235
1236 for (i = 0; i < block_len_in_page; i++) {
1237 - ret = ext4_get_block(orig_inode,
1238 + *err = ext4_get_block(orig_inode,
1239 (sector_t)(orig_blk_offset + i), bh, 0);
1240 - if (ret < 0)
1241 + if (*err < 0)
1242 goto out;
1243
1244 if (bh->b_this_page != NULL)
1245 bh = bh->b_this_page;
1246 }
1247
1248 - ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len,
1249 + *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
1250 page, fsdata);
1251 page = NULL;
1252
1253 @@ -951,7 +921,10 @@ out:
1254 out2:
1255 ext4_journal_stop(handle);
1256
1257 - return ret < 0 ? ret : 0;
1258 + if (err2)
1259 + *err = err2;
1260 +
1261 + return replaced_count;
1262 }
1263
1264 /**
1265 @@ -962,7 +935,6 @@ out2:
1266 * @orig_start: logical start offset in block for orig
1267 * @donor_start: logical start offset in block for donor
1268 * @len: the number of blocks to be moved
1269 - * @moved_len: moved block length
1270 *
1271 * Check the arguments of ext4_move_extents() whether the files can be
1272 * exchanged with each other.
1273 @@ -970,8 +942,8 @@ out2:
1274 */
1275 static int
1276 mext_check_arguments(struct inode *orig_inode,
1277 - struct inode *donor_inode, __u64 orig_start,
1278 - __u64 donor_start, __u64 *len, __u64 moved_len)
1279 + struct inode *donor_inode, __u64 orig_start,
1280 + __u64 donor_start, __u64 *len)
1281 {
1282 ext4_lblk_t orig_blocks, donor_blocks;
1283 unsigned int blkbits = orig_inode->i_blkbits;
1284 @@ -985,6 +957,13 @@ mext_check_arguments(struct inode *orig_inode,
1285 return -EINVAL;
1286 }
1287
1288 + if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
1289 + ext4_debug("ext4 move extent: suid or sgid is set"
1290 + " to donor file [ino:orig %lu, donor %lu]\n",
1291 + orig_inode->i_ino, donor_inode->i_ino);
1292 + return -EINVAL;
1293 + }
1294 +
1295 /* Ext4 move extent does not support swapfile */
1296 if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
1297 ext4_debug("ext4 move extent: The argument files should "
1298 @@ -1025,13 +1004,6 @@ mext_check_arguments(struct inode *orig_inode,
1299 return -EINVAL;
1300 }
1301
1302 - if (moved_len) {
1303 - ext4_debug("ext4 move extent: moved_len should be 0 "
1304 - "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
1305 - donor_inode->i_ino);
1306 - return -EINVAL;
1307 - }
1308 -
1309 if ((orig_start > EXT_MAX_BLOCK) ||
1310 (donor_start > EXT_MAX_BLOCK) ||
1311 (*len > EXT_MAX_BLOCK) ||
1312 @@ -1232,16 +1204,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1313 return -EINVAL;
1314 }
1315
1316 - /* protect orig and donor against a truncate */
1317 + /* Protect orig and donor inodes against a truncate */
1318 ret1 = mext_inode_double_lock(orig_inode, donor_inode);
1319 if (ret1 < 0)
1320 return ret1;
1321
1322 - mext_double_down_read(orig_inode, donor_inode);
1323 + /* Protect extent tree against block allocations via delalloc */
1324 + double_down_write_data_sem(orig_inode, donor_inode);
1325 /* Check the filesystem environment whether move_extent can be done */
1326 ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
1327 - donor_start, &len, *moved_len);
1328 - mext_double_up_read(orig_inode, donor_inode);
1329 + donor_start, &len);
1330 if (ret1)
1331 goto out;
1332
1333 @@ -1355,36 +1327,39 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1334 seq_start = le32_to_cpu(ext_cur->ee_block);
1335 rest_blocks = seq_blocks;
1336
1337 - /* Discard preallocations of two inodes */
1338 - down_write(&EXT4_I(orig_inode)->i_data_sem);
1339 - ext4_discard_preallocations(orig_inode);
1340 - up_write(&EXT4_I(orig_inode)->i_data_sem);
1341 -
1342 - down_write(&EXT4_I(donor_inode)->i_data_sem);
1343 - ext4_discard_preallocations(donor_inode);
1344 - up_write(&EXT4_I(donor_inode)->i_data_sem);
1345 + /*
1346 + * Up semaphore to avoid following problems:
1347 + * a. transaction deadlock among ext4_journal_start,
1348 + * ->write_begin via pagefault, and jbd2_journal_commit
1349 + * b. racing with ->readpage, ->write_begin, and ext4_get_block
1350 + * in move_extent_per_page
1351 + */
1352 + double_up_write_data_sem(orig_inode, donor_inode);
1353
1354 while (orig_page_offset <= seq_end_page) {
1355
1356 /* Swap original branches with new branches */
1357 - ret1 = move_extent_per_page(o_filp, donor_inode,
1358 + block_len_in_page = move_extent_per_page(
1359 + o_filp, donor_inode,
1360 orig_page_offset,
1361 data_offset_in_page,
1362 - block_len_in_page, uninit);
1363 - if (ret1 < 0)
1364 - goto out;
1365 - orig_page_offset++;
1366 + block_len_in_page, uninit,
1367 + &ret1);
1368 +
1369 /* Count how many blocks we have exchanged */
1370 *moved_len += block_len_in_page;
1371 + if (ret1 < 0)
1372 + break;
1373 if (*moved_len > len) {
1374 ext4_error(orig_inode->i_sb, __func__,
1375 "We replaced blocks too much! "
1376 "sum of replaced: %llu requested: %llu",
1377 *moved_len, len);
1378 ret1 = -EIO;
1379 - goto out;
1380 + break;
1381 }
1382
1383 + orig_page_offset++;
1384 data_offset_in_page = 0;
1385 rest_blocks -= block_len_in_page;
1386 if (rest_blocks > blocks_per_page)
1387 @@ -1393,6 +1368,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1388 block_len_in_page = rest_blocks;
1389 }
1390
1391 + double_down_write_data_sem(orig_inode, donor_inode);
1392 + if (ret1 < 0)
1393 + break;
1394 +
1395 /* Decrease buffer counter */
1396 if (holecheck_path)
1397 ext4_ext_drop_refs(holecheck_path);
1398 @@ -1414,6 +1393,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1399
1400 }
1401 out:
1402 + if (*moved_len) {
1403 + ext4_discard_preallocations(orig_inode);
1404 + ext4_discard_preallocations(donor_inode);
1405 + }
1406 +
1407 if (orig_path) {
1408 ext4_ext_drop_refs(orig_path);
1409 kfree(orig_path);
1410 @@ -1422,7 +1406,7 @@ out:
1411 ext4_ext_drop_refs(holecheck_path);
1412 kfree(holecheck_path);
1413 }
1414 -
1415 + double_up_write_data_sem(orig_inode, donor_inode);
1416 ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
1417
1418 if (ret1)
1419 diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
1420 index 6d2c1b8..17a17e1 100644
1421 --- a/fs/ext4/namei.c
1422 +++ b/fs/ext4/namei.c
1423 @@ -1292,9 +1292,6 @@ errout:
1424 * add_dirent_to_buf will attempt search the directory block for
1425 * space. It will return -ENOSPC if no space is available, and -EIO
1426 * and -EEXIST if directory entry already exists.
1427 - *
1428 - * NOTE! bh is NOT released in the case where ENOSPC is returned. In
1429 - * all other cases bh is released.
1430 */
1431 static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1432 struct inode *inode, struct ext4_dir_entry_2 *de,
1433 @@ -1315,14 +1312,10 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1434 top = bh->b_data + blocksize - reclen;
1435 while ((char *) de <= top) {
1436 if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
1437 - bh, offset)) {
1438 - brelse(bh);
1439 + bh, offset))
1440 return -EIO;
1441 - }
1442 - if (ext4_match(namelen, name, de)) {
1443 - brelse(bh);
1444 + if (ext4_match(namelen, name, de))
1445 return -EEXIST;
1446 - }
1447 nlen = EXT4_DIR_REC_LEN(de->name_len);
1448 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
1449 if ((de->inode? rlen - nlen: rlen) >= reclen)
1450 @@ -1337,7 +1330,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1451 err = ext4_journal_get_write_access(handle, bh);
1452 if (err) {
1453 ext4_std_error(dir->i_sb, err);
1454 - brelse(bh);
1455 return err;
1456 }
1457
1458 @@ -1377,7 +1369,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1459 err = ext4_handle_dirty_metadata(handle, dir, bh);
1460 if (err)
1461 ext4_std_error(dir->i_sb, err);
1462 - brelse(bh);
1463 return 0;
1464 }
1465
1466 @@ -1471,7 +1462,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1467 if (!(de))
1468 return retval;
1469
1470 - return add_dirent_to_buf(handle, dentry, inode, de, bh);
1471 + retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1472 + brelse(bh);
1473 + return retval;
1474 }
1475
1476 /*
1477 @@ -1514,8 +1507,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1478 if(!bh)
1479 return retval;
1480 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1481 - if (retval != -ENOSPC)
1482 + if (retval != -ENOSPC) {
1483 + brelse(bh);
1484 return retval;
1485 + }
1486
1487 if (blocks == 1 && !dx_fallback &&
1488 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
1489 @@ -1528,7 +1523,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1490 de = (struct ext4_dir_entry_2 *) bh->b_data;
1491 de->inode = 0;
1492 de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
1493 - return add_dirent_to_buf(handle, dentry, inode, de, bh);
1494 + retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1495 + brelse(bh);
1496 + return retval;
1497 }
1498
1499 /*
1500 @@ -1561,10 +1558,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1501 goto journal_error;
1502
1503 err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1504 - if (err != -ENOSPC) {
1505 - bh = NULL;
1506 + if (err != -ENOSPC)
1507 goto cleanup;
1508 - }
1509
1510 /* Block full, should compress but for now just split */
1511 dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
1512 @@ -1657,7 +1652,6 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1513 if (!de)
1514 goto cleanup;
1515 err = add_dirent_to_buf(handle, dentry, inode, de, bh);
1516 - bh = NULL;
1517 goto cleanup;
1518
1519 journal_error:
1520 @@ -1775,7 +1769,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
1521 retry:
1522 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1523 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1524 - 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
1525 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1526 if (IS_ERR(handle))
1527 return PTR_ERR(handle);
1528
1529 @@ -1809,7 +1803,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
1530 retry:
1531 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1532 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1533 - 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
1534 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1535 if (IS_ERR(handle))
1536 return PTR_ERR(handle);
1537
1538 @@ -1846,7 +1840,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1539 retry:
1540 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1541 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1542 - 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
1543 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1544 if (IS_ERR(handle))
1545 return PTR_ERR(handle);
1546
1547 @@ -2259,7 +2253,7 @@ static int ext4_symlink(struct inode *dir,
1548 retry:
1549 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1550 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
1551 - 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
1552 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1553 if (IS_ERR(handle))
1554 return PTR_ERR(handle);
1555
1556 diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
1557 index 3cfc343..3b2c554 100644
1558 --- a/fs/ext4/resize.c
1559 +++ b/fs/ext4/resize.c
1560 @@ -247,7 +247,7 @@ static int setup_new_group_blocks(struct super_block *sb,
1561 goto exit_bh;
1562
1563 if (IS_ERR(gdb = bclean(handle, sb, block))) {
1564 - err = PTR_ERR(bh);
1565 + err = PTR_ERR(gdb);
1566 goto exit_bh;
1567 }
1568 ext4_handle_dirty_metadata(handle, NULL, gdb);
1569 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
1570 index d4ca92a..9ae5217 100644
1571 --- a/fs/ext4/super.c
1572 +++ b/fs/ext4/super.c
1573 @@ -603,10 +603,6 @@ static void ext4_put_super(struct super_block *sb)
1574 if (sb->s_dirt)
1575 ext4_commit_super(sb, 1);
1576
1577 - ext4_release_system_zone(sb);
1578 - ext4_mb_release(sb);
1579 - ext4_ext_release(sb);
1580 - ext4_xattr_put_super(sb);
1581 if (sbi->s_journal) {
1582 err = jbd2_journal_destroy(sbi->s_journal);
1583 sbi->s_journal = NULL;
1584 @@ -614,6 +610,12 @@ static void ext4_put_super(struct super_block *sb)
1585 ext4_abort(sb, __func__,
1586 "Couldn't clean up the journal");
1587 }
1588 +
1589 + ext4_release_system_zone(sb);
1590 + ext4_mb_release(sb);
1591 + ext4_ext_release(sb);
1592 + ext4_xattr_put_super(sb);
1593 +
1594 if (!(sb->s_flags & MS_RDONLY)) {
1595 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1596 es->s_state = cpu_to_le16(sbi->s_mount_state);
1597 @@ -704,6 +706,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
1598 spin_lock_init(&(ei->i_block_reservation_lock));
1599 INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
1600 ei->cur_aio_dio = NULL;
1601 + ei->i_sync_tid = 0;
1602 + ei->i_datasync_tid = 0;
1603
1604 return &ei->vfs_inode;
1605 }
1606 @@ -899,6 +903,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
1607 if (test_opt(sb, NO_AUTO_DA_ALLOC))
1608 seq_puts(seq, ",noauto_da_alloc");
1609
1610 + if (test_opt(sb, DISCARD))
1611 + seq_puts(seq, ",discard");
1612 +
1613 + if (test_opt(sb, NOLOAD))
1614 + seq_puts(seq, ",norecovery");
1615 +
1616 ext4_show_quota_options(seq, sb);
1617
1618 return 0;
1619 @@ -1079,7 +1089,8 @@ enum {
1620 Opt_usrquota, Opt_grpquota, Opt_i_version,
1621 Opt_stripe, Opt_delalloc, Opt_nodelalloc,
1622 Opt_block_validity, Opt_noblock_validity,
1623 - Opt_inode_readahead_blks, Opt_journal_ioprio
1624 + Opt_inode_readahead_blks, Opt_journal_ioprio,
1625 + Opt_discard, Opt_nodiscard,
1626 };
1627
1628 static const match_table_t tokens = {
1629 @@ -1104,6 +1115,7 @@ static const match_table_t tokens = {
1630 {Opt_acl, "acl"},
1631 {Opt_noacl, "noacl"},
1632 {Opt_noload, "noload"},
1633 + {Opt_noload, "norecovery"},
1634 {Opt_nobh, "nobh"},
1635 {Opt_bh, "bh"},
1636 {Opt_commit, "commit=%u"},
1637 @@ -1144,6 +1156,8 @@ static const match_table_t tokens = {
1638 {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1639 {Opt_auto_da_alloc, "auto_da_alloc"},
1640 {Opt_noauto_da_alloc, "noauto_da_alloc"},
1641 + {Opt_discard, "discard"},
1642 + {Opt_nodiscard, "nodiscard"},
1643 {Opt_err, NULL},
1644 };
1645
1646 @@ -1565,6 +1579,12 @@ set_qf_format:
1647 else
1648 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1649 break;
1650 + case Opt_discard:
1651 + set_opt(sbi->s_mount_opt, DISCARD);
1652 + break;
1653 + case Opt_nodiscard:
1654 + clear_opt(sbi->s_mount_opt, DISCARD);
1655 + break;
1656 default:
1657 ext4_msg(sb, KERN_ERR,
1658 "Unrecognized mount option \"%s\" "
1659 @@ -1673,14 +1693,14 @@ static int ext4_fill_flex_info(struct super_block *sb)
1660 size_t size;
1661 int i;
1662
1663 - if (!sbi->s_es->s_log_groups_per_flex) {
1664 + sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1665 + groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1666 +
1667 + if (groups_per_flex < 2) {
1668 sbi->s_log_groups_per_flex = 0;
1669 return 1;
1670 }
1671
1672 - sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1673 - groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1674 -
1675 /* We allocate both existing and potentially added groups */
1676 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1677 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1678 @@ -3668,13 +3688,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
1679 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
1680 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
1681 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
1682 - ext4_free_blocks_count_set(es, buf->f_bfree);
1683 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
1684 if (buf->f_bfree < ext4_r_blocks_count(es))
1685 buf->f_bavail = 0;
1686 buf->f_files = le32_to_cpu(es->s_inodes_count);
1687 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
1688 - es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
1689 buf->f_namelen = EXT4_NAME_LEN;
1690 fsid = le64_to_cpup((void *)es->s_uuid) ^
1691 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
1692 diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
1693 index fed5b01..0257019 100644
1694 --- a/fs/ext4/xattr.c
1695 +++ b/fs/ext4/xattr.c
1696 @@ -988,6 +988,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1697 if (error)
1698 goto cleanup;
1699
1700 + error = ext4_journal_get_write_access(handle, is.iloc.bh);
1701 + if (error)
1702 + goto cleanup;
1703 +
1704 if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) {
1705 struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
1706 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
1707 @@ -1013,9 +1017,6 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1708 if (flags & XATTR_CREATE)
1709 goto cleanup;
1710 }
1711 - error = ext4_journal_get_write_access(handle, is.iloc.bh);
1712 - if (error)
1713 - goto cleanup;
1714 if (!value) {
1715 if (!is.s.not_found)
1716 error = ext4_xattr_ibody_set(handle, inode, &i, &is);
1717 diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
1718 index d4cfd6d..8896c1d 100644
1719 --- a/fs/jbd2/commit.c
1720 +++ b/fs/jbd2/commit.c
1721 @@ -636,6 +636,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
1722 JBUFFER_TRACE(jh, "ph3: write metadata");
1723 flags = jbd2_journal_write_metadata_buffer(commit_transaction,
1724 jh, &new_jh, blocknr);
1725 + if (flags < 0) {
1726 + jbd2_journal_abort(journal, flags);
1727 + continue;
1728 + }
1729 set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
1730 wbuf[bufs++] = jh2bh(new_jh);
1731
1732 diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
1733 index fed8538..82c295d 100644
1734 --- a/fs/jbd2/journal.c
1735 +++ b/fs/jbd2/journal.c
1736 @@ -78,6 +78,7 @@ EXPORT_SYMBOL(jbd2_journal_errno);
1737 EXPORT_SYMBOL(jbd2_journal_ack_err);
1738 EXPORT_SYMBOL(jbd2_journal_clear_err);
1739 EXPORT_SYMBOL(jbd2_log_wait_commit);
1740 +EXPORT_SYMBOL(jbd2_log_start_commit);
1741 EXPORT_SYMBOL(jbd2_journal_start_commit);
1742 EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
1743 EXPORT_SYMBOL(jbd2_journal_wipe);
1744 @@ -358,6 +359,10 @@ repeat:
1745
1746 jbd_unlock_bh_state(bh_in);
1747 tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
1748 + if (!tmp) {
1749 + jbd2_journal_put_journal_head(new_jh);
1750 + return -ENOMEM;
1751 + }
1752 jbd_lock_bh_state(bh_in);
1753 if (jh_in->b_frozen_data) {
1754 jbd2_free(tmp, bh_in->b_size);
1755 diff --git a/include/linux/sched.h b/include/linux/sched.h
1756 index 75e6e60..0f67914 100644
1757 --- a/include/linux/sched.h
1758 +++ b/include/linux/sched.h
1759 @@ -2086,11 +2086,18 @@ static inline int is_si_special(const struct siginfo *info)
1760 return info <= SEND_SIG_FORCED;
1761 }
1762
1763 -/* True if we are on the alternate signal stack. */
1764 -
1765 +/*
1766 + * True if we are on the alternate signal stack.
1767 + */
1768 static inline int on_sig_stack(unsigned long sp)
1769 {
1770 - return (sp - current->sas_ss_sp < current->sas_ss_size);
1771 +#ifdef CONFIG_STACK_GROWSUP
1772 + return sp >= current->sas_ss_sp &&
1773 + sp - current->sas_ss_sp < current->sas_ss_size;
1774 +#else
1775 + return sp > current->sas_ss_sp &&
1776 + sp - current->sas_ss_sp <= current->sas_ss_size;
1777 +#endif
1778 }
1779
1780 static inline int sas_ss_flags(unsigned long sp)
1781 diff --git a/include/scsi/osd_protocol.h b/include/scsi/osd_protocol.h
1782 index 2cc8e8b..6856612 100644
1783 --- a/include/scsi/osd_protocol.h
1784 +++ b/include/scsi/osd_protocol.h
1785 @@ -17,6 +17,7 @@
1786 #define __OSD_PROTOCOL_H__
1787
1788 #include <linux/types.h>
1789 +#include <linux/kernel.h>
1790 #include <asm/unaligned.h>
1791 #include <scsi/scsi.h>
1792
1793 diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
1794 index 47941fc..0b4baba 100644
1795 --- a/include/scsi/scsi_host.h
1796 +++ b/include/scsi/scsi_host.h
1797 @@ -677,6 +677,12 @@ struct Scsi_Host {
1798 void *shost_data;
1799
1800 /*
1801 + * Points to the physical bus device we'd use to do DMA
1802 + * Needed just in case we have virtual hosts.
1803 + */
1804 + struct device *dma_dev;
1805 +
1806 + /*
1807 * We should ensure that this is aligned, both for better performance
1808 * and also because some compilers (m68k) don't automatically force
1809 * alignment to a long boundary.
1810 @@ -720,7 +726,9 @@ extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *);
1811 extern void scsi_flush_work(struct Scsi_Host *);
1812
1813 extern struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *, int);
1814 -extern int __must_check scsi_add_host(struct Scsi_Host *, struct device *);
1815 +extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *,
1816 + struct device *,
1817 + struct device *);
1818 extern void scsi_scan_host(struct Scsi_Host *);
1819 extern void scsi_rescan_device(struct device *);
1820 extern void scsi_remove_host(struct Scsi_Host *);
1821 @@ -731,6 +739,12 @@ extern const char *scsi_host_state_name(enum scsi_host_state);
1822
1823 extern u64 scsi_calculate_bounce_limit(struct Scsi_Host *);
1824
1825 +static inline int __must_check scsi_add_host(struct Scsi_Host *host,
1826 + struct device *dev)
1827 +{
1828 + return scsi_add_host_with_dma(host, dev, dev);
1829 +}
1830 +
1831 static inline struct device *scsi_get_device(struct Scsi_Host *shost)
1832 {
1833 return shost->shost_gendev.parent;

  ViewVC Help
Powered by ViewVC 1.1.20