| 1 |
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
|
| 2 |
index 6d94e06..af6885c 100644
|
| 3 |
--- a/Documentation/filesystems/ext4.txt
|
| 4 |
+++ b/Documentation/filesystems/ext4.txt
|
| 5 |
@@ -153,8 +153,8 @@ journal_dev=devnum When the external journal device's major/minor numbers
|
| 6 |
identified through its new major/minor numbers encoded
|
| 7 |
in devnum.
|
| 8 |
|
| 9 |
-noload Don't load the journal on mounting. Note that
|
| 10 |
- if the filesystem was not unmounted cleanly,
|
| 11 |
+norecovery Don't load the journal on mounting. Note that
|
| 12 |
+noload if the filesystem was not unmounted cleanly,
|
| 13 |
skipping the journal replay will lead to the
|
| 14 |
filesystem containing inconsistencies that can
|
| 15 |
lead to any number of problems.
|
| 16 |
@@ -353,6 +353,12 @@ noauto_da_alloc replacing existing files via patterns such as
|
| 17 |
system crashes before the delayed allocation
|
| 18 |
blocks are forced to disk.
|
| 19 |
|
| 20 |
+discard Controls whether ext4 should issue discard/TRIM
|
| 21 |
+nodiscard(*) commands to the underlying block device when
|
| 22 |
+ blocks are freed. This is useful for SSD devices
|
| 23 |
+ and sparse/thinly-provisioned LUNs, but it is off
|
| 24 |
+ by default until sufficient testing has been done.
|
| 25 |
+
|
| 26 |
Data Mode
|
| 27 |
=========
|
| 28 |
There are 3 different data modes:
|
| 29 |
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
|
| 30 |
index c968cc3..554626e 100644
|
| 31 |
--- a/drivers/scsi/hosts.c
|
| 32 |
+++ b/drivers/scsi/hosts.c
|
| 33 |
@@ -180,14 +180,20 @@ void scsi_remove_host(struct Scsi_Host *shost)
|
| 34 |
EXPORT_SYMBOL(scsi_remove_host);
|
| 35 |
|
| 36 |
/**
|
| 37 |
- * scsi_add_host - add a scsi host
|
| 38 |
+ * scsi_add_host_with_dma - add a scsi host with dma device
|
| 39 |
* @shost: scsi host pointer to add
|
| 40 |
* @dev: a struct device of type scsi class
|
| 41 |
+ * @dma_dev: dma device for the host
|
| 42 |
+ *
|
| 43 |
+ * Note: You rarely need to worry about this unless you're in a
|
| 44 |
+ * virtualised host environment, so use the simpler scsi_add_host()
|
| 45 |
+ * function instead.
|
| 46 |
*
|
| 47 |
* Return value:
|
| 48 |
* 0 on success / != 0 for error
|
| 49 |
**/
|
| 50 |
-int scsi_add_host(struct Scsi_Host *shost, struct device *dev)
|
| 51 |
+int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
|
| 52 |
+ struct device *dma_dev)
|
| 53 |
{
|
| 54 |
struct scsi_host_template *sht = shost->hostt;
|
| 55 |
int error = -EINVAL;
|
| 56 |
@@ -207,6 +213,7 @@ int scsi_add_host(struct Scsi_Host *shost, struct device *dev)
|
| 57 |
|
| 58 |
if (!shost->shost_gendev.parent)
|
| 59 |
shost->shost_gendev.parent = dev ? dev : &platform_bus;
|
| 60 |
+ shost->dma_dev = dma_dev;
|
| 61 |
|
| 62 |
error = device_add(&shost->shost_gendev);
|
| 63 |
if (error)
|
| 64 |
@@ -262,7 +269,7 @@ int scsi_add_host(struct Scsi_Host *shost, struct device *dev)
|
| 65 |
fail:
|
| 66 |
return error;
|
| 67 |
}
|
| 68 |
-EXPORT_SYMBOL(scsi_add_host);
|
| 69 |
+EXPORT_SYMBOL(scsi_add_host_with_dma);
|
| 70 |
|
| 71 |
static void scsi_host_dev_release(struct device *dev)
|
| 72 |
{
|
| 73 |
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
|
| 74 |
index 562d8ce..f913f1e 100644
|
| 75 |
--- a/drivers/scsi/lpfc/lpfc_init.c
|
| 76 |
+++ b/drivers/scsi/lpfc/lpfc_init.c
|
| 77 |
@@ -2408,7 +2408,7 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
|
| 78 |
vport->els_tmofunc.function = lpfc_els_timeout;
|
| 79 |
vport->els_tmofunc.data = (unsigned long)vport;
|
| 80 |
|
| 81 |
- error = scsi_add_host(shost, dev);
|
| 82 |
+ error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev);
|
| 83 |
if (error)
|
| 84 |
goto out_put_shost;
|
| 85 |
|
| 86 |
diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
|
| 87 |
index a39addc..507ccc6 100644
|
| 88 |
--- a/drivers/scsi/megaraid/megaraid_sas.c
|
| 89 |
+++ b/drivers/scsi/megaraid/megaraid_sas.c
|
| 90 |
@@ -3032,7 +3032,7 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance,
|
| 91 |
int error = 0, i;
|
| 92 |
void *sense = NULL;
|
| 93 |
dma_addr_t sense_handle;
|
| 94 |
- u32 *sense_ptr;
|
| 95 |
+ unsigned long *sense_ptr;
|
| 96 |
|
| 97 |
memset(kbuff_arr, 0, sizeof(kbuff_arr));
|
| 98 |
|
| 99 |
@@ -3109,7 +3109,7 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance,
|
| 100 |
}
|
| 101 |
|
| 102 |
sense_ptr =
|
| 103 |
- (u32 *) ((unsigned long)cmd->frame + ioc->sense_off);
|
| 104 |
+ (unsigned long *) ((unsigned long)cmd->frame + ioc->sense_off);
|
| 105 |
*sense_ptr = sense_handle;
|
| 106 |
}
|
| 107 |
|
| 108 |
@@ -3140,8 +3140,8 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance,
|
| 109 |
* sense_ptr points to the location that has the user
|
| 110 |
* sense buffer address
|
| 111 |
*/
|
| 112 |
- sense_ptr = (u32 *) ((unsigned long)ioc->frame.raw +
|
| 113 |
- ioc->sense_off);
|
| 114 |
+ sense_ptr = (unsigned long *) ((unsigned long)ioc->frame.raw +
|
| 115 |
+ ioc->sense_off);
|
| 116 |
|
| 117 |
if (copy_to_user((void __user *)((unsigned long)(*sense_ptr)),
|
| 118 |
sense, ioc->sense_len)) {
|
| 119 |
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
|
| 120 |
index fbcb82a..21e2bc4 100644
|
| 121 |
--- a/drivers/scsi/qla2xxx/qla_attr.c
|
| 122 |
+++ b/drivers/scsi/qla2xxx/qla_attr.c
|
| 123 |
@@ -1654,7 +1654,8 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
|
| 124 |
fc_vport_set_state(fc_vport, FC_VPORT_LINKDOWN);
|
| 125 |
}
|
| 126 |
|
| 127 |
- if (scsi_add_host(vha->host, &fc_vport->dev)) {
|
| 128 |
+ if (scsi_add_host_with_dma(vha->host, &fc_vport->dev,
|
| 129 |
+ &ha->pdev->dev)) {
|
| 130 |
DEBUG15(printk("scsi(%ld): scsi_add_host failure for VP[%d].\n",
|
| 131 |
vha->host_no, vha->vp_idx));
|
| 132 |
goto vport_create_failed_2;
|
| 133 |
diff --git a/drivers/scsi/scsi_lib_dma.c b/drivers/scsi/scsi_lib_dma.c
|
| 134 |
index ac6855c..dcd1285 100644
|
| 135 |
--- a/drivers/scsi/scsi_lib_dma.c
|
| 136 |
+++ b/drivers/scsi/scsi_lib_dma.c
|
| 137 |
@@ -23,7 +23,7 @@ int scsi_dma_map(struct scsi_cmnd *cmd)
|
| 138 |
int nseg = 0;
|
| 139 |
|
| 140 |
if (scsi_sg_count(cmd)) {
|
| 141 |
- struct device *dev = cmd->device->host->shost_gendev.parent;
|
| 142 |
+ struct device *dev = cmd->device->host->dma_dev;
|
| 143 |
|
| 144 |
nseg = dma_map_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
|
| 145 |
cmd->sc_data_direction);
|
| 146 |
@@ -41,7 +41,7 @@ EXPORT_SYMBOL(scsi_dma_map);
|
| 147 |
void scsi_dma_unmap(struct scsi_cmnd *cmd)
|
| 148 |
{
|
| 149 |
if (scsi_sg_count(cmd)) {
|
| 150 |
- struct device *dev = cmd->device->host->shost_gendev.parent;
|
| 151 |
+ struct device *dev = cmd->device->host->dma_dev;
|
| 152 |
|
| 153 |
dma_unmap_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
|
| 154 |
cmd->sc_data_direction);
|
| 155 |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
|
| 156 |
index 1d04189..f3032c9 100644
|
| 157 |
--- a/fs/ext4/balloc.c
|
| 158 |
+++ b/fs/ext4/balloc.c
|
| 159 |
@@ -761,7 +761,13 @@ static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
|
| 160 |
static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
|
| 161 |
ext4_group_t group)
|
| 162 |
{
|
| 163 |
- return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0;
|
| 164 |
+ if (!ext4_bg_has_super(sb, group))
|
| 165 |
+ return 0;
|
| 166 |
+
|
| 167 |
+ if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG))
|
| 168 |
+ return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
|
| 169 |
+ else
|
| 170 |
+ return EXT4_SB(sb)->s_gdb_count;
|
| 171 |
}
|
| 172 |
|
| 173 |
/**
|
| 174 |
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
|
| 175 |
index 50784ef..dc79b75 100644
|
| 176 |
--- a/fs/ext4/block_validity.c
|
| 177 |
+++ b/fs/ext4/block_validity.c
|
| 178 |
@@ -160,7 +160,7 @@ int ext4_setup_system_zone(struct super_block *sb)
|
| 179 |
if (ext4_bg_has_super(sb, i) &&
|
| 180 |
((i < 5) || ((i % flex_size) == 0)))
|
| 181 |
add_system_zone(sbi, ext4_group_first_block_no(sb, i),
|
| 182 |
- sbi->s_gdb_count + 1);
|
| 183 |
+ ext4_bg_num_gdb(sb, i) + 1);
|
| 184 |
gdp = ext4_get_group_desc(sb, i, NULL);
|
| 185 |
ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
|
| 186 |
if (ret)
|
| 187 |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
|
| 188 |
index 8825515..bd2a9dd 100644
|
| 189 |
--- a/fs/ext4/ext4.h
|
| 190 |
+++ b/fs/ext4/ext4.h
|
| 191 |
@@ -703,6 +703,13 @@ struct ext4_inode_info {
|
| 192 |
struct list_head i_aio_dio_complete_list;
|
| 193 |
/* current io_end structure for async DIO write*/
|
| 194 |
ext4_io_end_t *cur_aio_dio;
|
| 195 |
+
|
| 196 |
+ /*
|
| 197 |
+ * Transactions that contain inode's metadata needed to complete
|
| 198 |
+ * fsync and fdatasync, respectively.
|
| 199 |
+ */
|
| 200 |
+ tid_t i_sync_tid;
|
| 201 |
+ tid_t i_datasync_tid;
|
| 202 |
};
|
| 203 |
|
| 204 |
/*
|
| 205 |
@@ -750,6 +757,7 @@ struct ext4_inode_info {
|
| 206 |
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
|
| 207 |
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
|
| 208 |
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
|
| 209 |
+#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
|
| 210 |
|
| 211 |
#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
|
| 212 |
#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
|
| 213 |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
|
| 214 |
index a286598..1892a77 100644
|
| 215 |
--- a/fs/ext4/ext4_jbd2.h
|
| 216 |
+++ b/fs/ext4/ext4_jbd2.h
|
| 217 |
@@ -49,7 +49,7 @@
|
| 218 |
|
| 219 |
#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
|
| 220 |
EXT4_XATTR_TRANS_BLOCKS - 2 + \
|
| 221 |
- 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
|
| 222 |
+ EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
|
| 223 |
|
| 224 |
/*
|
| 225 |
* Define the number of metadata blocks we need to account to modify data.
|
| 226 |
@@ -57,7 +57,7 @@
|
| 227 |
* This include super block, inode block, quota blocks and xattr blocks
|
| 228 |
*/
|
| 229 |
#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
|
| 230 |
- 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
|
| 231 |
+ EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
|
| 232 |
|
| 233 |
/* Delete operations potentially hit one directory's namespace plus an
|
| 234 |
* entire inode, plus arbitrary amounts of bitmap/indirection data. Be
|
| 235 |
@@ -92,6 +92,7 @@
|
| 236 |
* but inode, sb and group updates are done only once */
|
| 237 |
#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
|
| 238 |
(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
|
| 239 |
+
|
| 240 |
#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
|
| 241 |
(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
|
| 242 |
#else
|
| 243 |
@@ -99,6 +100,9 @@
|
| 244 |
#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
|
| 245 |
#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
|
| 246 |
#endif
|
| 247 |
+#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
|
| 248 |
+#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
|
| 249 |
+#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
|
| 250 |
|
| 251 |
int
|
| 252 |
ext4_mark_iloc_dirty(handle_t *handle,
|
| 253 |
@@ -254,6 +258,19 @@ static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
|
| 254 |
return 0;
|
| 255 |
}
|
| 256 |
|
| 257 |
+static inline void ext4_update_inode_fsync_trans(handle_t *handle,
|
| 258 |
+ struct inode *inode,
|
| 259 |
+ int datasync)
|
| 260 |
+{
|
| 261 |
+ struct ext4_inode_info *ei = EXT4_I(inode);
|
| 262 |
+
|
| 263 |
+ if (ext4_handle_valid(handle)) {
|
| 264 |
+ ei->i_sync_tid = handle->h_transaction->t_tid;
|
| 265 |
+ if (datasync)
|
| 266 |
+ ei->i_datasync_tid = handle->h_transaction->t_tid;
|
| 267 |
+ }
|
| 268 |
+}
|
| 269 |
+
|
| 270 |
/* super.c */
|
| 271 |
int ext4_force_commit(struct super_block *sb);
|
| 272 |
|
| 273 |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
|
| 274 |
index 715264b..8b8bae4 100644
|
| 275 |
--- a/fs/ext4/extents.c
|
| 276 |
+++ b/fs/ext4/extents.c
|
| 277 |
@@ -1761,7 +1761,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
|
| 278 |
while (block < last && block != EXT_MAX_BLOCK) {
|
| 279 |
num = last - block;
|
| 280 |
/* find extent for this block */
|
| 281 |
+ down_read(&EXT4_I(inode)->i_data_sem);
|
| 282 |
path = ext4_ext_find_extent(inode, block, path);
|
| 283 |
+ up_read(&EXT4_I(inode)->i_data_sem);
|
| 284 |
if (IS_ERR(path)) {
|
| 285 |
err = PTR_ERR(path);
|
| 286 |
path = NULL;
|
| 287 |
@@ -2074,7 +2076,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
|
| 288 |
ext_debug("free last %u blocks starting %llu\n", num, start);
|
| 289 |
for (i = 0; i < num; i++) {
|
| 290 |
bh = sb_find_get_block(inode->i_sb, start + i);
|
| 291 |
- ext4_forget(handle, 0, inode, bh, start + i);
|
| 292 |
+ ext4_forget(handle, metadata, inode, bh, start + i);
|
| 293 |
}
|
| 294 |
ext4_free_blocks(handle, inode, start, num, metadata);
|
| 295 |
} else if (from == le32_to_cpu(ex->ee_block)
|
| 296 |
@@ -2167,7 +2169,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
|
| 297 |
correct_index = 1;
|
| 298 |
credits += (ext_depth(inode)) + 1;
|
| 299 |
}
|
| 300 |
- credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
|
| 301 |
+ credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
|
| 302 |
|
| 303 |
err = ext4_ext_truncate_extend_restart(handle, inode, credits);
|
| 304 |
if (err)
|
| 305 |
@@ -3064,6 +3066,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
|
| 306 |
if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
|
| 307 |
ret = ext4_convert_unwritten_extents_dio(handle, inode,
|
| 308 |
path);
|
| 309 |
+ if (ret >= 0)
|
| 310 |
+ ext4_update_inode_fsync_trans(handle, inode, 1);
|
| 311 |
goto out2;
|
| 312 |
}
|
| 313 |
/* buffered IO case */
|
| 314 |
@@ -3091,6 +3095,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
|
| 315 |
ret = ext4_ext_convert_to_initialized(handle, inode,
|
| 316 |
path, iblock,
|
| 317 |
max_blocks);
|
| 318 |
+ if (ret >= 0)
|
| 319 |
+ ext4_update_inode_fsync_trans(handle, inode, 1);
|
| 320 |
out:
|
| 321 |
if (ret <= 0) {
|
| 322 |
err = ret;
|
| 323 |
@@ -3329,10 +3335,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
|
| 324 |
allocated = ext4_ext_get_actual_len(&newex);
|
| 325 |
set_buffer_new(bh_result);
|
| 326 |
|
| 327 |
- /* Cache only when it is _not_ an uninitialized extent */
|
| 328 |
- if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
|
| 329 |
+ /*
|
| 330 |
+ * Cache the extent and update transaction to commit on fdatasync only
|
| 331 |
+ * when it is _not_ an uninitialized extent.
|
| 332 |
+ */
|
| 333 |
+ if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
|
| 334 |
ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
|
| 335 |
EXT4_EXT_CACHE_EXTENT);
|
| 336 |
+ ext4_update_inode_fsync_trans(handle, inode, 1);
|
| 337 |
+ } else
|
| 338 |
+ ext4_update_inode_fsync_trans(handle, inode, 0);
|
| 339 |
out:
|
| 340 |
if (allocated > max_blocks)
|
| 341 |
allocated = max_blocks;
|
| 342 |
@@ -3720,10 +3732,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
| 343 |
* Walk the extent tree gathering extent information.
|
| 344 |
* ext4_ext_fiemap_cb will push extents back to user.
|
| 345 |
*/
|
| 346 |
- down_read(&EXT4_I(inode)->i_data_sem);
|
| 347 |
error = ext4_ext_walk_space(inode, start_blk, len_blks,
|
| 348 |
ext4_ext_fiemap_cb, fieinfo);
|
| 349 |
- up_read(&EXT4_I(inode)->i_data_sem);
|
| 350 |
}
|
| 351 |
|
| 352 |
return error;
|
| 353 |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
|
| 354 |
index 2b15312..d6049e4 100644
|
| 355 |
--- a/fs/ext4/fsync.c
|
| 356 |
+++ b/fs/ext4/fsync.c
|
| 357 |
@@ -51,25 +51,30 @@
|
| 358 |
int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
|
| 359 |
{
|
| 360 |
struct inode *inode = dentry->d_inode;
|
| 361 |
+ struct ext4_inode_info *ei = EXT4_I(inode);
|
| 362 |
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
|
| 363 |
- int err, ret = 0;
|
| 364 |
+ int ret;
|
| 365 |
+ tid_t commit_tid;
|
| 366 |
|
| 367 |
J_ASSERT(ext4_journal_current_handle() == NULL);
|
| 368 |
|
| 369 |
trace_ext4_sync_file(file, dentry, datasync);
|
| 370 |
|
| 371 |
+ if (inode->i_sb->s_flags & MS_RDONLY)
|
| 372 |
+ return 0;
|
| 373 |
+
|
| 374 |
ret = flush_aio_dio_completed_IO(inode);
|
| 375 |
if (ret < 0)
|
| 376 |
- goto out;
|
| 377 |
+ return ret;
|
| 378 |
+
|
| 379 |
+ if (!journal)
|
| 380 |
+ return simple_fsync(file, dentry, datasync);
|
| 381 |
+
|
| 382 |
/*
|
| 383 |
- * data=writeback:
|
| 384 |
+ * data=writeback,ordered:
|
| 385 |
* The caller's filemap_fdatawrite()/wait will sync the data.
|
| 386 |
- * sync_inode() will sync the metadata
|
| 387 |
- *
|
| 388 |
- * data=ordered:
|
| 389 |
- * The caller's filemap_fdatawrite() will write the data and
|
| 390 |
- * sync_inode() will write the inode if it is dirty. Then the caller's
|
| 391 |
- * filemap_fdatawait() will wait on the pages.
|
| 392 |
+ * Metadata is in the journal; we wait for the proper transaction to
|
| 393 |
+ * commit here.
|
| 394 |
*
|
| 395 |
* data=journal:
|
| 396 |
* filemap_fdatawrite won't do anything (the buffers are clean).
|
| 397 |
@@ -79,32 +84,13 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
|
| 398 |
* (they were dirtied by commit). But that's OK - the blocks are
|
| 399 |
* safe in-journal, which is all fsync() needs to ensure.
|
| 400 |
*/
|
| 401 |
- if (ext4_should_journal_data(inode)) {
|
| 402 |
- ret = ext4_force_commit(inode->i_sb);
|
| 403 |
- goto out;
|
| 404 |
- }
|
| 405 |
+ if (ext4_should_journal_data(inode))
|
| 406 |
+ return ext4_force_commit(inode->i_sb);
|
| 407 |
|
| 408 |
- if (!journal)
|
| 409 |
- ret = sync_mapping_buffers(inode->i_mapping);
|
| 410 |
-
|
| 411 |
- if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
|
| 412 |
- goto out;
|
| 413 |
-
|
| 414 |
- /*
|
| 415 |
- * The VFS has written the file data. If the inode is unaltered
|
| 416 |
- * then we need not start a commit.
|
| 417 |
- */
|
| 418 |
- if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
|
| 419 |
- struct writeback_control wbc = {
|
| 420 |
- .sync_mode = WB_SYNC_ALL,
|
| 421 |
- .nr_to_write = 0, /* sys_fsync did this */
|
| 422 |
- };
|
| 423 |
- err = sync_inode(inode, &wbc);
|
| 424 |
- if (ret == 0)
|
| 425 |
- ret = err;
|
| 426 |
- }
|
| 427 |
-out:
|
| 428 |
- if (journal && (journal->j_flags & JBD2_BARRIER))
|
| 429 |
+ commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
|
| 430 |
+ if (jbd2_log_start_commit(journal, commit_tid))
|
| 431 |
+ jbd2_log_wait_commit(journal, commit_tid);
|
| 432 |
+ else if (journal->j_flags & JBD2_BARRIER)
|
| 433 |
blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
|
| 434 |
return ret;
|
| 435 |
}
|
| 436 |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
|
| 437 |
index 2c8caa5..1dae9a4 100644
|
| 438 |
--- a/fs/ext4/inode.c
|
| 439 |
+++ b/fs/ext4/inode.c
|
| 440 |
@@ -1021,10 +1021,12 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
|
| 441 |
if (!err)
|
| 442 |
err = ext4_splice_branch(handle, inode, iblock,
|
| 443 |
partial, indirect_blks, count);
|
| 444 |
- else
|
| 445 |
+ if (err)
|
| 446 |
goto cleanup;
|
| 447 |
|
| 448 |
set_buffer_new(bh_result);
|
| 449 |
+
|
| 450 |
+ ext4_update_inode_fsync_trans(handle, inode, 1);
|
| 451 |
got_it:
|
| 452 |
map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
|
| 453 |
if (count > blocks_to_boundary)
|
| 454 |
@@ -1052,7 +1054,7 @@ qsize_t ext4_get_reserved_space(struct inode *inode)
|
| 455 |
EXT4_I(inode)->i_reserved_meta_blocks;
|
| 456 |
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
|
| 457 |
|
| 458 |
- return total;
|
| 459 |
+ return (total << inode->i_blkbits);
|
| 460 |
}
|
| 461 |
/*
|
| 462 |
* Calculate the number of metadata blocks need to reserve
|
| 463 |
@@ -1534,6 +1536,16 @@ static int do_journal_get_write_access(handle_t *handle,
|
| 464 |
return ext4_journal_get_write_access(handle, bh);
|
| 465 |
}
|
| 466 |
|
| 467 |
+/*
|
| 468 |
+ * Truncate blocks that were not used by write. We have to truncate the
|
| 469 |
+ * pagecache as well so that corresponding buffers get properly unmapped.
|
| 470 |
+ */
|
| 471 |
+static void ext4_truncate_failed_write(struct inode *inode)
|
| 472 |
+{
|
| 473 |
+ truncate_inode_pages(inode->i_mapping, inode->i_size);
|
| 474 |
+ ext4_truncate(inode);
|
| 475 |
+}
|
| 476 |
+
|
| 477 |
static int ext4_write_begin(struct file *file, struct address_space *mapping,
|
| 478 |
loff_t pos, unsigned len, unsigned flags,
|
| 479 |
struct page **pagep, void **fsdata)
|
| 480 |
@@ -1599,7 +1611,7 @@ retry:
|
| 481 |
|
| 482 |
ext4_journal_stop(handle);
|
| 483 |
if (pos + len > inode->i_size) {
|
| 484 |
- ext4_truncate(inode);
|
| 485 |
+ ext4_truncate_failed_write(inode);
|
| 486 |
/*
|
| 487 |
* If truncate failed early the inode might
|
| 488 |
* still be on the orphan list; we need to
|
| 489 |
@@ -1709,7 +1721,7 @@ static int ext4_ordered_write_end(struct file *file,
|
| 490 |
ret = ret2;
|
| 491 |
|
| 492 |
if (pos + len > inode->i_size) {
|
| 493 |
- ext4_truncate(inode);
|
| 494 |
+ ext4_truncate_failed_write(inode);
|
| 495 |
/*
|
| 496 |
* If truncate failed early the inode might still be
|
| 497 |
* on the orphan list; we need to make sure the inode
|
| 498 |
@@ -1751,7 +1763,7 @@ static int ext4_writeback_write_end(struct file *file,
|
| 499 |
ret = ret2;
|
| 500 |
|
| 501 |
if (pos + len > inode->i_size) {
|
| 502 |
- ext4_truncate(inode);
|
| 503 |
+ ext4_truncate_failed_write(inode);
|
| 504 |
/*
|
| 505 |
* If truncate failed early the inode might still be
|
| 506 |
* on the orphan list; we need to make sure the inode
|
| 507 |
@@ -1814,7 +1826,7 @@ static int ext4_journalled_write_end(struct file *file,
|
| 508 |
if (!ret)
|
| 509 |
ret = ret2;
|
| 510 |
if (pos + len > inode->i_size) {
|
| 511 |
- ext4_truncate(inode);
|
| 512 |
+ ext4_truncate_failed_write(inode);
|
| 513 |
/*
|
| 514 |
* If truncate failed early the inode might still be
|
| 515 |
* on the orphan list; we need to make sure the inode
|
| 516 |
@@ -2788,7 +2800,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
|
| 517 |
* number of contiguous block. So we will limit
|
| 518 |
* number of contiguous block to a sane value
|
| 519 |
*/
|
| 520 |
- if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
|
| 521 |
+ if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
|
| 522 |
(max_blocks > EXT4_MAX_TRANS_DATA))
|
| 523 |
max_blocks = EXT4_MAX_TRANS_DATA;
|
| 524 |
|
| 525 |
@@ -3091,7 +3103,7 @@ retry:
|
| 526 |
* i_size_read because we hold i_mutex.
|
| 527 |
*/
|
| 528 |
if (pos + len > inode->i_size)
|
| 529 |
- ext4_truncate(inode);
|
| 530 |
+ ext4_truncate_failed_write(inode);
|
| 531 |
}
|
| 532 |
|
| 533 |
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
|
| 534 |
@@ -4120,6 +4132,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
|
| 535 |
__le32 *last)
|
| 536 |
{
|
| 537 |
__le32 *p;
|
| 538 |
+ int is_metadata = S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode);
|
| 539 |
+
|
| 540 |
if (try_to_extend_transaction(handle, inode)) {
|
| 541 |
if (bh) {
|
| 542 |
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
|
| 543 |
@@ -4150,11 +4164,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
|
| 544 |
|
| 545 |
*p = 0;
|
| 546 |
tbh = sb_find_get_block(inode->i_sb, nr);
|
| 547 |
- ext4_forget(handle, 0, inode, tbh, nr);
|
| 548 |
+ ext4_forget(handle, is_metadata, inode, tbh, nr);
|
| 549 |
}
|
| 550 |
}
|
| 551 |
|
| 552 |
- ext4_free_blocks(handle, inode, block_to_free, count, 0);
|
| 553 |
+ ext4_free_blocks(handle, inode, block_to_free, count, is_metadata);
|
| 554 |
}
|
| 555 |
|
| 556 |
/**
|
| 557 |
@@ -4781,8 +4795,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
| 558 |
struct ext4_iloc iloc;
|
| 559 |
struct ext4_inode *raw_inode;
|
| 560 |
struct ext4_inode_info *ei;
|
| 561 |
- struct buffer_head *bh;
|
| 562 |
struct inode *inode;
|
| 563 |
+ journal_t *journal = EXT4_SB(sb)->s_journal;
|
| 564 |
long ret;
|
| 565 |
int block;
|
| 566 |
|
| 567 |
@@ -4793,11 +4807,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
| 568 |
return inode;
|
| 569 |
|
| 570 |
ei = EXT4_I(inode);
|
| 571 |
+ iloc.bh = 0;
|
| 572 |
|
| 573 |
ret = __ext4_get_inode_loc(inode, &iloc, 0);
|
| 574 |
if (ret < 0)
|
| 575 |
goto bad_inode;
|
| 576 |
- bh = iloc.bh;
|
| 577 |
raw_inode = ext4_raw_inode(&iloc);
|
| 578 |
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
|
| 579 |
inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
|
| 580 |
@@ -4820,7 +4834,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
| 581 |
if (inode->i_mode == 0 ||
|
| 582 |
!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
|
| 583 |
/* this inode is deleted */
|
| 584 |
- brelse(bh);
|
| 585 |
ret = -ESTALE;
|
| 586 |
goto bad_inode;
|
| 587 |
}
|
| 588 |
@@ -4848,11 +4861,35 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
| 589 |
ei->i_data[block] = raw_inode->i_block[block];
|
| 590 |
INIT_LIST_HEAD(&ei->i_orphan);
|
| 591 |
|
| 592 |
+ /*
|
| 593 |
+ * Set transaction IDs of transactions that have to be committed
|
| 594 |
+ * to finish f[data]sync. We set them to currently running transaction
|
| 595 |
+ * as we cannot be sure that the inode or some of its metadata isn't
|
| 596 |
+ * part of the transaction - the inode could have been reclaimed and
|
| 597 |
+ * now it is reread from disk.
|
| 598 |
+ */
|
| 599 |
+ if (journal) {
|
| 600 |
+ transaction_t *transaction;
|
| 601 |
+ tid_t tid;
|
| 602 |
+
|
| 603 |
+ spin_lock(&journal->j_state_lock);
|
| 604 |
+ if (journal->j_running_transaction)
|
| 605 |
+ transaction = journal->j_running_transaction;
|
| 606 |
+ else
|
| 607 |
+ transaction = journal->j_committing_transaction;
|
| 608 |
+ if (transaction)
|
| 609 |
+ tid = transaction->t_tid;
|
| 610 |
+ else
|
| 611 |
+ tid = journal->j_commit_sequence;
|
| 612 |
+ spin_unlock(&journal->j_state_lock);
|
| 613 |
+ ei->i_sync_tid = tid;
|
| 614 |
+ ei->i_datasync_tid = tid;
|
| 615 |
+ }
|
| 616 |
+
|
| 617 |
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
|
| 618 |
ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
|
| 619 |
if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
|
| 620 |
EXT4_INODE_SIZE(inode->i_sb)) {
|
| 621 |
- brelse(bh);
|
| 622 |
ret = -EIO;
|
| 623 |
goto bad_inode;
|
| 624 |
}
|
| 625 |
@@ -4884,10 +4921,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
| 626 |
|
| 627 |
ret = 0;
|
| 628 |
if (ei->i_file_acl &&
|
| 629 |
- ((ei->i_file_acl <
|
| 630 |
- (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
|
| 631 |
- EXT4_SB(sb)->s_gdb_count)) ||
|
| 632 |
- (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
|
| 633 |
+ !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
|
| 634 |
ext4_error(sb, __func__,
|
| 635 |
"bad extended attribute block %llu in inode #%lu",
|
| 636 |
ei->i_file_acl, inode->i_ino);
|
| 637 |
@@ -4905,10 +4939,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
| 638 |
/* Validate block references which are part of inode */
|
| 639 |
ret = ext4_check_inode_blockref(inode);
|
| 640 |
}
|
| 641 |
- if (ret) {
|
| 642 |
- brelse(bh);
|
| 643 |
+ if (ret)
|
| 644 |
goto bad_inode;
|
| 645 |
- }
|
| 646 |
|
| 647 |
if (S_ISREG(inode->i_mode)) {
|
| 648 |
inode->i_op = &ext4_file_inode_operations;
|
| 649 |
@@ -4936,7 +4968,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
| 650 |
init_special_inode(inode, inode->i_mode,
|
| 651 |
new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
|
| 652 |
} else {
|
| 653 |
- brelse(bh);
|
| 654 |
ret = -EIO;
|
| 655 |
ext4_error(inode->i_sb, __func__,
|
| 656 |
"bogus i_mode (%o) for inode=%lu",
|
| 657 |
@@ -4949,6 +4980,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
| 658 |
return inode;
|
| 659 |
|
| 660 |
bad_inode:
|
| 661 |
+ brelse(iloc.bh);
|
| 662 |
iget_failed(inode);
|
| 663 |
return ERR_PTR(ret);
|
| 664 |
}
|
| 665 |
@@ -5108,6 +5140,7 @@ static int ext4_do_update_inode(handle_t *handle,
|
| 666 |
err = rc;
|
| 667 |
ei->i_state &= ~EXT4_STATE_NEW;
|
| 668 |
|
| 669 |
+ ext4_update_inode_fsync_trans(handle, inode, 0);
|
| 670 |
out_brelse:
|
| 671 |
brelse(bh);
|
| 672 |
ext4_std_error(inode->i_sb, err);
|
| 673 |
@@ -5227,8 +5260,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
|
| 674 |
|
| 675 |
/* (user+group)*(old+new) structure, inode write (sb,
|
| 676 |
* inode block, ? - but truncate inode update has it) */
|
| 677 |
- handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+
|
| 678 |
- EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
|
| 679 |
+ handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
|
| 680 |
+ EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
|
| 681 |
if (IS_ERR(handle)) {
|
| 682 |
error = PTR_ERR(handle);
|
| 683 |
goto err_out;
|
| 684 |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
|
| 685 |
index c1cdf61..b63d193 100644
|
| 686 |
--- a/fs/ext4/ioctl.c
|
| 687 |
+++ b/fs/ext4/ioctl.c
|
| 688 |
@@ -221,31 +221,38 @@ setversion_out:
|
| 689 |
struct file *donor_filp;
|
| 690 |
int err;
|
| 691 |
|
| 692 |
+ if (!(filp->f_mode & FMODE_READ) ||
|
| 693 |
+ !(filp->f_mode & FMODE_WRITE))
|
| 694 |
+ return -EBADF;
|
| 695 |
+
|
| 696 |
if (copy_from_user(&me,
|
| 697 |
(struct move_extent __user *)arg, sizeof(me)))
|
| 698 |
return -EFAULT;
|
| 699 |
+ me.moved_len = 0;
|
| 700 |
|
| 701 |
donor_filp = fget(me.donor_fd);
|
| 702 |
if (!donor_filp)
|
| 703 |
return -EBADF;
|
| 704 |
|
| 705 |
- if (!capable(CAP_DAC_OVERRIDE)) {
|
| 706 |
- if ((current->real_cred->fsuid != inode->i_uid) ||
|
| 707 |
- !(inode->i_mode & S_IRUSR) ||
|
| 708 |
- !(donor_filp->f_dentry->d_inode->i_mode &
|
| 709 |
- S_IRUSR)) {
|
| 710 |
- fput(donor_filp);
|
| 711 |
- return -EACCES;
|
| 712 |
- }
|
| 713 |
+ if (!(donor_filp->f_mode & FMODE_WRITE)) {
|
| 714 |
+ err = -EBADF;
|
| 715 |
+ goto mext_out;
|
| 716 |
}
|
| 717 |
|
| 718 |
+ err = mnt_want_write(filp->f_path.mnt);
|
| 719 |
+ if (err)
|
| 720 |
+ goto mext_out;
|
| 721 |
+
|
| 722 |
err = ext4_move_extents(filp, donor_filp, me.orig_start,
|
| 723 |
me.donor_start, me.len, &me.moved_len);
|
| 724 |
- fput(donor_filp);
|
| 725 |
+ mnt_drop_write(filp->f_path.mnt);
|
| 726 |
+ if (me.moved_len > 0)
|
| 727 |
+ file_remove_suid(donor_filp);
|
| 728 |
|
| 729 |
if (copy_to_user((struct move_extent *)arg, &me, sizeof(me)))
|
| 730 |
- return -EFAULT;
|
| 731 |
-
|
| 732 |
+ err = -EFAULT;
|
| 733 |
+mext_out:
|
| 734 |
+ fput(donor_filp);
|
| 735 |
return err;
|
| 736 |
}
|
| 737 |
|
| 738 |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
|
| 739 |
index bba1282..7d71148 100644
|
| 740 |
--- a/fs/ext4/mballoc.c
|
| 741 |
+++ b/fs/ext4/mballoc.c
|
| 742 |
@@ -2529,7 +2529,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
|
| 743 |
struct ext4_group_info *db;
|
| 744 |
int err, count = 0, count2 = 0;
|
| 745 |
struct ext4_free_data *entry;
|
| 746 |
- ext4_fsblk_t discard_block;
|
| 747 |
struct list_head *l, *ltmp;
|
| 748 |
|
| 749 |
list_for_each_safe(l, ltmp, &txn->t_private_list) {
|
| 750 |
@@ -2559,13 +2558,19 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
|
| 751 |
page_cache_release(e4b.bd_bitmap_page);
|
| 752 |
}
|
| 753 |
ext4_unlock_group(sb, entry->group);
|
| 754 |
- discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
|
| 755 |
- + entry->start_blk
|
| 756 |
- + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
|
| 757 |
- trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
|
| 758 |
- entry->count);
|
| 759 |
- sb_issue_discard(sb, discard_block, entry->count);
|
| 760 |
-
|
| 761 |
+ if (test_opt(sb, DISCARD)) {
|
| 762 |
+ ext4_fsblk_t discard_block;
|
| 763 |
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
|
| 764 |
+
|
| 765 |
+ discard_block = (ext4_fsblk_t)entry->group *
|
| 766 |
+ EXT4_BLOCKS_PER_GROUP(sb)
|
| 767 |
+ + entry->start_blk
|
| 768 |
+ + le32_to_cpu(es->s_first_data_block);
|
| 769 |
+ trace_ext4_discard_blocks(sb,
|
| 770 |
+ (unsigned long long)discard_block,
|
| 771 |
+ entry->count);
|
| 772 |
+ sb_issue_discard(sb, discard_block, entry->count);
|
| 773 |
+ }
|
| 774 |
kmem_cache_free(ext4_free_ext_cachep, entry);
|
| 775 |
ext4_mb_release_desc(&e4b);
|
| 776 |
}
|
| 777 |
@@ -3006,6 +3011,24 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
|
| 778 |
}
|
| 779 |
|
| 780 |
/*
|
| 781 |
+ * Called on failure; free up any blocks from the inode PA for this
|
| 782 |
+ * context. We don't need this for MB_GROUP_PA because we only change
|
| 783 |
+ * pa_free in ext4_mb_release_context(), but on failure, we've already
|
| 784 |
+ * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
|
| 785 |
+ */
|
| 786 |
+static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
|
| 787 |
+{
|
| 788 |
+ struct ext4_prealloc_space *pa = ac->ac_pa;
|
| 789 |
+ int len;
|
| 790 |
+
|
| 791 |
+ if (pa && pa->pa_type == MB_INODE_PA) {
|
| 792 |
+ len = ac->ac_b_ex.fe_len;
|
| 793 |
+ pa->pa_free += len;
|
| 794 |
+ }
|
| 795 |
+
|
| 796 |
+}
|
| 797 |
+
|
| 798 |
+/*
|
| 799 |
* use blocks preallocated to inode
|
| 800 |
*/
|
| 801 |
static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
|
| 802 |
@@ -4290,6 +4313,7 @@ repeat:
|
| 803 |
ac->ac_status = AC_STATUS_CONTINUE;
|
| 804 |
goto repeat;
|
| 805 |
} else if (*errp) {
|
| 806 |
+ ext4_discard_allocated_blocks(ac);
|
| 807 |
ac->ac_b_ex.fe_len = 0;
|
| 808 |
ar->len = 0;
|
| 809 |
ext4_mb_show_ac(ac);
|
| 810 |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
|
| 811 |
index a93d5b8..8646149 100644
|
| 812 |
--- a/fs/ext4/migrate.c
|
| 813 |
+++ b/fs/ext4/migrate.c
|
| 814 |
@@ -238,7 +238,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
|
| 815 |
* So allocate a credit of 3. We may update
|
| 816 |
* quota (user and group).
|
| 817 |
*/
|
| 818 |
- needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
|
| 819 |
+ needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
|
| 820 |
|
| 821 |
if (ext4_journal_extend(handle, needed) != 0)
|
| 822 |
retval = ext4_journal_restart(handle, needed);
|
| 823 |
@@ -477,7 +477,7 @@ int ext4_ext_migrate(struct inode *inode)
|
| 824 |
handle = ext4_journal_start(inode,
|
| 825 |
EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
|
| 826 |
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
|
| 827 |
- 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
|
| 828 |
+ EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
|
| 829 |
+ 1);
|
| 830 |
if (IS_ERR(handle)) {
|
| 831 |
retval = PTR_ERR(handle);
|
| 832 |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
|
| 833 |
index 25b6b14..f5b03a1 100644
|
| 834 |
--- a/fs/ext4/move_extent.c
|
| 835 |
+++ b/fs/ext4/move_extent.c
|
| 836 |
@@ -77,12 +77,14 @@ static int
|
| 837 |
mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
|
| 838 |
struct ext4_extent **extent)
|
| 839 |
{
|
| 840 |
+ struct ext4_extent_header *eh;
|
| 841 |
int ppos, leaf_ppos = path->p_depth;
|
| 842 |
|
| 843 |
ppos = leaf_ppos;
|
| 844 |
if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
|
| 845 |
/* leaf block */
|
| 846 |
*extent = ++path[ppos].p_ext;
|
| 847 |
+ path[ppos].p_block = ext_pblock(path[ppos].p_ext);
|
| 848 |
return 0;
|
| 849 |
}
|
| 850 |
|
| 851 |
@@ -119,9 +121,18 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
|
| 852 |
ext_block_hdr(path[cur_ppos+1].p_bh);
|
| 853 |
}
|
| 854 |
|
| 855 |
+ path[leaf_ppos].p_ext = *extent = NULL;
|
| 856 |
+
|
| 857 |
+ eh = path[leaf_ppos].p_hdr;
|
| 858 |
+ if (le16_to_cpu(eh->eh_entries) == 0)
|
| 859 |
+ /* empty leaf is found */
|
| 860 |
+ return -ENODATA;
|
| 861 |
+
|
| 862 |
/* leaf block */
|
| 863 |
path[leaf_ppos].p_ext = *extent =
|
| 864 |
EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
|
| 865 |
+ path[leaf_ppos].p_block =
|
| 866 |
+ ext_pblock(path[leaf_ppos].p_ext);
|
| 867 |
return 0;
|
| 868 |
}
|
| 869 |
}
|
| 870 |
@@ -155,40 +166,15 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2,
|
| 871 |
}
|
| 872 |
|
| 873 |
/**
|
| 874 |
- * mext_double_down_read - Acquire two inodes' read semaphore
|
| 875 |
- *
|
| 876 |
- * @orig_inode: original inode structure
|
| 877 |
- * @donor_inode: donor inode structure
|
| 878 |
- * Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
|
| 879 |
- */
|
| 880 |
-static void
|
| 881 |
-mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
|
| 882 |
-{
|
| 883 |
- struct inode *first = orig_inode, *second = donor_inode;
|
| 884 |
-
|
| 885 |
- /*
|
| 886 |
- * Use the inode number to provide the stable locking order instead
|
| 887 |
- * of its address, because the C language doesn't guarantee you can
|
| 888 |
- * compare pointers that don't come from the same array.
|
| 889 |
- */
|
| 890 |
- if (donor_inode->i_ino < orig_inode->i_ino) {
|
| 891 |
- first = donor_inode;
|
| 892 |
- second = orig_inode;
|
| 893 |
- }
|
| 894 |
-
|
| 895 |
- down_read(&EXT4_I(first)->i_data_sem);
|
| 896 |
- down_read(&EXT4_I(second)->i_data_sem);
|
| 897 |
-}
|
| 898 |
-
|
| 899 |
-/**
|
| 900 |
- * mext_double_down_write - Acquire two inodes' write semaphore
|
| 901 |
+ * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
|
| 902 |
*
|
| 903 |
* @orig_inode: original inode structure
|
| 904 |
* @donor_inode: donor inode structure
|
| 905 |
- * Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
|
| 906 |
+ * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
|
| 907 |
+ * i_ino order.
|
| 908 |
*/
|
| 909 |
static void
|
| 910 |
-mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
|
| 911 |
+double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
|
| 912 |
{
|
| 913 |
struct inode *first = orig_inode, *second = donor_inode;
|
| 914 |
|
| 915 |
@@ -203,32 +189,18 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
|
| 916 |
}
|
| 917 |
|
| 918 |
down_write(&EXT4_I(first)->i_data_sem);
|
| 919 |
- down_write(&EXT4_I(second)->i_data_sem);
|
| 920 |
+ down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
|
| 921 |
}
|
| 922 |
|
| 923 |
/**
|
| 924 |
- * mext_double_up_read - Release two inodes' read semaphore
|
| 925 |
+ * double_up_write_data_sem - Release two inodes' write lock of i_data_sem
|
| 926 |
*
|
| 927 |
* @orig_inode: original inode structure to be released its lock first
|
| 928 |
* @donor_inode: donor inode structure to be released its lock second
|
| 929 |
- * Release read semaphore of two inodes (orig and donor).
|
| 930 |
+ * Release write lock of i_data_sem of two inodes (orig and donor).
|
| 931 |
*/
|
| 932 |
static void
|
| 933 |
-mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
|
| 934 |
-{
|
| 935 |
- up_read(&EXT4_I(orig_inode)->i_data_sem);
|
| 936 |
- up_read(&EXT4_I(donor_inode)->i_data_sem);
|
| 937 |
-}
|
| 938 |
-
|
| 939 |
-/**
|
| 940 |
- * mext_double_up_write - Release two inodes' write semaphore
|
| 941 |
- *
|
| 942 |
- * @orig_inode: original inode structure to be released its lock first
|
| 943 |
- * @donor_inode: donor inode structure to be released its lock second
|
| 944 |
- * Release write semaphore of two inodes (orig and donor).
|
| 945 |
- */
|
| 946 |
-static void
|
| 947 |
-mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
|
| 948 |
+double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
|
| 949 |
{
|
| 950 |
up_write(&EXT4_I(orig_inode)->i_data_sem);
|
| 951 |
up_write(&EXT4_I(donor_inode)->i_data_sem);
|
| 952 |
@@ -661,6 +633,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
|
| 953 |
* @donor_inode: donor inode
|
| 954 |
* @from: block offset of orig_inode
|
| 955 |
* @count: block count to be replaced
|
| 956 |
+ * @err: pointer to save return value
|
| 957 |
*
|
| 958 |
* Replace original inode extents and donor inode extents page by page.
|
| 959 |
* We implement this replacement in the following three steps:
|
| 960 |
@@ -671,33 +644,33 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
|
| 961 |
* 3. Change the block information of donor inode to point at the saved
|
| 962 |
* original inode blocks in the dummy extents.
|
| 963 |
*
|
| 964 |
- * Return 0 on success, or a negative error value on failure.
|
| 965 |
+ * Return replaced block count.
|
| 966 |
*/
|
| 967 |
static int
|
| 968 |
mext_replace_branches(handle_t *handle, struct inode *orig_inode,
|
| 969 |
struct inode *donor_inode, ext4_lblk_t from,
|
| 970 |
- ext4_lblk_t count)
|
| 971 |
+ ext4_lblk_t count, int *err)
|
| 972 |
{
|
| 973 |
struct ext4_ext_path *orig_path = NULL;
|
| 974 |
struct ext4_ext_path *donor_path = NULL;
|
| 975 |
struct ext4_extent *oext, *dext;
|
| 976 |
struct ext4_extent tmp_dext, tmp_oext;
|
| 977 |
ext4_lblk_t orig_off = from, donor_off = from;
|
| 978 |
- int err = 0;
|
| 979 |
int depth;
|
| 980 |
int replaced_count = 0;
|
| 981 |
int dext_alen;
|
| 982 |
|
| 983 |
- mext_double_down_write(orig_inode, donor_inode);
|
| 984 |
+ /* Protect extent trees against block allocations via delalloc */
|
| 985 |
+ double_down_write_data_sem(orig_inode, donor_inode);
|
| 986 |
|
| 987 |
/* Get the original extent for the block "orig_off" */
|
| 988 |
- err = get_ext_path(orig_inode, orig_off, &orig_path);
|
| 989 |
- if (err)
|
| 990 |
+ *err = get_ext_path(orig_inode, orig_off, &orig_path);
|
| 991 |
+ if (*err)
|
| 992 |
goto out;
|
| 993 |
|
| 994 |
/* Get the donor extent for the head */
|
| 995 |
- err = get_ext_path(donor_inode, donor_off, &donor_path);
|
| 996 |
- if (err)
|
| 997 |
+ *err = get_ext_path(donor_inode, donor_off, &donor_path);
|
| 998 |
+ if (*err)
|
| 999 |
goto out;
|
| 1000 |
depth = ext_depth(orig_inode);
|
| 1001 |
oext = orig_path[depth].p_ext;
|
| 1002 |
@@ -707,9 +680,9 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
|
| 1003 |
dext = donor_path[depth].p_ext;
|
| 1004 |
tmp_dext = *dext;
|
| 1005 |
|
| 1006 |
- err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
|
| 1007 |
+ *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
|
| 1008 |
donor_off, count);
|
| 1009 |
- if (err)
|
| 1010 |
+ if (*err)
|
| 1011 |
goto out;
|
| 1012 |
|
| 1013 |
/* Loop for the donor extents */
|
| 1014 |
@@ -718,7 +691,7 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
|
| 1015 |
if (!dext) {
|
| 1016 |
ext4_error(donor_inode->i_sb, __func__,
|
| 1017 |
"The extent for donor must be found");
|
| 1018 |
- err = -EIO;
|
| 1019 |
+ *err = -EIO;
|
| 1020 |
goto out;
|
| 1021 |
} else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
|
| 1022 |
ext4_error(donor_inode->i_sb, __func__,
|
| 1023 |
@@ -726,20 +699,20 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
|
| 1024 |
"extent(%u) should be equal",
|
| 1025 |
donor_off,
|
| 1026 |
le32_to_cpu(tmp_dext.ee_block));
|
| 1027 |
- err = -EIO;
|
| 1028 |
+ *err = -EIO;
|
| 1029 |
goto out;
|
| 1030 |
}
|
| 1031 |
|
| 1032 |
/* Set donor extent to orig extent */
|
| 1033 |
- err = mext_leaf_block(handle, orig_inode,
|
| 1034 |
+ *err = mext_leaf_block(handle, orig_inode,
|
| 1035 |
orig_path, &tmp_dext, &orig_off);
|
| 1036 |
- if (err < 0)
|
| 1037 |
+ if (*err)
|
| 1038 |
goto out;
|
| 1039 |
|
| 1040 |
/* Set orig extent to donor extent */
|
| 1041 |
- err = mext_leaf_block(handle, donor_inode,
|
| 1042 |
+ *err = mext_leaf_block(handle, donor_inode,
|
| 1043 |
donor_path, &tmp_oext, &donor_off);
|
| 1044 |
- if (err < 0)
|
| 1045 |
+ if (*err)
|
| 1046 |
goto out;
|
| 1047 |
|
| 1048 |
dext_alen = ext4_ext_get_actual_len(&tmp_dext);
|
| 1049 |
@@ -753,35 +726,25 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
|
| 1050 |
|
| 1051 |
if (orig_path)
|
| 1052 |
ext4_ext_drop_refs(orig_path);
|
| 1053 |
- err = get_ext_path(orig_inode, orig_off, &orig_path);
|
| 1054 |
- if (err)
|
| 1055 |
+ *err = get_ext_path(orig_inode, orig_off, &orig_path);
|
| 1056 |
+ if (*err)
|
| 1057 |
goto out;
|
| 1058 |
depth = ext_depth(orig_inode);
|
| 1059 |
oext = orig_path[depth].p_ext;
|
| 1060 |
- if (le32_to_cpu(oext->ee_block) +
|
| 1061 |
- ext4_ext_get_actual_len(oext) <= orig_off) {
|
| 1062 |
- err = 0;
|
| 1063 |
- goto out;
|
| 1064 |
- }
|
| 1065 |
tmp_oext = *oext;
|
| 1066 |
|
| 1067 |
if (donor_path)
|
| 1068 |
ext4_ext_drop_refs(donor_path);
|
| 1069 |
- err = get_ext_path(donor_inode, donor_off, &donor_path);
|
| 1070 |
- if (err)
|
| 1071 |
+ *err = get_ext_path(donor_inode, donor_off, &donor_path);
|
| 1072 |
+ if (*err)
|
| 1073 |
goto out;
|
| 1074 |
depth = ext_depth(donor_inode);
|
| 1075 |
dext = donor_path[depth].p_ext;
|
| 1076 |
- if (le32_to_cpu(dext->ee_block) +
|
| 1077 |
- ext4_ext_get_actual_len(dext) <= donor_off) {
|
| 1078 |
- err = 0;
|
| 1079 |
- goto out;
|
| 1080 |
- }
|
| 1081 |
tmp_dext = *dext;
|
| 1082 |
|
| 1083 |
- err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
|
| 1084 |
+ *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
|
| 1085 |
donor_off, count - replaced_count);
|
| 1086 |
- if (err)
|
| 1087 |
+ if (*err)
|
| 1088 |
goto out;
|
| 1089 |
}
|
| 1090 |
|
| 1091 |
@@ -795,8 +758,12 @@ out:
|
| 1092 |
kfree(donor_path);
|
| 1093 |
}
|
| 1094 |
|
| 1095 |
- mext_double_up_write(orig_inode, donor_inode);
|
| 1096 |
- return err;
|
| 1097 |
+ ext4_ext_invalidate_cache(orig_inode);
|
| 1098 |
+ ext4_ext_invalidate_cache(donor_inode);
|
| 1099 |
+
|
| 1100 |
+ double_up_write_data_sem(orig_inode, donor_inode);
|
| 1101 |
+
|
| 1102 |
+ return replaced_count;
|
| 1103 |
}
|
| 1104 |
|
| 1105 |
/**
|
| 1106 |
@@ -808,16 +775,17 @@ out:
|
| 1107 |
* @data_offset_in_page: block index where data swapping starts
|
| 1108 |
* @block_len_in_page: the number of blocks to be swapped
|
| 1109 |
* @uninit: orig extent is uninitialized or not
|
| 1110 |
+ * @err: pointer to save return value
|
| 1111 |
*
|
| 1112 |
* Save the data in original inode blocks and replace original inode extents
|
| 1113 |
* with donor inode extents by calling mext_replace_branches().
|
| 1114 |
- * Finally, write out the saved data in new original inode blocks. Return 0
|
| 1115 |
- * on success, or a negative error value on failure.
|
| 1116 |
+ * Finally, write out the saved data in new original inode blocks. Return
|
| 1117 |
+ * replaced block count.
|
| 1118 |
*/
|
| 1119 |
static int
|
| 1120 |
move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
|
| 1121 |
pgoff_t orig_page_offset, int data_offset_in_page,
|
| 1122 |
- int block_len_in_page, int uninit)
|
| 1123 |
+ int block_len_in_page, int uninit, int *err)
|
| 1124 |
{
|
| 1125 |
struct inode *orig_inode = o_filp->f_dentry->d_inode;
|
| 1126 |
struct address_space *mapping = orig_inode->i_mapping;
|
| 1127 |
@@ -829,9 +797,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
|
| 1128 |
long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
|
| 1129 |
unsigned long blocksize = orig_inode->i_sb->s_blocksize;
|
| 1130 |
unsigned int w_flags = 0;
|
| 1131 |
- unsigned int tmp_data_len, data_len;
|
| 1132 |
+ unsigned int tmp_data_size, data_size, replaced_size;
|
| 1133 |
void *fsdata;
|
| 1134 |
- int ret, i, jblocks;
|
| 1135 |
+ int i, jblocks;
|
| 1136 |
+ int err2 = 0;
|
| 1137 |
+ int replaced_count = 0;
|
| 1138 |
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
|
| 1139 |
|
| 1140 |
/*
|
| 1141 |
@@ -841,8 +811,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
|
| 1142 |
jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
|
| 1143 |
handle = ext4_journal_start(orig_inode, jblocks);
|
| 1144 |
if (IS_ERR(handle)) {
|
| 1145 |
- ret = PTR_ERR(handle);
|
| 1146 |
- return ret;
|
| 1147 |
+ *err = PTR_ERR(handle);
|
| 1148 |
+ return 0;
|
| 1149 |
}
|
| 1150 |
|
| 1151 |
if (segment_eq(get_fs(), KERNEL_DS))
|
| 1152 |
@@ -858,39 +828,36 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
|
| 1153 |
* Just swap data blocks between orig and donor.
|
| 1154 |
*/
|
| 1155 |
if (uninit) {
|
| 1156 |
- ret = mext_replace_branches(handle, orig_inode,
|
| 1157 |
- donor_inode, orig_blk_offset,
|
| 1158 |
- block_len_in_page);
|
| 1159 |
-
|
| 1160 |
- /* Clear the inode cache not to refer to the old data */
|
| 1161 |
- ext4_ext_invalidate_cache(orig_inode);
|
| 1162 |
- ext4_ext_invalidate_cache(donor_inode);
|
| 1163 |
+ replaced_count = mext_replace_branches(handle, orig_inode,
|
| 1164 |
+ donor_inode, orig_blk_offset,
|
| 1165 |
+ block_len_in_page, err);
|
| 1166 |
goto out2;
|
| 1167 |
}
|
| 1168 |
|
| 1169 |
offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
|
| 1170 |
|
| 1171 |
- /* Calculate data_len */
|
| 1172 |
+ /* Calculate data_size */
|
| 1173 |
if ((orig_blk_offset + block_len_in_page - 1) ==
|
| 1174 |
((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
|
| 1175 |
/* Replace the last block */
|
| 1176 |
- tmp_data_len = orig_inode->i_size & (blocksize - 1);
|
| 1177 |
+ tmp_data_size = orig_inode->i_size & (blocksize - 1);
|
| 1178 |
/*
|
| 1179 |
- * If data_len equal zero, it shows data_len is multiples of
|
| 1180 |
+ * If data_size equal zero, it shows data_size is multiples of
|
| 1181 |
* blocksize. So we set appropriate value.
|
| 1182 |
*/
|
| 1183 |
- if (tmp_data_len == 0)
|
| 1184 |
- tmp_data_len = blocksize;
|
| 1185 |
+ if (tmp_data_size == 0)
|
| 1186 |
+ tmp_data_size = blocksize;
|
| 1187 |
|
| 1188 |
- data_len = tmp_data_len +
|
| 1189 |
+ data_size = tmp_data_size +
|
| 1190 |
((block_len_in_page - 1) << orig_inode->i_blkbits);
|
| 1191 |
- } else {
|
| 1192 |
- data_len = block_len_in_page << orig_inode->i_blkbits;
|
| 1193 |
- }
|
| 1194 |
+ } else
|
| 1195 |
+ data_size = block_len_in_page << orig_inode->i_blkbits;
|
| 1196 |
+
|
| 1197 |
+ replaced_size = data_size;
|
| 1198 |
|
| 1199 |
- ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags,
|
| 1200 |
+ *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags,
|
| 1201 |
&page, &fsdata);
|
| 1202 |
- if (unlikely(ret < 0))
|
| 1203 |
+ if (unlikely(*err < 0))
|
| 1204 |
goto out;
|
| 1205 |
|
| 1206 |
if (!PageUptodate(page)) {
|
| 1207 |
@@ -911,14 +878,17 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
|
| 1208 |
/* Release old bh and drop refs */
|
| 1209 |
try_to_release_page(page, 0);
|
| 1210 |
|
| 1211 |
- ret = mext_replace_branches(handle, orig_inode, donor_inode,
|
| 1212 |
- orig_blk_offset, block_len_in_page);
|
| 1213 |
- if (ret < 0)
|
| 1214 |
- goto out;
|
| 1215 |
-
|
| 1216 |
- /* Clear the inode cache not to refer to the old data */
|
| 1217 |
- ext4_ext_invalidate_cache(orig_inode);
|
| 1218 |
- ext4_ext_invalidate_cache(donor_inode);
|
| 1219 |
+ replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
|
| 1220 |
+ orig_blk_offset, block_len_in_page,
|
| 1221 |
+ &err2);
|
| 1222 |
+ if (err2) {
|
| 1223 |
+ if (replaced_count) {
|
| 1224 |
+ block_len_in_page = replaced_count;
|
| 1225 |
+ replaced_size =
|
| 1226 |
+ block_len_in_page << orig_inode->i_blkbits;
|
| 1227 |
+ } else
|
| 1228 |
+ goto out;
|
| 1229 |
+ }
|
| 1230 |
|
| 1231 |
if (!page_has_buffers(page))
|
| 1232 |
create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
|
| 1233 |
@@ -928,16 +898,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
|
| 1234 |
bh = bh->b_this_page;
|
| 1235 |
|
| 1236 |
for (i = 0; i < block_len_in_page; i++) {
|
| 1237 |
- ret = ext4_get_block(orig_inode,
|
| 1238 |
+ *err = ext4_get_block(orig_inode,
|
| 1239 |
(sector_t)(orig_blk_offset + i), bh, 0);
|
| 1240 |
- if (ret < 0)
|
| 1241 |
+ if (*err < 0)
|
| 1242 |
goto out;
|
| 1243 |
|
| 1244 |
if (bh->b_this_page != NULL)
|
| 1245 |
bh = bh->b_this_page;
|
| 1246 |
}
|
| 1247 |
|
| 1248 |
- ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len,
|
| 1249 |
+ *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
|
| 1250 |
page, fsdata);
|
| 1251 |
page = NULL;
|
| 1252 |
|
| 1253 |
@@ -951,7 +921,10 @@ out:
|
| 1254 |
out2:
|
| 1255 |
ext4_journal_stop(handle);
|
| 1256 |
|
| 1257 |
- return ret < 0 ? ret : 0;
|
| 1258 |
+ if (err2)
|
| 1259 |
+ *err = err2;
|
| 1260 |
+
|
| 1261 |
+ return replaced_count;
|
| 1262 |
}
|
| 1263 |
|
| 1264 |
/**
|
| 1265 |
@@ -962,7 +935,6 @@ out2:
|
| 1266 |
* @orig_start: logical start offset in block for orig
|
| 1267 |
* @donor_start: logical start offset in block for donor
|
| 1268 |
* @len: the number of blocks to be moved
|
| 1269 |
- * @moved_len: moved block length
|
| 1270 |
*
|
| 1271 |
* Check the arguments of ext4_move_extents() whether the files can be
|
| 1272 |
* exchanged with each other.
|
| 1273 |
@@ -970,8 +942,8 @@ out2:
|
| 1274 |
*/
|
| 1275 |
static int
|
| 1276 |
mext_check_arguments(struct inode *orig_inode,
|
| 1277 |
- struct inode *donor_inode, __u64 orig_start,
|
| 1278 |
- __u64 donor_start, __u64 *len, __u64 moved_len)
|
| 1279 |
+ struct inode *donor_inode, __u64 orig_start,
|
| 1280 |
+ __u64 donor_start, __u64 *len)
|
| 1281 |
{
|
| 1282 |
ext4_lblk_t orig_blocks, donor_blocks;
|
| 1283 |
unsigned int blkbits = orig_inode->i_blkbits;
|
| 1284 |
@@ -985,6 +957,13 @@ mext_check_arguments(struct inode *orig_inode,
|
| 1285 |
return -EINVAL;
|
| 1286 |
}
|
| 1287 |
|
| 1288 |
+ if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
|
| 1289 |
+ ext4_debug("ext4 move extent: suid or sgid is set"
|
| 1290 |
+ " to donor file [ino:orig %lu, donor %lu]\n",
|
| 1291 |
+ orig_inode->i_ino, donor_inode->i_ino);
|
| 1292 |
+ return -EINVAL;
|
| 1293 |
+ }
|
| 1294 |
+
|
| 1295 |
/* Ext4 move extent does not support swapfile */
|
| 1296 |
if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
|
| 1297 |
ext4_debug("ext4 move extent: The argument files should "
|
| 1298 |
@@ -1025,13 +1004,6 @@ mext_check_arguments(struct inode *orig_inode,
|
| 1299 |
return -EINVAL;
|
| 1300 |
}
|
| 1301 |
|
| 1302 |
- if (moved_len) {
|
| 1303 |
- ext4_debug("ext4 move extent: moved_len should be 0 "
|
| 1304 |
- "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
|
| 1305 |
- donor_inode->i_ino);
|
| 1306 |
- return -EINVAL;
|
| 1307 |
- }
|
| 1308 |
-
|
| 1309 |
if ((orig_start > EXT_MAX_BLOCK) ||
|
| 1310 |
(donor_start > EXT_MAX_BLOCK) ||
|
| 1311 |
(*len > EXT_MAX_BLOCK) ||
|
| 1312 |
@@ -1232,16 +1204,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
|
| 1313 |
return -EINVAL;
|
| 1314 |
}
|
| 1315 |
|
| 1316 |
- /* protect orig and donor against a truncate */
|
| 1317 |
+ /* Protect orig and donor inodes against a truncate */
|
| 1318 |
ret1 = mext_inode_double_lock(orig_inode, donor_inode);
|
| 1319 |
if (ret1 < 0)
|
| 1320 |
return ret1;
|
| 1321 |
|
| 1322 |
- mext_double_down_read(orig_inode, donor_inode);
|
| 1323 |
+ /* Protect extent tree against block allocations via delalloc */
|
| 1324 |
+ double_down_write_data_sem(orig_inode, donor_inode);
|
| 1325 |
/* Check the filesystem environment whether move_extent can be done */
|
| 1326 |
ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
|
| 1327 |
- donor_start, &len, *moved_len);
|
| 1328 |
- mext_double_up_read(orig_inode, donor_inode);
|
| 1329 |
+ donor_start, &len);
|
| 1330 |
if (ret1)
|
| 1331 |
goto out;
|
| 1332 |
|
| 1333 |
@@ -1355,36 +1327,39 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
|
| 1334 |
seq_start = le32_to_cpu(ext_cur->ee_block);
|
| 1335 |
rest_blocks = seq_blocks;
|
| 1336 |
|
| 1337 |
- /* Discard preallocations of two inodes */
|
| 1338 |
- down_write(&EXT4_I(orig_inode)->i_data_sem);
|
| 1339 |
- ext4_discard_preallocations(orig_inode);
|
| 1340 |
- up_write(&EXT4_I(orig_inode)->i_data_sem);
|
| 1341 |
-
|
| 1342 |
- down_write(&EXT4_I(donor_inode)->i_data_sem);
|
| 1343 |
- ext4_discard_preallocations(donor_inode);
|
| 1344 |
- up_write(&EXT4_I(donor_inode)->i_data_sem);
|
| 1345 |
+ /*
|
| 1346 |
+ * Up semaphore to avoid following problems:
|
| 1347 |
+ * a. transaction deadlock among ext4_journal_start,
|
| 1348 |
+ * ->write_begin via pagefault, and jbd2_journal_commit
|
| 1349 |
+ * b. racing with ->readpage, ->write_begin, and ext4_get_block
|
| 1350 |
+ * in move_extent_per_page
|
| 1351 |
+ */
|
| 1352 |
+ double_up_write_data_sem(orig_inode, donor_inode);
|
| 1353 |
|
| 1354 |
while (orig_page_offset <= seq_end_page) {
|
| 1355 |
|
| 1356 |
/* Swap original branches with new branches */
|
| 1357 |
- ret1 = move_extent_per_page(o_filp, donor_inode,
|
| 1358 |
+ block_len_in_page = move_extent_per_page(
|
| 1359 |
+ o_filp, donor_inode,
|
| 1360 |
orig_page_offset,
|
| 1361 |
data_offset_in_page,
|
| 1362 |
- block_len_in_page, uninit);
|
| 1363 |
- if (ret1 < 0)
|
| 1364 |
- goto out;
|
| 1365 |
- orig_page_offset++;
|
| 1366 |
+ block_len_in_page, uninit,
|
| 1367 |
+ &ret1);
|
| 1368 |
+
|
| 1369 |
/* Count how many blocks we have exchanged */
|
| 1370 |
*moved_len += block_len_in_page;
|
| 1371 |
+ if (ret1 < 0)
|
| 1372 |
+ break;
|
| 1373 |
if (*moved_len > len) {
|
| 1374 |
ext4_error(orig_inode->i_sb, __func__,
|
| 1375 |
"We replaced blocks too much! "
|
| 1376 |
"sum of replaced: %llu requested: %llu",
|
| 1377 |
*moved_len, len);
|
| 1378 |
ret1 = -EIO;
|
| 1379 |
- goto out;
|
| 1380 |
+ break;
|
| 1381 |
}
|
| 1382 |
|
| 1383 |
+ orig_page_offset++;
|
| 1384 |
data_offset_in_page = 0;
|
| 1385 |
rest_blocks -= block_len_in_page;
|
| 1386 |
if (rest_blocks > blocks_per_page)
|
| 1387 |
@@ -1393,6 +1368,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
|
| 1388 |
block_len_in_page = rest_blocks;
|
| 1389 |
}
|
| 1390 |
|
| 1391 |
+ double_down_write_data_sem(orig_inode, donor_inode);
|
| 1392 |
+ if (ret1 < 0)
|
| 1393 |
+ break;
|
| 1394 |
+
|
| 1395 |
/* Decrease buffer counter */
|
| 1396 |
if (holecheck_path)
|
| 1397 |
ext4_ext_drop_refs(holecheck_path);
|
| 1398 |
@@ -1414,6 +1393,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
|
| 1399 |
|
| 1400 |
}
|
| 1401 |
out:
|
| 1402 |
+ if (*moved_len) {
|
| 1403 |
+ ext4_discard_preallocations(orig_inode);
|
| 1404 |
+ ext4_discard_preallocations(donor_inode);
|
| 1405 |
+ }
|
| 1406 |
+
|
| 1407 |
if (orig_path) {
|
| 1408 |
ext4_ext_drop_refs(orig_path);
|
| 1409 |
kfree(orig_path);
|
| 1410 |
@@ -1422,7 +1406,7 @@ out:
|
| 1411 |
ext4_ext_drop_refs(holecheck_path);
|
| 1412 |
kfree(holecheck_path);
|
| 1413 |
}
|
| 1414 |
-
|
| 1415 |
+ double_up_write_data_sem(orig_inode, donor_inode);
|
| 1416 |
ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
|
| 1417 |
|
| 1418 |
if (ret1)
|
| 1419 |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
|
| 1420 |
index 6d2c1b8..17a17e1 100644
|
| 1421 |
--- a/fs/ext4/namei.c
|
| 1422 |
+++ b/fs/ext4/namei.c
|
| 1423 |
@@ -1292,9 +1292,6 @@ errout:
|
| 1424 |
* add_dirent_to_buf will attempt search the directory block for
|
| 1425 |
* space. It will return -ENOSPC if no space is available, and -EIO
|
| 1426 |
* and -EEXIST if directory entry already exists.
|
| 1427 |
- *
|
| 1428 |
- * NOTE! bh is NOT released in the case where ENOSPC is returned. In
|
| 1429 |
- * all other cases bh is released.
|
| 1430 |
*/
|
| 1431 |
static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
|
| 1432 |
struct inode *inode, struct ext4_dir_entry_2 *de,
|
| 1433 |
@@ -1315,14 +1312,10 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
|
| 1434 |
top = bh->b_data + blocksize - reclen;
|
| 1435 |
while ((char *) de <= top) {
|
| 1436 |
if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
|
| 1437 |
- bh, offset)) {
|
| 1438 |
- brelse(bh);
|
| 1439 |
+ bh, offset))
|
| 1440 |
return -EIO;
|
| 1441 |
- }
|
| 1442 |
- if (ext4_match(namelen, name, de)) {
|
| 1443 |
- brelse(bh);
|
| 1444 |
+ if (ext4_match(namelen, name, de))
|
| 1445 |
return -EEXIST;
|
| 1446 |
- }
|
| 1447 |
nlen = EXT4_DIR_REC_LEN(de->name_len);
|
| 1448 |
rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
|
| 1449 |
if ((de->inode? rlen - nlen: rlen) >= reclen)
|
| 1450 |
@@ -1337,7 +1330,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
|
| 1451 |
err = ext4_journal_get_write_access(handle, bh);
|
| 1452 |
if (err) {
|
| 1453 |
ext4_std_error(dir->i_sb, err);
|
| 1454 |
- brelse(bh);
|
| 1455 |
return err;
|
| 1456 |
}
|
| 1457 |
|
| 1458 |
@@ -1377,7 +1369,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
|
| 1459 |
err = ext4_handle_dirty_metadata(handle, dir, bh);
|
| 1460 |
if (err)
|
| 1461 |
ext4_std_error(dir->i_sb, err);
|
| 1462 |
- brelse(bh);
|
| 1463 |
return 0;
|
| 1464 |
}
|
| 1465 |
|
| 1466 |
@@ -1471,7 +1462,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
|
| 1467 |
if (!(de))
|
| 1468 |
return retval;
|
| 1469 |
|
| 1470 |
- return add_dirent_to_buf(handle, dentry, inode, de, bh);
|
| 1471 |
+ retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
|
| 1472 |
+ brelse(bh);
|
| 1473 |
+ return retval;
|
| 1474 |
}
|
| 1475 |
|
| 1476 |
/*
|
| 1477 |
@@ -1514,8 +1507,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
|
| 1478 |
if(!bh)
|
| 1479 |
return retval;
|
| 1480 |
retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
|
| 1481 |
- if (retval != -ENOSPC)
|
| 1482 |
+ if (retval != -ENOSPC) {
|
| 1483 |
+ brelse(bh);
|
| 1484 |
return retval;
|
| 1485 |
+ }
|
| 1486 |
|
| 1487 |
if (blocks == 1 && !dx_fallback &&
|
| 1488 |
EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
|
| 1489 |
@@ -1528,7 +1523,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
|
| 1490 |
de = (struct ext4_dir_entry_2 *) bh->b_data;
|
| 1491 |
de->inode = 0;
|
| 1492 |
de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
|
| 1493 |
- return add_dirent_to_buf(handle, dentry, inode, de, bh);
|
| 1494 |
+ retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
|
| 1495 |
+ brelse(bh);
|
| 1496 |
+ return retval;
|
| 1497 |
}
|
| 1498 |
|
| 1499 |
/*
|
| 1500 |
@@ -1561,10 +1558,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
|
| 1501 |
goto journal_error;
|
| 1502 |
|
| 1503 |
err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
|
| 1504 |
- if (err != -ENOSPC) {
|
| 1505 |
- bh = NULL;
|
| 1506 |
+ if (err != -ENOSPC)
|
| 1507 |
goto cleanup;
|
| 1508 |
- }
|
| 1509 |
|
| 1510 |
/* Block full, should compress but for now just split */
|
| 1511 |
dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
|
| 1512 |
@@ -1657,7 +1652,6 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
|
| 1513 |
if (!de)
|
| 1514 |
goto cleanup;
|
| 1515 |
err = add_dirent_to_buf(handle, dentry, inode, de, bh);
|
| 1516 |
- bh = NULL;
|
| 1517 |
goto cleanup;
|
| 1518 |
|
| 1519 |
journal_error:
|
| 1520 |
@@ -1775,7 +1769,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
|
| 1521 |
retry:
|
| 1522 |
handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
|
| 1523 |
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
|
| 1524 |
- 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
|
| 1525 |
+ EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
|
| 1526 |
if (IS_ERR(handle))
|
| 1527 |
return PTR_ERR(handle);
|
| 1528 |
|
| 1529 |
@@ -1809,7 +1803,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
|
| 1530 |
retry:
|
| 1531 |
handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
|
| 1532 |
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
|
| 1533 |
- 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
|
| 1534 |
+ EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
|
| 1535 |
if (IS_ERR(handle))
|
| 1536 |
return PTR_ERR(handle);
|
| 1537 |
|
| 1538 |
@@ -1846,7 +1840,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
|
| 1539 |
retry:
|
| 1540 |
handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
|
| 1541 |
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
|
| 1542 |
- 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
|
| 1543 |
+ EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
|
| 1544 |
if (IS_ERR(handle))
|
| 1545 |
return PTR_ERR(handle);
|
| 1546 |
|
| 1547 |
@@ -2259,7 +2253,7 @@ static int ext4_symlink(struct inode *dir,
|
| 1548 |
retry:
|
| 1549 |
handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
|
| 1550 |
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
|
| 1551 |
- 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
|
| 1552 |
+ EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
|
| 1553 |
if (IS_ERR(handle))
|
| 1554 |
return PTR_ERR(handle);
|
| 1555 |
|
| 1556 |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
|
| 1557 |
index 3cfc343..3b2c554 100644
|
| 1558 |
--- a/fs/ext4/resize.c
|
| 1559 |
+++ b/fs/ext4/resize.c
|
| 1560 |
@@ -247,7 +247,7 @@ static int setup_new_group_blocks(struct super_block *sb,
|
| 1561 |
goto exit_bh;
|
| 1562 |
|
| 1563 |
if (IS_ERR(gdb = bclean(handle, sb, block))) {
|
| 1564 |
- err = PTR_ERR(bh);
|
| 1565 |
+ err = PTR_ERR(gdb);
|
| 1566 |
goto exit_bh;
|
| 1567 |
}
|
| 1568 |
ext4_handle_dirty_metadata(handle, NULL, gdb);
|
| 1569 |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
|
| 1570 |
index d4ca92a..9ae5217 100644
|
| 1571 |
--- a/fs/ext4/super.c
|
| 1572 |
+++ b/fs/ext4/super.c
|
| 1573 |
@@ -603,10 +603,6 @@ static void ext4_put_super(struct super_block *sb)
|
| 1574 |
if (sb->s_dirt)
|
| 1575 |
ext4_commit_super(sb, 1);
|
| 1576 |
|
| 1577 |
- ext4_release_system_zone(sb);
|
| 1578 |
- ext4_mb_release(sb);
|
| 1579 |
- ext4_ext_release(sb);
|
| 1580 |
- ext4_xattr_put_super(sb);
|
| 1581 |
if (sbi->s_journal) {
|
| 1582 |
err = jbd2_journal_destroy(sbi->s_journal);
|
| 1583 |
sbi->s_journal = NULL;
|
| 1584 |
@@ -614,6 +610,12 @@ static void ext4_put_super(struct super_block *sb)
|
| 1585 |
ext4_abort(sb, __func__,
|
| 1586 |
"Couldn't clean up the journal");
|
| 1587 |
}
|
| 1588 |
+
|
| 1589 |
+ ext4_release_system_zone(sb);
|
| 1590 |
+ ext4_mb_release(sb);
|
| 1591 |
+ ext4_ext_release(sb);
|
| 1592 |
+ ext4_xattr_put_super(sb);
|
| 1593 |
+
|
| 1594 |
if (!(sb->s_flags & MS_RDONLY)) {
|
| 1595 |
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
|
| 1596 |
es->s_state = cpu_to_le16(sbi->s_mount_state);
|
| 1597 |
@@ -704,6 +706,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
|
| 1598 |
spin_lock_init(&(ei->i_block_reservation_lock));
|
| 1599 |
INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
|
| 1600 |
ei->cur_aio_dio = NULL;
|
| 1601 |
+ ei->i_sync_tid = 0;
|
| 1602 |
+ ei->i_datasync_tid = 0;
|
| 1603 |
|
| 1604 |
return &ei->vfs_inode;
|
| 1605 |
}
|
| 1606 |
@@ -899,6 +903,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
|
| 1607 |
if (test_opt(sb, NO_AUTO_DA_ALLOC))
|
| 1608 |
seq_puts(seq, ",noauto_da_alloc");
|
| 1609 |
|
| 1610 |
+ if (test_opt(sb, DISCARD))
|
| 1611 |
+ seq_puts(seq, ",discard");
|
| 1612 |
+
|
| 1613 |
+ if (test_opt(sb, NOLOAD))
|
| 1614 |
+ seq_puts(seq, ",norecovery");
|
| 1615 |
+
|
| 1616 |
ext4_show_quota_options(seq, sb);
|
| 1617 |
|
| 1618 |
return 0;
|
| 1619 |
@@ -1079,7 +1089,8 @@ enum {
|
| 1620 |
Opt_usrquota, Opt_grpquota, Opt_i_version,
|
| 1621 |
Opt_stripe, Opt_delalloc, Opt_nodelalloc,
|
| 1622 |
Opt_block_validity, Opt_noblock_validity,
|
| 1623 |
- Opt_inode_readahead_blks, Opt_journal_ioprio
|
| 1624 |
+ Opt_inode_readahead_blks, Opt_journal_ioprio,
|
| 1625 |
+ Opt_discard, Opt_nodiscard,
|
| 1626 |
};
|
| 1627 |
|
| 1628 |
static const match_table_t tokens = {
|
| 1629 |
@@ -1104,6 +1115,7 @@ static const match_table_t tokens = {
|
| 1630 |
{Opt_acl, "acl"},
|
| 1631 |
{Opt_noacl, "noacl"},
|
| 1632 |
{Opt_noload, "noload"},
|
| 1633 |
+ {Opt_noload, "norecovery"},
|
| 1634 |
{Opt_nobh, "nobh"},
|
| 1635 |
{Opt_bh, "bh"},
|
| 1636 |
{Opt_commit, "commit=%u"},
|
| 1637 |
@@ -1144,6 +1156,8 @@ static const match_table_t tokens = {
|
| 1638 |
{Opt_auto_da_alloc, "auto_da_alloc=%u"},
|
| 1639 |
{Opt_auto_da_alloc, "auto_da_alloc"},
|
| 1640 |
{Opt_noauto_da_alloc, "noauto_da_alloc"},
|
| 1641 |
+ {Opt_discard, "discard"},
|
| 1642 |
+ {Opt_nodiscard, "nodiscard"},
|
| 1643 |
{Opt_err, NULL},
|
| 1644 |
};
|
| 1645 |
|
| 1646 |
@@ -1565,6 +1579,12 @@ set_qf_format:
|
| 1647 |
else
|
| 1648 |
set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
|
| 1649 |
break;
|
| 1650 |
+ case Opt_discard:
|
| 1651 |
+ set_opt(sbi->s_mount_opt, DISCARD);
|
| 1652 |
+ break;
|
| 1653 |
+ case Opt_nodiscard:
|
| 1654 |
+ clear_opt(sbi->s_mount_opt, DISCARD);
|
| 1655 |
+ break;
|
| 1656 |
default:
|
| 1657 |
ext4_msg(sb, KERN_ERR,
|
| 1658 |
"Unrecognized mount option \"%s\" "
|
| 1659 |
@@ -1673,14 +1693,14 @@ static int ext4_fill_flex_info(struct super_block *sb)
|
| 1660 |
size_t size;
|
| 1661 |
int i;
|
| 1662 |
|
| 1663 |
- if (!sbi->s_es->s_log_groups_per_flex) {
|
| 1664 |
+ sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
|
| 1665 |
+ groups_per_flex = 1 << sbi->s_log_groups_per_flex;
|
| 1666 |
+
|
| 1667 |
+ if (groups_per_flex < 2) {
|
| 1668 |
sbi->s_log_groups_per_flex = 0;
|
| 1669 |
return 1;
|
| 1670 |
}
|
| 1671 |
|
| 1672 |
- sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
|
| 1673 |
- groups_per_flex = 1 << sbi->s_log_groups_per_flex;
|
| 1674 |
-
|
| 1675 |
/* We allocate both existing and potentially added groups */
|
| 1676 |
flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
|
| 1677 |
((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
|
| 1678 |
@@ -3668,13 +3688,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
|
| 1679 |
buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
|
| 1680 |
buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
|
| 1681 |
percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
|
| 1682 |
- ext4_free_blocks_count_set(es, buf->f_bfree);
|
| 1683 |
buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
|
| 1684 |
if (buf->f_bfree < ext4_r_blocks_count(es))
|
| 1685 |
buf->f_bavail = 0;
|
| 1686 |
buf->f_files = le32_to_cpu(es->s_inodes_count);
|
| 1687 |
buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
|
| 1688 |
- es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
|
| 1689 |
buf->f_namelen = EXT4_NAME_LEN;
|
| 1690 |
fsid = le64_to_cpup((void *)es->s_uuid) ^
|
| 1691 |
le64_to_cpup((void *)es->s_uuid + sizeof(u64));
|
| 1692 |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
|
| 1693 |
index fed5b01..0257019 100644
|
| 1694 |
--- a/fs/ext4/xattr.c
|
| 1695 |
+++ b/fs/ext4/xattr.c
|
| 1696 |
@@ -988,6 +988,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
|
| 1697 |
if (error)
|
| 1698 |
goto cleanup;
|
| 1699 |
|
| 1700 |
+ error = ext4_journal_get_write_access(handle, is.iloc.bh);
|
| 1701 |
+ if (error)
|
| 1702 |
+ goto cleanup;
|
| 1703 |
+
|
| 1704 |
if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) {
|
| 1705 |
struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
|
| 1706 |
memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
|
| 1707 |
@@ -1013,9 +1017,6 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
|
| 1708 |
if (flags & XATTR_CREATE)
|
| 1709 |
goto cleanup;
|
| 1710 |
}
|
| 1711 |
- error = ext4_journal_get_write_access(handle, is.iloc.bh);
|
| 1712 |
- if (error)
|
| 1713 |
- goto cleanup;
|
| 1714 |
if (!value) {
|
| 1715 |
if (!is.s.not_found)
|
| 1716 |
error = ext4_xattr_ibody_set(handle, inode, &i, &is);
|
| 1717 |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
|
| 1718 |
index d4cfd6d..8896c1d 100644
|
| 1719 |
--- a/fs/jbd2/commit.c
|
| 1720 |
+++ b/fs/jbd2/commit.c
|
| 1721 |
@@ -636,6 +636,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
|
| 1722 |
JBUFFER_TRACE(jh, "ph3: write metadata");
|
| 1723 |
flags = jbd2_journal_write_metadata_buffer(commit_transaction,
|
| 1724 |
jh, &new_jh, blocknr);
|
| 1725 |
+ if (flags < 0) {
|
| 1726 |
+ jbd2_journal_abort(journal, flags);
|
| 1727 |
+ continue;
|
| 1728 |
+ }
|
| 1729 |
set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
|
| 1730 |
wbuf[bufs++] = jh2bh(new_jh);
|
| 1731 |
|
| 1732 |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
|
| 1733 |
index fed8538..82c295d 100644
|
| 1734 |
--- a/fs/jbd2/journal.c
|
| 1735 |
+++ b/fs/jbd2/journal.c
|
| 1736 |
@@ -78,6 +78,7 @@ EXPORT_SYMBOL(jbd2_journal_errno);
|
| 1737 |
EXPORT_SYMBOL(jbd2_journal_ack_err);
|
| 1738 |
EXPORT_SYMBOL(jbd2_journal_clear_err);
|
| 1739 |
EXPORT_SYMBOL(jbd2_log_wait_commit);
|
| 1740 |
+EXPORT_SYMBOL(jbd2_log_start_commit);
|
| 1741 |
EXPORT_SYMBOL(jbd2_journal_start_commit);
|
| 1742 |
EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
|
| 1743 |
EXPORT_SYMBOL(jbd2_journal_wipe);
|
| 1744 |
@@ -358,6 +359,10 @@ repeat:
|
| 1745 |
|
| 1746 |
jbd_unlock_bh_state(bh_in);
|
| 1747 |
tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
|
| 1748 |
+ if (!tmp) {
|
| 1749 |
+ jbd2_journal_put_journal_head(new_jh);
|
| 1750 |
+ return -ENOMEM;
|
| 1751 |
+ }
|
| 1752 |
jbd_lock_bh_state(bh_in);
|
| 1753 |
if (jh_in->b_frozen_data) {
|
| 1754 |
jbd2_free(tmp, bh_in->b_size);
|
| 1755 |
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
| 1756 |
index 75e6e60..0f67914 100644
|
| 1757 |
--- a/include/linux/sched.h
|
| 1758 |
+++ b/include/linux/sched.h
|
| 1759 |
@@ -2086,11 +2086,18 @@ static inline int is_si_special(const struct siginfo *info)
|
| 1760 |
return info <= SEND_SIG_FORCED;
|
| 1761 |
}
|
| 1762 |
|
| 1763 |
-/* True if we are on the alternate signal stack. */
|
| 1764 |
-
|
| 1765 |
+/*
|
| 1766 |
+ * True if we are on the alternate signal stack.
|
| 1767 |
+ */
|
| 1768 |
static inline int on_sig_stack(unsigned long sp)
|
| 1769 |
{
|
| 1770 |
- return (sp - current->sas_ss_sp < current->sas_ss_size);
|
| 1771 |
+#ifdef CONFIG_STACK_GROWSUP
|
| 1772 |
+ return sp >= current->sas_ss_sp &&
|
| 1773 |
+ sp - current->sas_ss_sp < current->sas_ss_size;
|
| 1774 |
+#else
|
| 1775 |
+ return sp > current->sas_ss_sp &&
|
| 1776 |
+ sp - current->sas_ss_sp <= current->sas_ss_size;
|
| 1777 |
+#endif
|
| 1778 |
}
|
| 1779 |
|
| 1780 |
static inline int sas_ss_flags(unsigned long sp)
|
| 1781 |
diff --git a/include/scsi/osd_protocol.h b/include/scsi/osd_protocol.h
|
| 1782 |
index 2cc8e8b..6856612 100644
|
| 1783 |
--- a/include/scsi/osd_protocol.h
|
| 1784 |
+++ b/include/scsi/osd_protocol.h
|
| 1785 |
@@ -17,6 +17,7 @@
|
| 1786 |
#define __OSD_PROTOCOL_H__
|
| 1787 |
|
| 1788 |
#include <linux/types.h>
|
| 1789 |
+#include <linux/kernel.h>
|
| 1790 |
#include <asm/unaligned.h>
|
| 1791 |
#include <scsi/scsi.h>
|
| 1792 |
|
| 1793 |
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
|
| 1794 |
index 47941fc..0b4baba 100644
|
| 1795 |
--- a/include/scsi/scsi_host.h
|
| 1796 |
+++ b/include/scsi/scsi_host.h
|
| 1797 |
@@ -677,6 +677,12 @@ struct Scsi_Host {
|
| 1798 |
void *shost_data;
|
| 1799 |
|
| 1800 |
/*
|
| 1801 |
+ * Points to the physical bus device we'd use to do DMA
|
| 1802 |
+ * Needed just in case we have virtual hosts.
|
| 1803 |
+ */
|
| 1804 |
+ struct device *dma_dev;
|
| 1805 |
+
|
| 1806 |
+ /*
|
| 1807 |
* We should ensure that this is aligned, both for better performance
|
| 1808 |
* and also because some compilers (m68k) don't automatically force
|
| 1809 |
* alignment to a long boundary.
|
| 1810 |
@@ -720,7 +726,9 @@ extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *);
|
| 1811 |
extern void scsi_flush_work(struct Scsi_Host *);
|
| 1812 |
|
| 1813 |
extern struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *, int);
|
| 1814 |
-extern int __must_check scsi_add_host(struct Scsi_Host *, struct device *);
|
| 1815 |
+extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *,
|
| 1816 |
+ struct device *,
|
| 1817 |
+ struct device *);
|
| 1818 |
extern void scsi_scan_host(struct Scsi_Host *);
|
| 1819 |
extern void scsi_rescan_device(struct device *);
|
| 1820 |
extern void scsi_remove_host(struct Scsi_Host *);
|
| 1821 |
@@ -731,6 +739,12 @@ extern const char *scsi_host_state_name(enum scsi_host_state);
|
| 1822 |
|
| 1823 |
extern u64 scsi_calculate_bounce_limit(struct Scsi_Host *);
|
| 1824 |
|
| 1825 |
+static inline int __must_check scsi_add_host(struct Scsi_Host *host,
|
| 1826 |
+ struct device *dev)
|
| 1827 |
+{
|
| 1828 |
+ return scsi_add_host_with_dma(host, dev, dev);
|
| 1829 |
+}
|
| 1830 |
+
|
| 1831 |
static inline struct device *scsi_get_device(struct Scsi_Host *shost)
|
| 1832 |
{
|
| 1833 |
return shost->shost_gendev.parent;
|