| 1 |
dsd |
7 |
diff -urNpX dontdiff linux-2.6.12-rc2-gentoo/drivers/md/dm-bbr.c linux-dsd/drivers/md/dm-bbr.c |
| 2 |
|
|
--- linux-2.6.12-rc2-gentoo/drivers/md/dm-bbr.c 1970-01-01 01:00:00.000000000 +0100 |
| 3 |
|
|
+++ linux-dsd/drivers/md/dm-bbr.c 2005-04-06 10:06:16.000000000 +0100 |
| 4 |
|
|
@@ -0,0 +1,1003 @@ |
| 5 |
|
|
+/* |
| 6 |
|
|
+ * (C) Copyright IBM Corp. 2002, 2004 |
| 7 |
|
|
+ * |
| 8 |
|
|
+ * This program is free software; you can redistribute it and/or modify |
| 9 |
|
|
+ * it under the terms of the GNU General Public License as published by |
| 10 |
|
|
+ * the Free Software Foundation; either version 2 of the License, or |
| 11 |
|
|
+ * (at your option) any later version. |
| 12 |
|
|
+ * |
| 13 |
|
|
+ * This program is distributed in the hope that it will be useful, |
| 14 |
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 |
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
| 16 |
|
|
+ * the GNU General Public License for more details. |
| 17 |
|
|
+ * |
| 18 |
|
|
+ * You should have received a copy of the GNU General Public License |
| 19 |
|
|
+ * along with this program; if not, write to the Free Software |
| 20 |
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 21 |
|
|
+ * |
| 22 |
|
|
+ * linux/drivers/md/dm-bbr.c |
| 23 |
|
|
+ * |
| 24 |
|
|
+ * Bad-block-relocation (BBR) target for device-mapper. |
| 25 |
|
|
+ * |
| 26 |
|
|
+ * The BBR target is designed to remap I/O write failures to another safe |
| 27 |
|
|
+ * location on disk. Note that most disk drives have BBR built into them, |
| 28 |
|
|
+ * this means that our software BBR will be only activated when all hardware |
| 29 |
|
|
+ * BBR replacement sectors have been used. |
| 30 |
|
|
+ */ |
| 31 |
|
|
+ |
| 32 |
|
|
+#include <linux/module.h> |
| 33 |
|
|
+#include <linux/init.h> |
| 34 |
|
|
+#include <linux/bio.h> |
| 35 |
|
|
+#include <linux/spinlock.h> |
| 36 |
|
|
+#include <linux/slab.h> |
| 37 |
|
|
+#include <linux/mempool.h> |
| 38 |
|
|
+#include <linux/workqueue.h> |
| 39 |
|
|
+#include <linux/vmalloc.h> |
| 40 |
|
|
+ |
| 41 |
|
|
+#include "dm.h" |
| 42 |
|
|
+#include "dm-bio-list.h" |
| 43 |
|
|
+#include "dm-bio-record.h" |
| 44 |
|
|
+#include "dm-bbr.h" |
| 45 |
|
|
+#include "dm-io.h" |
| 46 |
|
|
+ |
| 47 |
|
|
+#define SECTOR_SIZE (1 << SECTOR_SHIFT) |
| 48 |
|
|
+ |
| 49 |
|
|
+static struct workqueue_struct *dm_bbr_wq = NULL; |
| 50 |
|
|
+static void bbr_remap_handler(void *data); |
| 51 |
|
|
+static kmem_cache_t *bbr_remap_cache; |
| 52 |
|
|
+static kmem_cache_t *bbr_io_cache; |
| 53 |
|
|
+static mempool_t *bbr_io_pool; |
| 54 |
|
|
+ |
| 55 |
|
|
+/** |
| 56 |
|
|
+ * bbr_binary_tree_destroy |
| 57 |
|
|
+ * |
| 58 |
|
|
+ * Destroy the binary tree. |
| 59 |
|
|
+ **/ |
| 60 |
|
|
+static void bbr_binary_tree_destroy(struct bbr_runtime_remap *root) |
| 61 |
|
|
+{ |
| 62 |
|
|
+ struct bbr_runtime_remap **link = NULL; |
| 63 |
|
|
+ struct bbr_runtime_remap *node = root; |
| 64 |
|
|
+ |
| 65 |
|
|
+ while (node) { |
| 66 |
|
|
+ if (node->left) { |
| 67 |
|
|
+ link = &(node->left); |
| 68 |
|
|
+ node = node->left; |
| 69 |
|
|
+ continue; |
| 70 |
|
|
+ } |
| 71 |
|
|
+ if (node->right) { |
| 72 |
|
|
+ link = &(node->right); |
| 73 |
|
|
+ node = node->right; |
| 74 |
|
|
+ continue; |
| 75 |
|
|
+ } |
| 76 |
|
|
+ |
| 77 |
|
|
+ kmem_cache_free(bbr_remap_cache, node); |
| 78 |
|
|
+ if (node == root) { |
| 79 |
|
|
+ /* If root is deleted, we're done. */ |
| 80 |
|
|
+ break; |
| 81 |
|
|
+ } |
| 82 |
|
|
+ |
| 83 |
|
|
+ /* Back to root. */ |
| 84 |
|
|
+ node = root; |
| 85 |
|
|
+ *link = NULL; |
| 86 |
|
|
+ } |
| 87 |
|
|
+} |
| 88 |
|
|
+ |
| 89 |
|
|
+static void bbr_free_remap(struct bbr_private *bbr_id) |
| 90 |
|
|
+{ |
| 91 |
|
|
+ spin_lock_irq(&bbr_id->remap_root_lock); |
| 92 |
|
|
+ bbr_binary_tree_destroy(bbr_id->remap_root); |
| 93 |
|
|
+ bbr_id->remap_root = NULL; |
| 94 |
|
|
+ spin_unlock_irq(&bbr_id->remap_root_lock); |
| 95 |
|
|
+} |
| 96 |
|
|
+ |
| 97 |
|
|
+static struct bbr_private *bbr_alloc_private(void) |
| 98 |
|
|
+{ |
| 99 |
|
|
+ struct bbr_private *bbr_id; |
| 100 |
|
|
+ |
| 101 |
|
|
+ bbr_id = kmalloc(sizeof(*bbr_id), GFP_KERNEL); |
| 102 |
|
|
+ if (bbr_id) { |
| 103 |
|
|
+ memset(bbr_id, 0, sizeof(*bbr_id)); |
| 104 |
|
|
+ INIT_WORK(&bbr_id->remap_work, bbr_remap_handler, bbr_id); |
| 105 |
|
|
+ bbr_id->remap_root_lock = SPIN_LOCK_UNLOCKED; |
| 106 |
|
|
+ bbr_id->remap_ios_lock = SPIN_LOCK_UNLOCKED; |
| 107 |
|
|
+ bbr_id->in_use_replacement_blks = (atomic_t)ATOMIC_INIT(0); |
| 108 |
|
|
+ } |
| 109 |
|
|
+ |
| 110 |
|
|
+ return bbr_id; |
| 111 |
|
|
+} |
| 112 |
|
|
+ |
| 113 |
|
|
+static void bbr_free_private(struct bbr_private *bbr_id) |
| 114 |
|
|
+{ |
| 115 |
|
|
+ if (bbr_id->bbr_table) { |
| 116 |
|
|
+ vfree(bbr_id->bbr_table); |
| 117 |
|
|
+ } |
| 118 |
|
|
+ bbr_free_remap(bbr_id); |
| 119 |
|
|
+ kfree(bbr_id); |
| 120 |
|
|
+} |
| 121 |
|
|
+ |
| 122 |
|
|
+static u32 crc_table[256]; |
| 123 |
|
|
+static u32 crc_table_built = 0; |
| 124 |
|
|
+ |
| 125 |
|
|
+static void build_crc_table(void) |
| 126 |
|
|
+{ |
| 127 |
|
|
+ u32 i, j, crc; |
| 128 |
|
|
+ |
| 129 |
|
|
+ for (i = 0; i <= 255; i++) { |
| 130 |
|
|
+ crc = i; |
| 131 |
|
|
+ for (j = 8; j > 0; j--) { |
| 132 |
|
|
+ if (crc & 1) |
| 133 |
|
|
+ crc = (crc >> 1) ^ CRC_POLYNOMIAL; |
| 134 |
|
|
+ else |
| 135 |
|
|
+ crc >>= 1; |
| 136 |
|
|
+ } |
| 137 |
|
|
+ crc_table[i] = crc; |
| 138 |
|
|
+ } |
| 139 |
|
|
+ crc_table_built = 1; |
| 140 |
|
|
+} |
| 141 |
|
|
+ |
| 142 |
|
|
+static u32 calculate_crc(u32 crc, void *buffer, u32 buffersize) |
| 143 |
|
|
+{ |
| 144 |
|
|
+ unsigned char *current_byte; |
| 145 |
|
|
+ u32 temp1, temp2, i; |
| 146 |
|
|
+ |
| 147 |
|
|
+ current_byte = (unsigned char *) buffer; |
| 148 |
|
|
+ /* Make sure the crc table is available */ |
| 149 |
|
|
+ if (!crc_table_built) |
| 150 |
|
|
+ build_crc_table(); |
| 151 |
|
|
+ /* Process each byte in the buffer. */ |
| 152 |
|
|
+ for (i = 0; i < buffersize; i++) { |
| 153 |
|
|
+ temp1 = (crc >> 8) & 0x00FFFFFF; |
| 154 |
|
|
+ temp2 = crc_table[(crc ^ (u32) * current_byte) & |
| 155 |
|
|
+ (u32) 0xff]; |
| 156 |
|
|
+ current_byte++; |
| 157 |
|
|
+ crc = temp1 ^ temp2; |
| 158 |
|
|
+ } |
| 159 |
|
|
+ return crc; |
| 160 |
|
|
+} |
| 161 |
|
|
+ |
| 162 |
|
|
+/** |
| 163 |
|
|
+ * le_bbr_table_sector_to_cpu |
| 164 |
|
|
+ * |
| 165 |
|
|
+ * Convert bbr meta data from on-disk (LE) format |
| 166 |
|
|
+ * to the native cpu endian format. |
| 167 |
|
|
+ **/ |
| 168 |
|
|
+static void le_bbr_table_sector_to_cpu(struct bbr_table *p) |
| 169 |
|
|
+{ |
| 170 |
|
|
+ int i; |
| 171 |
|
|
+ p->signature = le32_to_cpup(&p->signature); |
| 172 |
|
|
+ p->crc = le32_to_cpup(&p->crc); |
| 173 |
|
|
+ p->sequence_number = le32_to_cpup(&p->sequence_number); |
| 174 |
|
|
+ p->in_use_cnt = le32_to_cpup(&p->in_use_cnt); |
| 175 |
|
|
+ for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) { |
| 176 |
|
|
+ p->entries[i].bad_sect = |
| 177 |
|
|
+ le64_to_cpup(&p->entries[i].bad_sect); |
| 178 |
|
|
+ p->entries[i].replacement_sect = |
| 179 |
|
|
+ le64_to_cpup(&p->entries[i].replacement_sect); |
| 180 |
|
|
+ } |
| 181 |
|
|
+} |
| 182 |
|
|
+ |
| 183 |
|
|
+/** |
| 184 |
|
|
+ * cpu_bbr_table_sector_to_le |
| 185 |
|
|
+ * |
| 186 |
|
|
+ * Convert bbr meta data from cpu endian format to on-disk (LE) format |
| 187 |
|
|
+ **/ |
| 188 |
|
|
+static void cpu_bbr_table_sector_to_le(struct bbr_table *p, |
| 189 |
|
|
+ struct bbr_table *le) |
| 190 |
|
|
+{ |
| 191 |
|
|
+ int i; |
| 192 |
|
|
+ le->signature = cpu_to_le32p(&p->signature); |
| 193 |
|
|
+ le->crc = cpu_to_le32p(&p->crc); |
| 194 |
|
|
+ le->sequence_number = cpu_to_le32p(&p->sequence_number); |
| 195 |
|
|
+ le->in_use_cnt = cpu_to_le32p(&p->in_use_cnt); |
| 196 |
|
|
+ for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) { |
| 197 |
|
|
+ le->entries[i].bad_sect = |
| 198 |
|
|
+ cpu_to_le64p(&p->entries[i].bad_sect); |
| 199 |
|
|
+ le->entries[i].replacement_sect = |
| 200 |
|
|
+ cpu_to_le64p(&p->entries[i].replacement_sect); |
| 201 |
|
|
+ } |
| 202 |
|
|
+} |
| 203 |
|
|
+ |
| 204 |
|
|
+/** |
| 205 |
|
|
+ * validate_bbr_table_sector |
| 206 |
|
|
+ * |
| 207 |
|
|
+ * Check the specified BBR table sector for a valid signature and CRC. If it's |
| 208 |
|
|
+ * valid, endian-convert the table sector. |
| 209 |
|
|
+ **/ |
| 210 |
|
|
+static int validate_bbr_table_sector(struct bbr_table *p) |
| 211 |
|
|
+{ |
| 212 |
|
|
+ int rc = 0; |
| 213 |
|
|
+ int org_crc, final_crc; |
| 214 |
|
|
+ |
| 215 |
|
|
+ if (le32_to_cpup(&p->signature) != BBR_TABLE_SIGNATURE) { |
| 216 |
|
|
+ DMERR("dm-bbr: BBR table signature doesn't match!"); |
| 217 |
|
|
+ DMERR("dm-bbr: Found 0x%x. Expecting 0x%x", |
| 218 |
|
|
+ le32_to_cpup(&p->signature), BBR_TABLE_SIGNATURE); |
| 219 |
|
|
+ rc = -EINVAL; |
| 220 |
|
|
+ goto out; |
| 221 |
|
|
+ } |
| 222 |
|
|
+ |
| 223 |
|
|
+ if (!p->crc) { |
| 224 |
|
|
+ DMERR("dm-bbr: BBR table sector has no CRC!"); |
| 225 |
|
|
+ rc = -EINVAL; |
| 226 |
|
|
+ goto out; |
| 227 |
|
|
+ } |
| 228 |
|
|
+ |
| 229 |
|
|
+ org_crc = le32_to_cpup(&p->crc); |
| 230 |
|
|
+ p->crc = 0; |
| 231 |
|
|
+ final_crc = calculate_crc(INITIAL_CRC, (void *)p, sizeof(*p)); |
| 232 |
|
|
+ if (final_crc != org_crc) { |
| 233 |
|
|
+ DMERR("dm-bbr: CRC failed!"); |
| 234 |
|
|
+ DMERR("dm-bbr: Found 0x%x. Expecting 0x%x", |
| 235 |
|
|
+ org_crc, final_crc); |
| 236 |
|
|
+ rc = -EINVAL; |
| 237 |
|
|
+ goto out; |
| 238 |
|
|
+ } |
| 239 |
|
|
+ |
| 240 |
|
|
+ p->crc = cpu_to_le32p(&org_crc); |
| 241 |
|
|
+ le_bbr_table_sector_to_cpu(p); |
| 242 |
|
|
+ |
| 243 |
|
|
+out: |
| 244 |
|
|
+ return rc; |
| 245 |
|
|
+} |
| 246 |
|
|
+ |
| 247 |
|
|
+/** |
| 248 |
|
|
+ * bbr_binary_tree_insert |
| 249 |
|
|
+ * |
| 250 |
|
|
+ * Insert a node into the binary tree. |
| 251 |
|
|
+ **/ |
| 252 |
|
|
+static void bbr_binary_tree_insert(struct bbr_runtime_remap **root, |
| 253 |
|
|
+ struct bbr_runtime_remap *newnode) |
| 254 |
|
|
+{ |
| 255 |
|
|
+ struct bbr_runtime_remap **node = root; |
| 256 |
|
|
+ while (node && *node) { |
| 257 |
|
|
+ if (newnode->remap.bad_sect > (*node)->remap.bad_sect) { |
| 258 |
|
|
+ node = &((*node)->right); |
| 259 |
|
|
+ } else { |
| 260 |
|
|
+ node = &((*node)->left); |
| 261 |
|
|
+ } |
| 262 |
|
|
+ } |
| 263 |
|
|
+ |
| 264 |
|
|
+ newnode->left = newnode->right = NULL; |
| 265 |
|
|
+ *node = newnode; |
| 266 |
|
|
+} |
| 267 |
|
|
+ |
| 268 |
|
|
+/** |
| 269 |
|
|
+ * bbr_binary_search |
| 270 |
|
|
+ * |
| 271 |
|
|
+ * Search for a node that contains bad_sect == lsn. |
| 272 |
|
|
+ **/ |
| 273 |
|
|
+static struct bbr_runtime_remap *bbr_binary_search( |
| 274 |
|
|
+ struct bbr_runtime_remap *root, |
| 275 |
|
|
+ u64 lsn) |
| 276 |
|
|
+{ |
| 277 |
|
|
+ struct bbr_runtime_remap *node = root; |
| 278 |
|
|
+ while (node) { |
| 279 |
|
|
+ if (node->remap.bad_sect == lsn) { |
| 280 |
|
|
+ break; |
| 281 |
|
|
+ } |
| 282 |
|
|
+ if (lsn > node->remap.bad_sect) { |
| 283 |
|
|
+ node = node->right; |
| 284 |
|
|
+ } else { |
| 285 |
|
|
+ node = node->left; |
| 286 |
|
|
+ } |
| 287 |
|
|
+ } |
| 288 |
|
|
+ return node; |
| 289 |
|
|
+} |
| 290 |
|
|
+ |
| 291 |
|
|
+/** |
| 292 |
|
|
+ * bbr_insert_remap_entry |
| 293 |
|
|
+ * |
| 294 |
|
|
+ * Create a new remap entry and add it to the binary tree for this node. |
| 295 |
|
|
+ **/ |
| 296 |
|
|
+static int bbr_insert_remap_entry(struct bbr_private *bbr_id, |
| 297 |
|
|
+ struct bbr_table_entry *new_bbr_entry) |
| 298 |
|
|
+{ |
| 299 |
|
|
+ struct bbr_runtime_remap *newnode; |
| 300 |
|
|
+ |
| 301 |
|
|
+ newnode = kmem_cache_alloc(bbr_remap_cache, GFP_NOIO); |
| 302 |
|
|
+ if (!newnode) { |
| 303 |
|
|
+ DMERR("dm-bbr: Could not allocate from remap cache!"); |
| 304 |
|
|
+ return -ENOMEM; |
| 305 |
|
|
+ } |
| 306 |
|
|
+ newnode->remap.bad_sect = new_bbr_entry->bad_sect; |
| 307 |
|
|
+ newnode->remap.replacement_sect = new_bbr_entry->replacement_sect; |
| 308 |
|
|
+ spin_lock_irq(&bbr_id->remap_root_lock); |
| 309 |
|
|
+ bbr_binary_tree_insert(&bbr_id->remap_root, newnode); |
| 310 |
|
|
+ spin_unlock_irq(&bbr_id->remap_root_lock); |
| 311 |
|
|
+ return 0; |
| 312 |
|
|
+} |
| 313 |
|
|
+ |
| 314 |
|
|
+/** |
| 315 |
|
|
+ * bbr_table_to_remap_list |
| 316 |
|
|
+ * |
| 317 |
|
|
+ * The on-disk bbr table is sorted by the replacement sector LBA. In order to |
| 318 |
|
|
+ * improve run time performance, the in memory remap list must be sorted by |
| 319 |
|
|
+ * the bad sector LBA. This function is called at discovery time to initialize |
| 320 |
|
|
+ * the remap list. This function assumes that at least one copy of meta data |
| 321 |
|
|
+ * is valid. |
| 322 |
|
|
+ **/ |
| 323 |
|
|
+static u32 bbr_table_to_remap_list(struct bbr_private *bbr_id) |
| 324 |
|
|
+{ |
| 325 |
|
|
+ u32 in_use_blks = 0; |
| 326 |
|
|
+ int i, j; |
| 327 |
|
|
+ struct bbr_table *p; |
| 328 |
|
|
+ |
| 329 |
|
|
+ for (i = 0, p = bbr_id->bbr_table; |
| 330 |
|
|
+ i < bbr_id->nr_sects_bbr_table; |
| 331 |
|
|
+ i++, p++) { |
| 332 |
|
|
+ if (!p->in_use_cnt) { |
| 333 |
|
|
+ break; |
| 334 |
|
|
+ } |
| 335 |
|
|
+ in_use_blks += p->in_use_cnt; |
| 336 |
|
|
+ for (j = 0; j < p->in_use_cnt; j++) { |
| 337 |
|
|
+ bbr_insert_remap_entry(bbr_id, &p->entries[j]); |
| 338 |
|
|
+ } |
| 339 |
|
|
+ } |
| 340 |
|
|
+ if (in_use_blks) { |
| 341 |
|
|
+ char b[32]; |
| 342 |
|
|
+ DMWARN("dm-bbr: There are %u BBR entries for device %s", |
| 343 |
|
|
+ in_use_blks, format_dev_t(b, bbr_id->dev->bdev->bd_dev)); |
| 344 |
|
|
+ } |
| 345 |
|
|
+ |
| 346 |
|
|
+ return in_use_blks; |
| 347 |
|
|
+} |
| 348 |
|
|
+ |
| 349 |
|
|
+/** |
| 350 |
|
|
+ * bbr_search_remap_entry |
| 351 |
|
|
+ * |
| 352 |
|
|
+ * Search remap entry for the specified sector. If found, return a pointer to |
| 353 |
|
|
+ * the table entry. Otherwise, return NULL. |
| 354 |
|
|
+ **/ |
| 355 |
|
|
+static struct bbr_table_entry *bbr_search_remap_entry( |
| 356 |
|
|
+ struct bbr_private *bbr_id, |
| 357 |
|
|
+ u64 lsn) |
| 358 |
|
|
+{ |
| 359 |
|
|
+ struct bbr_runtime_remap *p; |
| 360 |
|
|
+ |
| 361 |
|
|
+ spin_lock_irq(&bbr_id->remap_root_lock); |
| 362 |
|
|
+ p = bbr_binary_search(bbr_id->remap_root, lsn); |
| 363 |
|
|
+ spin_unlock_irq(&bbr_id->remap_root_lock); |
| 364 |
|
|
+ if (p) { |
| 365 |
|
|
+ return (&p->remap); |
| 366 |
|
|
+ } else { |
| 367 |
|
|
+ return NULL; |
| 368 |
|
|
+ } |
| 369 |
|
|
+} |
| 370 |
|
|
+ |
| 371 |
|
|
+/** |
| 372 |
|
|
+ * bbr_remap |
| 373 |
|
|
+ * |
| 374 |
|
|
+ * If *lsn is in the remap table, return TRUE and modify *lsn, |
| 375 |
|
|
+ * else, return FALSE. |
| 376 |
|
|
+ **/ |
| 377 |
|
|
+static inline int bbr_remap(struct bbr_private *bbr_id, |
| 378 |
|
|
+ u64 *lsn) |
| 379 |
|
|
+{ |
| 380 |
|
|
+ struct bbr_table_entry *e; |
| 381 |
|
|
+ |
| 382 |
|
|
+ if (atomic_read(&bbr_id->in_use_replacement_blks)) { |
| 383 |
|
|
+ e = bbr_search_remap_entry(bbr_id, *lsn); |
| 384 |
|
|
+ if (e) { |
| 385 |
|
|
+ *lsn = e->replacement_sect; |
| 386 |
|
|
+ return 1; |
| 387 |
|
|
+ } |
| 388 |
|
|
+ } |
| 389 |
|
|
+ return 0; |
| 390 |
|
|
+} |
| 391 |
|
|
+ |
| 392 |
|
|
+/** |
| 393 |
|
|
+ * bbr_remap_probe |
| 394 |
|
|
+ * |
| 395 |
|
|
+ * If any of the sectors in the range [lsn, lsn+nr_sects] are in the remap |
| 396 |
|
|
+ * table return TRUE, Else, return FALSE. |
| 397 |
|
|
+ **/ |
| 398 |
|
|
+static inline int bbr_remap_probe(struct bbr_private *bbr_id, |
| 399 |
|
|
+ u64 lsn, u64 nr_sects) |
| 400 |
|
|
+{ |
| 401 |
|
|
+ u64 tmp, cnt; |
| 402 |
|
|
+ |
| 403 |
|
|
+ if (atomic_read(&bbr_id->in_use_replacement_blks)) { |
| 404 |
|
|
+ for (cnt = 0, tmp = lsn; |
| 405 |
|
|
+ cnt < nr_sects; |
| 406 |
|
|
+ cnt += bbr_id->blksize_in_sects, tmp = lsn + cnt) { |
| 407 |
|
|
+ if (bbr_remap(bbr_id,&tmp)) { |
| 408 |
|
|
+ return 1; |
| 409 |
|
|
+ } |
| 410 |
|
|
+ } |
| 411 |
|
|
+ } |
| 412 |
|
|
+ return 0; |
| 413 |
|
|
+} |
| 414 |
|
|
+ |
| 415 |
|
|
+/** |
| 416 |
|
|
+ * bbr_setup |
| 417 |
|
|
+ * |
| 418 |
|
|
+ * Read the remap tables from disk and set up the initial remap tree. |
| 419 |
|
|
+ **/ |
| 420 |
|
|
+static int bbr_setup(struct bbr_private *bbr_id) |
| 421 |
|
|
+{ |
| 422 |
|
|
+ struct bbr_table *table = bbr_id->bbr_table; |
| 423 |
|
|
+ struct io_region job; |
| 424 |
|
|
+ unsigned long error; |
| 425 |
|
|
+ int i, rc = 0; |
| 426 |
|
|
+ |
| 427 |
|
|
+ job.bdev = bbr_id->dev->bdev; |
| 428 |
|
|
+ job.count = 1; |
| 429 |
|
|
+ |
| 430 |
|
|
+ /* Read and verify each BBR table sector individually. */ |
| 431 |
|
|
+ for (i = 0; i < bbr_id->nr_sects_bbr_table; i++, table++) { |
| 432 |
|
|
+ job.sector = bbr_id->lba_table1 + i; |
| 433 |
|
|
+ rc = dm_io_sync_vm(1, &job, READ, table, &error); |
| 434 |
|
|
+ if (rc && bbr_id->lba_table2) { |
| 435 |
|
|
+ job.sector = bbr_id->lba_table2 + i; |
| 436 |
|
|
+ rc = dm_io_sync_vm(1, &job, READ, table, &error); |
| 437 |
|
|
+ } |
| 438 |
|
|
+ if (rc) { |
| 439 |
|
|
+ goto out; |
| 440 |
|
|
+ } |
| 441 |
|
|
+ |
| 442 |
|
|
+ rc = validate_bbr_table_sector(table); |
| 443 |
|
|
+ if (rc) { |
| 444 |
|
|
+ goto out; |
| 445 |
|
|
+ } |
| 446 |
|
|
+ } |
| 447 |
|
|
+ atomic_set(&bbr_id->in_use_replacement_blks, |
| 448 |
|
|
+ bbr_table_to_remap_list(bbr_id)); |
| 449 |
|
|
+ |
| 450 |
|
|
+out: |
| 451 |
|
|
+ if (rc) { |
| 452 |
|
|
+ DMERR("dm-bbr: error during device setup: %d", rc); |
| 453 |
|
|
+ } |
| 454 |
|
|
+ return rc; |
| 455 |
|
|
+} |
| 456 |
|
|
+ |
| 457 |
|
|
+/** |
| 458 |
|
|
+ * bbr_io_remap_error |
| 459 |
|
|
+ * @bbr_id: Private data for the BBR node. |
| 460 |
|
|
+ * @rw: READ or WRITE. |
| 461 |
|
|
+ * @starting_lsn: Starting sector of request to remap. |
| 462 |
|
|
+ * @count: Number of sectors in the request. |
| 463 |
|
|
+ * @page: Page containing the data for the request. |
| 464 |
|
|
+ * @offset: Byte-offset of the data within the page. |
| 465 |
|
|
+ * |
| 466 |
|
|
+ * For the requested range, try to write each sector individually. For each |
| 467 |
|
|
+ * sector that fails, find the next available remap location and write the |
| 468 |
|
|
+ * data to that new location. Then update the table and write both copies |
| 469 |
|
|
+ * of the table to disk. Finally, update the in-memory mapping and do any |
| 470 |
|
|
+ * other necessary bookkeeping. |
| 471 |
|
|
+ **/ |
| 472 |
|
|
+static int bbr_io_remap_error(struct bbr_private *bbr_id, |
| 473 |
|
|
+ int rw, |
| 474 |
|
|
+ u64 starting_lsn, |
| 475 |
|
|
+ u64 count, |
| 476 |
|
|
+ struct page *page, |
| 477 |
|
|
+ unsigned int offset) |
| 478 |
|
|
+{ |
| 479 |
|
|
+ struct bbr_table *bbr_table; |
| 480 |
|
|
+ struct io_region job; |
| 481 |
|
|
+ struct page_list pl; |
| 482 |
|
|
+ unsigned long table_sector_index; |
| 483 |
|
|
+ unsigned long table_sector_offset; |
| 484 |
|
|
+ unsigned long index; |
| 485 |
|
|
+ unsigned long error; |
| 486 |
|
|
+ u64 lsn, new_lsn; |
| 487 |
|
|
+ char b[32]; |
| 488 |
|
|
+ int rc; |
| 489 |
|
|
+ |
| 490 |
|
|
+ job.bdev = bbr_id->dev->bdev; |
| 491 |
|
|
+ job.count = 1; |
| 492 |
|
|
+ pl.page = page; |
| 493 |
|
|
+ pl.next = NULL; |
| 494 |
|
|
+ |
| 495 |
|
|
+ /* For each sector in the request. */ |
| 496 |
|
|
+ for (lsn = 0; lsn < count; lsn++, offset += SECTOR_SIZE) { |
| 497 |
|
|
+ job.sector = starting_lsn + lsn; |
| 498 |
|
|
+ rc = dm_io_sync(1, &job, rw, &pl, offset, &error); |
| 499 |
|
|
+ while (rc) { |
| 500 |
|
|
+ /* Find the next available relocation sector. */ |
| 501 |
|
|
+ new_lsn = atomic_read(&bbr_id->in_use_replacement_blks); |
| 502 |
|
|
+ if (new_lsn >= bbr_id->nr_replacement_blks) { |
| 503 |
|
|
+ /* No more replacement sectors available. */ |
| 504 |
|
|
+ return -EIO; |
| 505 |
|
|
+ } |
| 506 |
|
|
+ new_lsn += bbr_id->start_replacement_sect; |
| 507 |
|
|
+ |
| 508 |
|
|
+ /* Write the data to its new location. */ |
| 509 |
|
|
+ DMWARN("dm-bbr: device %s: Trying to remap bad sector "PFU64" to sector "PFU64, |
| 510 |
|
|
+ format_dev_t(b, bbr_id->dev->bdev->bd_dev), |
| 511 |
|
|
+ starting_lsn + lsn, new_lsn); |
| 512 |
|
|
+ job.sector = new_lsn; |
| 513 |
|
|
+ rc = dm_io_sync(1, &job, rw, &pl, offset, &error); |
| 514 |
|
|
+ if (rc) { |
| 515 |
|
|
+ /* This replacement sector is bad. |
| 516 |
|
|
+ * Try the next one. |
| 517 |
|
|
+ */ |
| 518 |
|
|
+ DMERR("dm-bbr: device %s: replacement sector "PFU64" is bad. Skipping.", |
| 519 |
|
|
+ format_dev_t(b, bbr_id->dev->bdev->bd_dev), new_lsn); |
| 520 |
|
|
+ atomic_inc(&bbr_id->in_use_replacement_blks); |
| 521 |
|
|
+ continue; |
| 522 |
|
|
+ } |
| 523 |
|
|
+ |
| 524 |
|
|
+ /* Add this new entry to the on-disk table. */ |
| 525 |
|
|
+ table_sector_index = new_lsn - |
| 526 |
|
|
+ bbr_id->start_replacement_sect; |
| 527 |
|
|
+ table_sector_offset = table_sector_index / |
| 528 |
|
|
+ BBR_ENTRIES_PER_SECT; |
| 529 |
|
|
+ index = table_sector_index % BBR_ENTRIES_PER_SECT; |
| 530 |
|
|
+ |
| 531 |
|
|
+ bbr_table = &bbr_id->bbr_table[table_sector_offset]; |
| 532 |
|
|
+ bbr_table->entries[index].bad_sect = starting_lsn + lsn; |
| 533 |
|
|
+ bbr_table->entries[index].replacement_sect = new_lsn; |
| 534 |
|
|
+ bbr_table->in_use_cnt++; |
| 535 |
|
|
+ bbr_table->sequence_number++; |
| 536 |
|
|
+ bbr_table->crc = 0; |
| 537 |
|
|
+ bbr_table->crc = calculate_crc(INITIAL_CRC, |
| 538 |
|
|
+ bbr_table, |
| 539 |
|
|
+ sizeof(struct bbr_table)); |
| 540 |
|
|
+ |
| 541 |
|
|
+ /* Write the table to disk. */ |
| 542 |
|
|
+ cpu_bbr_table_sector_to_le(bbr_table, bbr_table); |
| 543 |
|
|
+ if (bbr_id->lba_table1) { |
| 544 |
|
|
+ job.sector = bbr_id->lba_table1 + table_sector_offset; |
| 545 |
|
|
+ rc = dm_io_sync_vm(1, &job, WRITE, bbr_table, &error); |
| 546 |
|
|
+ } |
| 547 |
|
|
+ if (bbr_id->lba_table2) { |
| 548 |
|
|
+ job.sector = bbr_id->lba_table2 + table_sector_offset; |
| 549 |
|
|
+ rc |= dm_io_sync_vm(1, &job, WRITE, bbr_table, &error); |
| 550 |
|
|
+ } |
| 551 |
|
|
+ le_bbr_table_sector_to_cpu(bbr_table); |
| 552 |
|
|
+ |
| 553 |
|
|
+ if (rc) { |
| 554 |
|
|
+ /* Error writing one of the tables to disk. */ |
| 555 |
|
|
+ DMERR("dm-bbr: device %s: error updating BBR tables on disk.", |
| 556 |
|
|
+ format_dev_t(b, bbr_id->dev->bdev->bd_dev)); |
| 557 |
|
|
+ return rc; |
| 558 |
|
|
+ } |
| 559 |
|
|
+ |
| 560 |
|
|
+ /* Insert a new entry in the remapping binary-tree. */ |
| 561 |
|
|
+ rc = bbr_insert_remap_entry(bbr_id, |
| 562 |
|
|
+ &bbr_table->entries[index]); |
| 563 |
|
|
+ if (rc) { |
| 564 |
|
|
+ DMERR("dm-bbr: device %s: error adding new entry to remap tree.", |
| 565 |
|
|
+ format_dev_t(b, bbr_id->dev->bdev->bd_dev)); |
| 566 |
|
|
+ return rc; |
| 567 |
|
|
+ } |
| 568 |
|
|
+ |
| 569 |
|
|
+ atomic_inc(&bbr_id->in_use_replacement_blks); |
| 570 |
|
|
+ } |
| 571 |
|
|
+ } |
| 572 |
|
|
+ |
| 573 |
|
|
+ return 0; |
| 574 |
|
|
+} |
| 575 |
|
|
+ |
| 576 |
|
|
+/** |
| 577 |
|
|
+ * bbr_io_process_request |
| 578 |
|
|
+ * |
| 579 |
|
|
+ * For each sector in this request, check if the sector has already |
| 580 |
|
|
+ * been remapped. If so, process all previous sectors in the request, |
| 581 |
|
|
+ * followed by the remapped sector. Then reset the starting lsn and |
| 582 |
|
|
+ * count, and keep going with the rest of the request as if it were |
| 583 |
|
|
+ * a whole new request. If any of the sync_io's return an error, |
| 584 |
|
|
+ * call the remapper to relocate the bad sector(s). |
| 585 |
|
|
+ * |
| 586 |
|
|
+ * 2.5 Note: When switching over to bio's for the I/O path, we have made |
| 587 |
|
|
+ * the assumption that the I/O request described by the bio is one |
| 588 |
|
|
+ * virtually contiguous piece of memory (even though the bio vector |
| 589 |
|
|
+ * describes it using a series of physical page addresses). |
| 590 |
|
|
+ **/ |
| 591 |
|
|
+static int bbr_io_process_request(struct bbr_private *bbr_id, |
| 592 |
|
|
+ struct bio *bio) |
| 593 |
|
|
+{ |
| 594 |
|
|
+ struct io_region job; |
| 595 |
|
|
+ u64 starting_lsn = bio->bi_sector; |
| 596 |
|
|
+ u64 count, lsn, remapped_lsn; |
| 597 |
|
|
+ struct page_list pl; |
| 598 |
|
|
+ unsigned int offset; |
| 599 |
|
|
+ unsigned long error; |
| 600 |
|
|
+ int i, rw = bio_data_dir(bio); |
| 601 |
|
|
+ int rc = 0; |
| 602 |
|
|
+ |
| 603 |
|
|
+ job.bdev = bbr_id->dev->bdev; |
| 604 |
|
|
+ pl.next = NULL; |
| 605 |
|
|
+ |
| 606 |
|
|
+ /* Each bio can contain multiple vectors, each with a different page. |
| 607 |
|
|
+ * Treat each vector as a separate request. |
| 608 |
|
|
+ */ |
| 609 |
|
|
+ /* KMC: Is this the right way to walk the bvec list? */ |
| 610 |
|
|
+ for (i = 0; |
| 611 |
|
|
+ i < bio->bi_vcnt; |
| 612 |
|
|
+ i++, bio->bi_idx++, starting_lsn += count) { |
| 613 |
|
|
+ |
| 614 |
|
|
+ /* Bvec info: number of sectors, page, |
| 615 |
|
|
+ * and byte-offset within page. |
| 616 |
|
|
+ */ |
| 617 |
|
|
+ count = bio_iovec(bio)->bv_len >> SECTOR_SHIFT; |
| 618 |
|
|
+ pl.page = bio_iovec(bio)->bv_page; |
| 619 |
|
|
+ offset = bio_iovec(bio)->bv_offset; |
| 620 |
|
|
+ |
| 621 |
|
|
+ /* For each sector in this bvec, check if the sector has |
| 622 |
|
|
+ * already been remapped. If so, process all previous sectors |
| 623 |
|
|
+ * in this request, followed by the remapped sector. Then reset |
| 624 |
|
|
+ * the starting lsn and count and keep going with the rest of |
| 625 |
|
|
+ * the request as if it were a whole new request. |
| 626 |
|
|
+ */ |
| 627 |
|
|
+ for (lsn = 0; lsn < count; lsn++) { |
| 628 |
|
|
+ remapped_lsn = starting_lsn + lsn; |
| 629 |
|
|
+ rc = bbr_remap(bbr_id, &remapped_lsn); |
| 630 |
|
|
+ if (!rc) { |
| 631 |
|
|
+ /* This sector is fine. */ |
| 632 |
|
|
+ continue; |
| 633 |
|
|
+ } |
| 634 |
|
|
+ |
| 635 |
|
|
+ /* Process all sectors in the request up to this one. */ |
| 636 |
|
|
+ if (lsn > 0) { |
| 637 |
|
|
+ job.sector = starting_lsn; |
| 638 |
|
|
+ job.count = lsn; |
| 639 |
|
|
+ rc = dm_io_sync(1, &job, rw, &pl, |
| 640 |
|
|
+ offset, &error); |
| 641 |
|
|
+ if (rc) { |
| 642 |
|
|
+ /* If this I/O failed, then one of the |
| 643 |
|
|
+ * sectors in this request needs to be |
| 644 |
|
|
+ * relocated. |
| 645 |
|
|
+ */ |
| 646 |
|
|
+ rc = bbr_io_remap_error(bbr_id, rw, |
| 647 |
|
|
+ starting_lsn, |
| 648 |
|
|
+ lsn, pl.page, |
| 649 |
|
|
+ offset); |
| 650 |
|
|
+ if (rc) { |
| 651 |
|
|
+ /* KMC: Return? Or continue to next bvec? */ |
| 652 |
|
|
+ return rc; |
| 653 |
|
|
+ } |
| 654 |
|
|
+ } |
| 655 |
|
|
+ offset += (lsn << SECTOR_SHIFT); |
| 656 |
|
|
+ } |
| 657 |
|
|
+ |
| 658 |
|
|
+ /* Process the remapped sector. */ |
| 659 |
|
|
+ job.sector = remapped_lsn; |
| 660 |
|
|
+ job.count = 1; |
| 661 |
|
|
+ rc = dm_io_sync(1, &job, rw, &pl, offset, &error); |
| 662 |
|
|
+ if (rc) { |
| 663 |
|
|
+ /* BUGBUG - Need more processing if this caused |
| 664 |
|
|
+ * an error. If this I/O failed, then the |
| 665 |
|
|
+ * existing remap is now bad, and we need to |
| 666 |
|
|
+ * find a new remap. Can't use |
| 667 |
|
|
+ * bbr_io_remap_error(), because the existing |
| 668 |
|
|
+ * map entry needs to be changed, not added |
| 669 |
|
|
+ * again, and the original table entry also |
| 670 |
|
|
+ * needs to be changed. |
| 671 |
|
|
+ */ |
| 672 |
|
|
+ return rc; |
| 673 |
|
|
+ } |
| 674 |
|
|
+ |
| 675 |
|
|
+ starting_lsn += (lsn + 1); |
| 676 |
|
|
+ count -= (lsn + 1); |
| 677 |
|
|
+ lsn = -1; |
| 678 |
|
|
+ offset += SECTOR_SIZE; |
| 679 |
|
|
+ } |
| 680 |
|
|
+ |
| 681 |
|
|
+ /* Check for any remaining sectors after the last split. This |
| 682 |
|
|
+ * could potentially be the whole request, but that should be a |
| 683 |
|
|
+ * rare case because requests should only be processed by the |
| 684 |
|
|
+ * thread if we know an error occurred or they contained one or |
| 685 |
|
|
+ * more remapped sectors. |
| 686 |
|
|
+ */ |
| 687 |
|
|
+ if (count) { |
| 688 |
|
|
+ job.sector = starting_lsn; |
| 689 |
|
|
+ job.count = count; |
| 690 |
|
|
+ rc = dm_io_sync(1, &job, rw, &pl, offset, &error); |
| 691 |
|
|
+ if (rc) { |
| 692 |
|
|
+ /* If this I/O failed, then one of the sectors |
| 693 |
|
|
+ * in this request needs to be relocated. |
| 694 |
|
|
+ */ |
| 695 |
|
|
+ rc = bbr_io_remap_error(bbr_id, rw, starting_lsn, |
| 696 |
|
|
+ count, pl.page, offset); |
| 697 |
|
|
+ if (rc) { |
| 698 |
|
|
+ /* KMC: Return? Or continue to next bvec? */ |
| 699 |
|
|
+ return rc; |
| 700 |
|
|
+ } |
| 701 |
|
|
+ } |
| 702 |
|
|
+ } |
| 703 |
|
|
+ } |
| 704 |
|
|
+ |
| 705 |
|
|
+ return 0; |
| 706 |
|
|
+} |
| 707 |
|
|
+ |
| 708 |
|
|
+static void bbr_io_process_requests(struct bbr_private *bbr_id, |
| 709 |
|
|
+ struct bio *bio) |
| 710 |
|
|
+{ |
| 711 |
|
|
+ struct bio *next; |
| 712 |
|
|
+ int rc; |
| 713 |
|
|
+ |
| 714 |
|
|
+ while (bio) { |
| 715 |
|
|
+ next = bio->bi_next; |
| 716 |
|
|
+ bio->bi_next = NULL; |
| 717 |
|
|
+ |
| 718 |
|
|
+ rc = bbr_io_process_request(bbr_id, bio); |
| 719 |
|
|
+ |
| 720 |
|
|
+ bio_endio(bio, bio->bi_size, rc); |
| 721 |
|
|
+ |
| 722 |
|
|
+ bio = next; |
| 723 |
|
|
+ } |
| 724 |
|
|
+} |
| 725 |
|
|
+ |
| 726 |
|
|
+/** |
| 727 |
|
|
+ * bbr_remap_handler |
| 728 |
|
|
+ * |
| 729 |
|
|
+ * This is the handler for the bbr work-queue. |
| 730 |
|
|
+ * |
| 731 |
|
|
+ * I/O requests should only be sent to this handler if we know that: |
| 732 |
|
|
+ * a) the request contains at least one remapped sector. |
| 733 |
|
|
+ * or |
| 734 |
|
|
+ * b) the request caused an error on the normal I/O path. |
| 735 |
|
|
+ * |
| 736 |
|
|
+ * This function uses synchronous I/O, so sending a request to this |
| 737 |
|
|
+ * thread that doesn't need special processing will cause severe |
| 738 |
|
|
+ * performance degredation. |
| 739 |
|
|
+ **/ |
| 740 |
|
|
+static void bbr_remap_handler(void *data) |
| 741 |
|
|
+{ |
| 742 |
|
|
+ struct bbr_private *bbr_id = data; |
| 743 |
|
|
+ struct bio *bio; |
| 744 |
|
|
+ unsigned long flags; |
| 745 |
|
|
+ |
| 746 |
|
|
+ spin_lock_irqsave(&bbr_id->remap_ios_lock, flags); |
| 747 |
|
|
+ bio = bio_list_get(&bbr_id->remap_ios); |
| 748 |
|
|
+ spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags); |
| 749 |
|
|
+ |
| 750 |
|
|
+ bbr_io_process_requests(bbr_id, bio); |
| 751 |
|
|
+} |
| 752 |
|
|
+ |
| 753 |
|
|
+/** |
| 754 |
|
|
+ * bbr_endio |
| 755 |
|
|
+ * |
| 756 |
|
|
+ * This is the callback for normal write requests. Check for an error |
| 757 |
|
|
+ * during the I/O, and send to the thread for processing if necessary. |
| 758 |
|
|
+ **/ |
| 759 |
|
|
+static int bbr_endio(struct dm_target *ti, struct bio *bio, |
| 760 |
|
|
+ int error, union map_info *map_context) |
| 761 |
|
|
+{ |
| 762 |
|
|
+ struct bbr_private *bbr_id = ti->private; |
| 763 |
|
|
+ struct dm_bio_details *bbr_io = map_context->ptr; |
| 764 |
|
|
+ |
| 765 |
|
|
+ if (error && bbr_io) { |
| 766 |
|
|
+ unsigned long flags; |
| 767 |
|
|
+ char b[32]; |
| 768 |
|
|
+ |
| 769 |
|
|
+ dm_bio_restore(bbr_io, bio); |
| 770 |
|
|
+ map_context->ptr = NULL; |
| 771 |
|
|
+ |
| 772 |
|
|
+ DMERR("dm-bbr: device %s: I/O failure on sector %lu. " |
| 773 |
|
|
+ "Scheduling for retry.", |
| 774 |
|
|
+ format_dev_t(b, bbr_id->dev->bdev->bd_dev), |
| 775 |
|
|
+ (unsigned long)bio->bi_sector); |
| 776 |
|
|
+ |
| 777 |
|
|
+ spin_lock_irqsave(&bbr_id->remap_ios_lock, flags); |
| 778 |
|
|
+ bio_list_add(&bbr_id->remap_ios, bio); |
| 779 |
|
|
+ spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags); |
| 780 |
|
|
+ |
| 781 |
|
|
+ queue_work(dm_bbr_wq, &bbr_id->remap_work); |
| 782 |
|
|
+ |
| 783 |
|
|
+ error = 1; |
| 784 |
|
|
+ } |
| 785 |
|
|
+ |
| 786 |
|
|
+ if (bbr_io) |
| 787 |
|
|
+ mempool_free(bbr_io, bbr_io_pool); |
| 788 |
|
|
+ |
| 789 |
|
|
+ return error; |
| 790 |
|
|
+} |
| 791 |
|
|
+ |
| 792 |
|
|
+/** |
| 793 |
|
|
+ * Construct a bbr mapping |
| 794 |
|
|
+ **/ |
| 795 |
|
|
+static int bbr_ctr(struct dm_target *ti, unsigned int argc, char **argv) |
| 796 |
|
|
+{ |
| 797 |
|
|
+ struct bbr_private *bbr_id; |
| 798 |
|
|
+ unsigned long block_size; |
| 799 |
|
|
+ char *end; |
| 800 |
|
|
+ int rc = -EINVAL; |
| 801 |
|
|
+ |
| 802 |
|
|
+ if (argc != 8) { |
| 803 |
|
|
+ ti->error = "dm-bbr requires exactly 8 arguments: " |
| 804 |
|
|
+ "device offset table1_lsn table2_lsn table_size start_replacement nr_replacement_blks block_size"; |
| 805 |
|
|
+ goto out1; |
| 806 |
|
|
+ } |
| 807 |
|
|
+ |
| 808 |
|
|
+ bbr_id = bbr_alloc_private(); |
| 809 |
|
|
+ if (!bbr_id) { |
| 810 |
|
|
+ ti->error = "dm-bbr: Error allocating bbr private data."; |
| 811 |
|
|
+ goto out1; |
| 812 |
|
|
+ } |
| 813 |
|
|
+ |
| 814 |
|
|
+ bbr_id->offset = simple_strtoull(argv[1], &end, 10); |
| 815 |
|
|
+ bbr_id->lba_table1 = simple_strtoull(argv[2], &end, 10); |
| 816 |
|
|
+ bbr_id->lba_table2 = simple_strtoull(argv[3], &end, 10); |
| 817 |
|
|
+ bbr_id->nr_sects_bbr_table = simple_strtoull(argv[4], &end, 10); |
| 818 |
|
|
+ bbr_id->start_replacement_sect = simple_strtoull(argv[5], &end, 10); |
| 819 |
|
|
+ bbr_id->nr_replacement_blks = simple_strtoull(argv[6], &end, 10); |
| 820 |
|
|
+ block_size = simple_strtoul(argv[7], &end, 10); |
| 821 |
|
|
+ bbr_id->blksize_in_sects = (block_size >> SECTOR_SHIFT); |
| 822 |
|
|
+ |
| 823 |
|
|
+ bbr_id->bbr_table = vmalloc(bbr_id->nr_sects_bbr_table << SECTOR_SHIFT); |
| 824 |
|
|
+ if (!bbr_id->bbr_table) { |
| 825 |
|
|
+ ti->error = "dm-bbr: Error allocating bbr table."; |
| 826 |
|
|
+ goto out2; |
| 827 |
|
|
+ } |
| 828 |
|
|
+ |
| 829 |
|
|
+ if (dm_get_device(ti, argv[0], 0, ti->len, |
| 830 |
|
|
+ dm_table_get_mode(ti->table), &bbr_id->dev)) { |
| 831 |
|
|
+ ti->error = "dm-bbr: Device lookup failed"; |
| 832 |
|
|
+ goto out2; |
| 833 |
|
|
+ } |
| 834 |
|
|
+ |
| 835 |
|
|
+ rc = bbr_setup(bbr_id); |
| 836 |
|
|
+ if (rc) { |
| 837 |
|
|
+ ti->error = "dm-bbr: Device setup failed"; |
| 838 |
|
|
+ goto out3; |
| 839 |
|
|
+ } |
| 840 |
|
|
+ |
| 841 |
|
|
+ ti->private = bbr_id; |
| 842 |
|
|
+ return 0; |
| 843 |
|
|
+ |
| 844 |
|
|
+out3: |
| 845 |
|
|
+ dm_put_device(ti, bbr_id->dev); |
| 846 |
|
|
+out2: |
| 847 |
|
|
+ bbr_free_private(bbr_id); |
| 848 |
|
|
+out1: |
| 849 |
|
|
+ return rc; |
| 850 |
|
|
+} |
| 851 |
|
|
+ |
| 852 |
|
|
+static void bbr_dtr(struct dm_target *ti) |
| 853 |
|
|
+{ |
| 854 |
|
|
+ struct bbr_private *bbr_id = ti->private; |
| 855 |
|
|
+ |
| 856 |
|
|
+ dm_put_device(ti, bbr_id->dev); |
| 857 |
|
|
+ bbr_free_private(bbr_id); |
| 858 |
|
|
+} |
| 859 |
|
|
+ |
| 860 |
|
|
+static int bbr_map(struct dm_target *ti, struct bio *bio, |
| 861 |
|
|
+ union map_info *map_context) |
| 862 |
|
|
+{ |
| 863 |
|
|
+ struct bbr_private *bbr_id = ti->private; |
| 864 |
|
|
+ struct dm_bio_details *bbr_io; |
| 865 |
|
|
+ unsigned long flags; |
| 866 |
|
|
+ int rc = 1; |
| 867 |
|
|
+ |
| 868 |
|
|
+ bio->bi_sector += bbr_id->offset; |
| 869 |
|
|
+ |
| 870 |
|
|
+ if (atomic_read(&bbr_id->in_use_replacement_blks) == 0 || |
| 871 |
|
|
+ !bbr_remap_probe(bbr_id, bio->bi_sector, bio_sectors(bio))) { |
| 872 |
|
|
+ /* No existing remaps or this request doesn't |
| 873 |
|
|
+ * contain any remapped sectors. |
| 874 |
|
|
+ */ |
| 875 |
|
|
+ bio->bi_bdev = bbr_id->dev->bdev; |
| 876 |
|
|
+ |
| 877 |
|
|
+ bbr_io = mempool_alloc(bbr_io_pool, GFP_NOIO); |
| 878 |
|
|
+ dm_bio_record(bbr_io, bio); |
| 879 |
|
|
+ map_context->ptr = bbr_io; |
| 880 |
|
|
+ } else { |
| 881 |
|
|
+ /* This request has at least one remapped sector. |
| 882 |
|
|
+ * Give it to the work-queue for processing. |
| 883 |
|
|
+ */ |
| 884 |
|
|
+ map_context->ptr = NULL; |
| 885 |
|
|
+ spin_lock_irqsave(&bbr_id->remap_ios_lock, flags); |
| 886 |
|
|
+ bio_list_add(&bbr_id->remap_ios, bio); |
| 887 |
|
|
+ spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags); |
| 888 |
|
|
+ |
| 889 |
|
|
+ queue_work(dm_bbr_wq, &bbr_id->remap_work); |
| 890 |
|
|
+ rc = 0; |
| 891 |
|
|
+ } |
| 892 |
|
|
+ |
| 893 |
|
|
+ return rc; |
| 894 |
|
|
+} |
| 895 |
|
|
+ |
| 896 |
|
|
+static int bbr_status(struct dm_target *ti, status_type_t type, |
| 897 |
|
|
+ char *result, unsigned int maxlen) |
| 898 |
|
|
+{ |
| 899 |
|
|
+ struct bbr_private *bbr_id = ti->private; |
| 900 |
|
|
+ char b[BDEVNAME_SIZE]; |
| 901 |
|
|
+ |
| 902 |
|
|
+ switch (type) { |
| 903 |
|
|
+ case STATUSTYPE_INFO: |
| 904 |
|
|
+ result[0] = '\0'; |
| 905 |
|
|
+ break; |
| 906 |
|
|
+ |
| 907 |
|
|
+ case STATUSTYPE_TABLE: |
| 908 |
|
|
+ snprintf(result, maxlen, "%s "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" %u", |
| 909 |
|
|
+ format_dev_t(b, bbr_id->dev->bdev->bd_dev), |
| 910 |
|
|
+ bbr_id->offset, bbr_id->lba_table1, bbr_id->lba_table2, |
| 911 |
|
|
+ bbr_id->nr_sects_bbr_table, |
| 912 |
|
|
+ bbr_id->start_replacement_sect, |
| 913 |
|
|
+ bbr_id->nr_replacement_blks, |
| 914 |
|
|
+ bbr_id->blksize_in_sects << SECTOR_SHIFT); |
| 915 |
|
|
+ break; |
| 916 |
|
|
+ } |
| 917 |
|
|
+ return 0; |
| 918 |
|
|
+} |
| 919 |
|
|
+ |
| 920 |
|
|
+static struct target_type bbr_target = { |
| 921 |
|
|
+ .name = "bbr", |
| 922 |
|
|
+ .version= {1, 0, 1}, |
| 923 |
|
|
+ .module = THIS_MODULE, |
| 924 |
|
|
+ .ctr = bbr_ctr, |
| 925 |
|
|
+ .dtr = bbr_dtr, |
| 926 |
|
|
+ .map = bbr_map, |
| 927 |
|
|
+ .end_io = bbr_endio, |
| 928 |
|
|
+ .status = bbr_status, |
| 929 |
|
|
+}; |
| 930 |
|
|
+ |
| 931 |
|
|
+int __init dm_bbr_init(void) |
| 932 |
|
|
+{ |
| 933 |
|
|
+ int rc; |
| 934 |
|
|
+ |
| 935 |
|
|
+ rc = dm_register_target(&bbr_target); |
| 936 |
|
|
+ if (rc) { |
| 937 |
|
|
+ DMERR("dm-bbr: error registering target."); |
| 938 |
|
|
+ goto err1; |
| 939 |
|
|
+ } |
| 940 |
|
|
+ |
| 941 |
|
|
+ bbr_remap_cache = kmem_cache_create("bbr-remap", |
| 942 |
|
|
+ sizeof(struct bbr_runtime_remap), |
| 943 |
|
|
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL); |
| 944 |
|
|
+ if (!bbr_remap_cache) { |
| 945 |
|
|
+ DMERR("dm-bbr: error creating remap cache."); |
| 946 |
|
|
+ rc = ENOMEM; |
| 947 |
|
|
+ goto err2; |
| 948 |
|
|
+ } |
| 949 |
|
|
+ |
| 950 |
|
|
+ bbr_io_cache = kmem_cache_create("bbr-io", sizeof(struct dm_bio_details), |
| 951 |
|
|
+ 0, SLAB_HWCACHE_ALIGN, NULL, NULL); |
| 952 |
|
|
+ if (!bbr_io_cache) { |
| 953 |
|
|
+ DMERR("dm-bbr: error creating io cache."); |
| 954 |
|
|
+ rc = ENOMEM; |
| 955 |
|
|
+ goto err3; |
| 956 |
|
|
+ } |
| 957 |
|
|
+ |
| 958 |
|
|
+ bbr_io_pool = mempool_create(256, mempool_alloc_slab, |
| 959 |
|
|
+ mempool_free_slab, bbr_io_cache); |
| 960 |
|
|
+ if (!bbr_io_pool) { |
| 961 |
|
|
+ DMERR("dm-bbr: error creating io mempool."); |
| 962 |
|
|
+ rc = ENOMEM; |
| 963 |
|
|
+ goto err4; |
| 964 |
|
|
+ } |
| 965 |
|
|
+ |
| 966 |
|
|
+ dm_bbr_wq = create_workqueue("dm-bbr"); |
| 967 |
|
|
+ if (!dm_bbr_wq) { |
| 968 |
|
|
+ DMERR("dm-bbr: error creating work-queue."); |
| 969 |
|
|
+ rc = ENOMEM; |
| 970 |
|
|
+ goto err5; |
| 971 |
|
|
+ } |
| 972 |
|
|
+ |
| 973 |
|
|
+ rc = dm_io_get(1); |
| 974 |
|
|
+ if (rc) { |
| 975 |
|
|
+ DMERR("dm-bbr: error initializing I/O service."); |
| 976 |
|
|
+ goto err6; |
| 977 |
|
|
+ } |
| 978 |
|
|
+ |
| 979 |
|
|
+ return 0; |
| 980 |
|
|
+ |
| 981 |
|
|
+err6: |
| 982 |
|
|
+ destroy_workqueue(dm_bbr_wq); |
| 983 |
|
|
+err5: |
| 984 |
|
|
+ mempool_destroy(bbr_io_pool); |
| 985 |
|
|
+err4: |
| 986 |
|
|
+ kmem_cache_destroy(bbr_io_cache); |
| 987 |
|
|
+err3: |
| 988 |
|
|
+ kmem_cache_destroy(bbr_remap_cache); |
| 989 |
|
|
+err2: |
| 990 |
|
|
+ dm_unregister_target(&bbr_target); |
| 991 |
|
|
+err1: |
| 992 |
|
|
+ return rc; |
| 993 |
|
|
+} |
| 994 |
|
|
+ |
| 995 |
|
|
+void __exit dm_bbr_exit(void) |
| 996 |
|
|
+{ |
| 997 |
|
|
+ dm_io_put(1); |
| 998 |
|
|
+ destroy_workqueue(dm_bbr_wq); |
| 999 |
|
|
+ mempool_destroy(bbr_io_pool); |
| 1000 |
|
|
+ kmem_cache_destroy(bbr_io_cache); |
| 1001 |
|
|
+ kmem_cache_destroy(bbr_remap_cache); |
| 1002 |
|
|
+ dm_unregister_target(&bbr_target); |
| 1003 |
|
|
+} |
| 1004 |
|
|
+ |
| 1005 |
|
|
+module_init(dm_bbr_init); |
| 1006 |
|
|
+module_exit(dm_bbr_exit); |
| 1007 |
|
|
+MODULE_LICENSE("GPL"); |
| 1008 |
|
|
diff -urNpX dontdiff linux-2.6.12-rc2-gentoo/drivers/md/dm-bbr.h linux-dsd/drivers/md/dm-bbr.h |
| 1009 |
|
|
--- linux-2.6.12-rc2-gentoo/drivers/md/dm-bbr.h 1970-01-01 01:00:00.000000000 +0100 |
| 1010 |
|
|
+++ linux-dsd/drivers/md/dm-bbr.h 2005-04-06 10:06:16.000000000 +0100 |
| 1011 |
|
|
@@ -0,0 +1,125 @@ |
| 1012 |
|
|
+/* |
| 1013 |
|
|
+ * (C) Copyright IBM Corp. 2002, 2004 |
| 1014 |
|
|
+ * |
| 1015 |
|
|
+ * This program is free software; you can redistribute it and/or modify |
| 1016 |
|
|
+ * it under the terms of the GNU General Public License as published by |
| 1017 |
|
|
+ * the Free Software Foundation; either version 2 of the License, or |
| 1018 |
|
|
+ * (at your option) any later version. |
| 1019 |
|
|
+ * |
| 1020 |
|
|
+ * This program is distributed in the hope that it will be useful, |
| 1021 |
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 1022 |
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See |
| 1023 |
|
|
+ * the GNU General Public License for more details. |
| 1024 |
|
|
+ * |
| 1025 |
|
|
+ * You should have received a copy of the GNU General Public License |
| 1026 |
|
|
+ * along with this program; if not, write to the Free Software |
| 1027 |
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 1028 |
|
|
+ * |
| 1029 |
|
|
+ * linux/drivers/md/dm-bbr.h |
| 1030 |
|
|
+ * |
| 1031 |
|
|
+ * Bad-block-relocation (BBR) target for device-mapper. |
| 1032 |
|
|
+ * |
| 1033 |
|
|
+ * The BBR target is designed to remap I/O write failures to another safe |
| 1034 |
|
|
+ * location on disk. Note that most disk drives have BBR built into them, |
| 1035 |
|
|
+ * this means that our software BBR will be only activated when all hardware |
| 1036 |
|
|
+ * BBR replacement sectors have been used. |
| 1037 |
|
|
+ */ |
| 1038 |
|
|
+ |
| 1039 |
|
|
/* Signature value for a BBR table sector; the bytes spell "BbrT". */
#define BBR_TABLE_SIGNATURE		0x42627254 /* BbrT */
/* Number of remap entries held by one struct bbr_table. */
#define BBR_ENTRIES_PER_SECT	31
/* Starting value and polynomial for the table CRC. */
#define INITIAL_CRC			0xFFFFFFFF
#define CRC_POLYNOMIAL			0xEDB88320L
| 1043 |
|
|
+ |
| 1044 |
|
|
/**
 * PFU64: printf-style conversion for an unsigned 64-bit value, chosen by
 * the width of long: "%lu" when long is wider than 32 bits, "%Lu" otherwise.
 * Only the unsigned-decimal form is defined here.
 **/
#if BITS_PER_LONG > 32
#define PFU64 "%lu"
#else
#define PFU64 "%Lu"
#endif
| 1053 |
|
|
+ |
| 1054 |
|
|
+/** |
| 1055 |
|
|
+ * struct bbr_table_entry |
| 1056 |
|
|
+ * @bad_sect: LBA of bad location. |
| 1057 |
|
|
+ * @replacement_sect: LBA of new location. |
| 1058 |
|
|
+ * |
| 1059 |
|
|
+ * Structure to describe one BBR remap. |
| 1060 |
|
|
+ **/ |
| 1061 |
|
|
+struct bbr_table_entry { |
| 1062 |
|
|
+ u64 bad_sect; |
| 1063 |
|
|
+ u64 replacement_sect; |
| 1064 |
|
|
+}; |
| 1065 |
|
|
+ |
| 1066 |
|
|
+/** |
| 1067 |
|
|
+ * struct bbr_table |
| 1068 |
|
|
+ * @signature: Signature on each BBR table sector. |
| 1069 |
|
|
+ * @crc: CRC for this table sector. |
| 1070 |
|
|
+ * @sequence_number: Used to resolve conflicts when primary and secondary |
| 1071 |
|
|
+ * tables do not match. |
| 1072 |
|
|
+ * @in_use_cnt: Number of in-use table entries. |
| 1073 |
|
|
+ * @entries: Actual table of remaps. |
| 1074 |
|
|
+ * |
| 1075 |
|
|
+ * Structure to describe each sector of the metadata table. Each sector in this |
| 1076 |
|
|
+ * table can describe 31 remapped sectors. |
| 1077 |
|
|
+ **/ |
| 1078 |
|
|
+struct bbr_table { |
| 1079 |
|
|
+ u32 signature; |
| 1080 |
|
|
+ u32 crc; |
| 1081 |
|
|
+ u32 sequence_number; |
| 1082 |
|
|
+ u32 in_use_cnt; |
| 1083 |
|
|
+ struct bbr_table_entry entries[BBR_ENTRIES_PER_SECT]; |
| 1084 |
|
|
+}; |
| 1085 |
|
|
+ |
| 1086 |
|
|
+/** |
| 1087 |
|
|
+ * struct bbr_runtime_remap |
| 1088 |
|
|
+ * |
| 1089 |
|
|
+ * Node in the binary tree used to keep track of remaps. |
| 1090 |
|
|
+ **/ |
| 1091 |
|
|
+struct bbr_runtime_remap { |
| 1092 |
|
|
+ struct bbr_table_entry remap; |
| 1093 |
|
|
+ struct bbr_runtime_remap *left; |
| 1094 |
|
|
+ struct bbr_runtime_remap *right; |
| 1095 |
|
|
+}; |
| 1096 |
|
|
+ |
| 1097 |
|
|
+/** |
| 1098 |
|
|
+ * struct bbr_private |
| 1099 |
|
|
+ * @dev: Info about underlying device. |
| 1100 |
|
|
+ * @bbr_table: Copy of metadata table. |
| 1101 |
|
|
+ * @remap_root: Binary tree containing all remaps. |
| 1102 |
|
|
+ * @remap_root_lock: Lock for the binary tree. |
| 1103 |
|
|
+ * @remap_work: For adding work items to the work-queue. |
| 1104 |
|
|
+ * @remap_ios: List of I/Os for the work-queue to handle. |
| 1105 |
|
|
+ * @remap_ios_lock: Lock for the remap_ios list. |
| 1106 |
|
|
+ * @offset: LBA of data area. |
| 1107 |
|
|
+ * @lba_table1: LBA of primary BBR table. |
| 1108 |
|
|
+ * @lba_table2: LBA of secondary BBR table. |
| 1109 |
|
|
+ * @nr_sects_bbr_table: Size of each BBR table. |
| 1110 |
|
|
+ * @nr_replacement_blks: Number of replacement blocks. |
| 1111 |
|
|
+ * @start_replacement_sect: LBA of start of replacement blocks. |
| 1112 |
|
|
+ * @blksize_in_sects: Size of each block. |
| 1113 |
|
|
+ * @in_use_replacement_blks: Current number of remapped blocks. |
| 1114 |
|
|
+ * |
| 1115 |
|
|
+ * Private data for each BBR target. |
| 1116 |
|
|
+ **/ |
| 1117 |
|
|
+struct bbr_private { |
| 1118 |
|
|
+ struct dm_dev *dev; |
| 1119 |
|
|
+ struct bbr_table *bbr_table; |
| 1120 |
|
|
+ struct bbr_runtime_remap *remap_root; |
| 1121 |
|
|
+ spinlock_t remap_root_lock; |
| 1122 |
|
|
+ |
| 1123 |
|
|
+ struct work_struct remap_work; |
| 1124 |
|
|
+ struct bio_list remap_ios; |
| 1125 |
|
|
+ spinlock_t remap_ios_lock; |
| 1126 |
|
|
+ |
| 1127 |
|
|
+ u64 offset; |
| 1128 |
|
|
+ u64 lba_table1; |
| 1129 |
|
|
+ u64 lba_table2; |
| 1130 |
|
|
+ u64 nr_sects_bbr_table; |
| 1131 |
|
|
+ u64 start_replacement_sect; |
| 1132 |
|
|
+ u64 nr_replacement_blks; |
| 1133 |
|
|
+ u32 blksize_in_sects; |
| 1134 |
|
|
+ atomic_t in_use_replacement_blks; |
| 1135 |
|
|
+}; |
| 1136 |
|
|
+ |
| 1137 |
|
|
diff -urNpX dontdiff linux-2.6.12-rc2-gentoo/drivers/md/Kconfig linux-dsd/drivers/md/Kconfig |
| 1138 |
|
|
--- linux-2.6.12-rc2-gentoo/drivers/md/Kconfig 2005-04-06 09:46:58.000000000 +0100 |
| 1139 |
|
|
+++ linux-dsd/drivers/md/Kconfig 2005-04-06 10:07:02.000000000 +0100 |
| 1140 |
|
|
@@ -236,5 +236,16 @@ config DM_MULTIPATH_EMC |
| 1141 |
|
|
---help--- |
| 1142 |
|
|
Multipath support for EMC CX/AX series hardware. |
| 1143 |
|
|
|
| 1144 |
|
|
+config BLK_DEV_DM_BBR |
| 1145 |
|
|
+ tristate "Bad Block Relocation Device Target (EXPERIMENTAL)" |
| 1146 |
|
|
+ depends on BLK_DEV_DM && EXPERIMENTAL |
| 1147 |
|
|
+ ---help--- |
| 1148 |
|
|
+ Support for devices with software-based bad-block-relocation. |
| 1149 |
|
|
+ |
| 1150 |
|
|
+ To compile this as a module, choose M here: the module will be |
| 1151 |
|
|
+ called dm-bbr. |
| 1152 |
|
|
+ |
| 1153 |
|
|
+ If unsure, say N. |
| 1154 |
|
|
+ |
| 1155 |
|
|
endmenu |
| 1156 |
|
|
|
| 1157 |
|
|
diff -urNpX dontdiff linux-2.6.12-rc2-gentoo/drivers/md/Makefile linux-dsd/drivers/md/Makefile |
| 1158 |
|
|
--- linux-2.6.12-rc2-gentoo/drivers/md/Makefile 2005-04-06 09:46:58.000000000 +0100 |
| 1159 |
|
|
+++ linux-dsd/drivers/md/Makefile 2005-04-06 10:06:16.000000000 +0100 |
| 1160 |
|
|
@@ -36,6 +36,7 @@ obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc |
| 1161 |
|
|
obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o |
| 1162 |
|
|
obj-$(CONFIG_DM_MIRROR) += dm-mirror.o |
| 1163 |
|
|
obj-$(CONFIG_DM_ZERO) += dm-zero.o |
| 1164 |
|
|
+obj-$(CONFIG_BLK_DEV_DM_BBR) += dm-bbr.o |
| 1165 |
|
|
|
| 1166 |
|
|
quiet_cmd_unroll = UNROLL $@ |
| 1167 |
|
|
cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \ |