/[linux-patches]/genpatches-2.6/tags/2.6.13-4/4305_dm-bbr.patch
Gentoo

Contents of /genpatches-2.6/tags/2.6.13-4/4305_dm-bbr.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 168 - (show annotations) (download)
Sun Sep 18 11:19:29 2005 UTC (8 years, 11 months ago) by dsd
File size: 32467 byte(s)
2.6.13-4 release
1 diff -urNpX dontdiff linux-2.6.12-rc2-gentoo/drivers/md/dm-bbr.c linux-dsd/drivers/md/dm-bbr.c
2 --- linux-2.6.12-rc2-gentoo/drivers/md/dm-bbr.c 1970-01-01 01:00:00.000000000 +0100
3 +++ linux-dsd/drivers/md/dm-bbr.c 2005-04-06 10:06:16.000000000 +0100
4 @@ -0,0 +1,1003 @@
5 +/*
6 + * (C) Copyright IBM Corp. 2002, 2004
7 + *
8 + * This program is free software; you can redistribute it and/or modify
9 + * it under the terms of the GNU General Public License as published by
10 + * the Free Software Foundation; either version 2 of the License, or
11 + * (at your option) any later version.
12 + *
13 + * This program is distributed in the hope that it will be useful,
14 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
16 + * the GNU General Public License for more details.
17 + *
18 + * You should have received a copy of the GNU General Public License
19 + * along with this program; if not, write to the Free Software
20 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 + *
22 + * linux/drivers/md/dm-bbr.c
23 + *
24 + * Bad-block-relocation (BBR) target for device-mapper.
25 + *
26 + * The BBR target is designed to remap I/O write failures to another safe
27 + * location on disk. Note that most disk drives have BBR built into them;
28 + * this means that our software BBR will be only activated when all hardware
29 + * BBR replacement sectors have been used.
30 + */
31 +
32 +#include <linux/module.h>
33 +#include <linux/init.h>
34 +#include <linux/bio.h>
35 +#include <linux/spinlock.h>
36 +#include <linux/slab.h>
37 +#include <linux/mempool.h>
38 +#include <linux/workqueue.h>
39 +#include <linux/vmalloc.h>
40 +
41 +#include "dm.h"
42 +#include "dm-bio-list.h"
43 +#include "dm-bio-record.h"
44 +#include "dm-bbr.h"
45 +#include "dm-io.h"
46 +
47 +#define SECTOR_SIZE (1 << SECTOR_SHIFT)
48 +
49 +static struct workqueue_struct *dm_bbr_wq = NULL;
50 +static void bbr_remap_handler(void *data);
51 +static kmem_cache_t *bbr_remap_cache;
52 +static kmem_cache_t *bbr_io_cache;
53 +static mempool_t *bbr_io_pool;
54 +
55 +/**
56 + * bbr_binary_tree_destroy
57 + *
58 + * Destroy the binary tree.
59 + **/
60 +static void bbr_binary_tree_destroy(struct bbr_runtime_remap *root)
61 +{
62 + struct bbr_runtime_remap **link = NULL;
63 + struct bbr_runtime_remap *node = root;
64 +
65 + while (node) {
66 + if (node->left) {
67 + link = &(node->left);
68 + node = node->left;
69 + continue;
70 + }
71 + if (node->right) {
72 + link = &(node->right);
73 + node = node->right;
74 + continue;
75 + }
76 +
77 + kmem_cache_free(bbr_remap_cache, node);
78 + if (node == root) {
79 + /* If root is deleted, we're done. */
80 + break;
81 + }
82 +
83 + /* Back to root. */
84 + node = root;
85 + *link = NULL;
86 + }
87 +}
88 +
89 +static void bbr_free_remap(struct bbr_private *bbr_id)
90 +{
91 + spin_lock_irq(&bbr_id->remap_root_lock);
92 + bbr_binary_tree_destroy(bbr_id->remap_root);
93 + bbr_id->remap_root = NULL;
94 + spin_unlock_irq(&bbr_id->remap_root_lock);
95 +}
96 +
97 +static struct bbr_private *bbr_alloc_private(void)
98 +{
99 + struct bbr_private *bbr_id;
100 +
101 + bbr_id = kmalloc(sizeof(*bbr_id), GFP_KERNEL);
102 + if (bbr_id) {
103 + memset(bbr_id, 0, sizeof(*bbr_id));
104 + INIT_WORK(&bbr_id->remap_work, bbr_remap_handler, bbr_id);
105 + bbr_id->remap_root_lock = SPIN_LOCK_UNLOCKED;
106 + bbr_id->remap_ios_lock = SPIN_LOCK_UNLOCKED;
107 + bbr_id->in_use_replacement_blks = (atomic_t)ATOMIC_INIT(0);
108 + }
109 +
110 + return bbr_id;
111 +}
112 +
113 +static void bbr_free_private(struct bbr_private *bbr_id)
114 +{
115 + if (bbr_id->bbr_table) {
116 + vfree(bbr_id->bbr_table);
117 + }
118 + bbr_free_remap(bbr_id);
119 + kfree(bbr_id);
120 +}
121 +
122 +static u32 crc_table[256];
123 +static u32 crc_table_built = 0;
124 +
125 +static void build_crc_table(void)
126 +{
127 + u32 i, j, crc;
128 +
129 + for (i = 0; i <= 255; i++) {
130 + crc = i;
131 + for (j = 8; j > 0; j--) {
132 + if (crc & 1)
133 + crc = (crc >> 1) ^ CRC_POLYNOMIAL;
134 + else
135 + crc >>= 1;
136 + }
137 + crc_table[i] = crc;
138 + }
139 + crc_table_built = 1;
140 +}
141 +
142 +static u32 calculate_crc(u32 crc, void *buffer, u32 buffersize)
143 +{
144 + unsigned char *current_byte;
145 + u32 temp1, temp2, i;
146 +
147 + current_byte = (unsigned char *) buffer;
148 + /* Make sure the crc table is available */
149 + if (!crc_table_built)
150 + build_crc_table();
151 + /* Process each byte in the buffer. */
152 + for (i = 0; i < buffersize; i++) {
153 + temp1 = (crc >> 8) & 0x00FFFFFF;
154 + temp2 = crc_table[(crc ^ (u32) * current_byte) &
155 + (u32) 0xff];
156 + current_byte++;
157 + crc = temp1 ^ temp2;
158 + }
159 + return crc;
160 +}
161 +
162 +/**
163 + * le_bbr_table_sector_to_cpu
164 + *
165 + * Convert bbr meta data from on-disk (LE) format
166 + * to the native cpu endian format.
167 + **/
168 +static void le_bbr_table_sector_to_cpu(struct bbr_table *p)
169 +{
170 + int i;
171 + p->signature = le32_to_cpup(&p->signature);
172 + p->crc = le32_to_cpup(&p->crc);
173 + p->sequence_number = le32_to_cpup(&p->sequence_number);
174 + p->in_use_cnt = le32_to_cpup(&p->in_use_cnt);
175 + for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
176 + p->entries[i].bad_sect =
177 + le64_to_cpup(&p->entries[i].bad_sect);
178 + p->entries[i].replacement_sect =
179 + le64_to_cpup(&p->entries[i].replacement_sect);
180 + }
181 +}
182 +
183 +/**
184 + * cpu_bbr_table_sector_to_le
185 + *
186 + * Convert bbr meta data from cpu endian format to on-disk (LE) format
187 + **/
188 +static void cpu_bbr_table_sector_to_le(struct bbr_table *p,
189 + struct bbr_table *le)
190 +{
191 + int i;
192 + le->signature = cpu_to_le32p(&p->signature);
193 + le->crc = cpu_to_le32p(&p->crc);
194 + le->sequence_number = cpu_to_le32p(&p->sequence_number);
195 + le->in_use_cnt = cpu_to_le32p(&p->in_use_cnt);
196 + for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
197 + le->entries[i].bad_sect =
198 + cpu_to_le64p(&p->entries[i].bad_sect);
199 + le->entries[i].replacement_sect =
200 + cpu_to_le64p(&p->entries[i].replacement_sect);
201 + }
202 +}
203 +
204 +/**
205 + * validate_bbr_table_sector
206 + *
207 + * Check the specified BBR table sector for a valid signature and CRC. If it's
208 + * valid, endian-convert the table sector.
209 + **/
210 +static int validate_bbr_table_sector(struct bbr_table *p)
211 +{
212 + int rc = 0;
213 + int org_crc, final_crc;
214 +
215 + if (le32_to_cpup(&p->signature) != BBR_TABLE_SIGNATURE) {
216 + DMERR("dm-bbr: BBR table signature doesn't match!");
217 + DMERR("dm-bbr: Found 0x%x. Expecting 0x%x",
218 + le32_to_cpup(&p->signature), BBR_TABLE_SIGNATURE);
219 + rc = -EINVAL;
220 + goto out;
221 + }
222 +
223 + if (!p->crc) {
224 + DMERR("dm-bbr: BBR table sector has no CRC!");
225 + rc = -EINVAL;
226 + goto out;
227 + }
228 +
229 + org_crc = le32_to_cpup(&p->crc);
230 + p->crc = 0;
231 + final_crc = calculate_crc(INITIAL_CRC, (void *)p, sizeof(*p));
232 + if (final_crc != org_crc) {
233 + DMERR("dm-bbr: CRC failed!");
234 + DMERR("dm-bbr: Found 0x%x. Expecting 0x%x",
235 + org_crc, final_crc);
236 + rc = -EINVAL;
237 + goto out;
238 + }
239 +
240 + p->crc = cpu_to_le32p(&org_crc);
241 + le_bbr_table_sector_to_cpu(p);
242 +
243 +out:
244 + return rc;
245 +}
246 +
247 +/**
248 + * bbr_binary_tree_insert
249 + *
250 + * Insert a node into the binary tree.
251 + **/
252 +static void bbr_binary_tree_insert(struct bbr_runtime_remap **root,
253 + struct bbr_runtime_remap *newnode)
254 +{
255 + struct bbr_runtime_remap **node = root;
256 + while (node && *node) {
257 + if (newnode->remap.bad_sect > (*node)->remap.bad_sect) {
258 + node = &((*node)->right);
259 + } else {
260 + node = &((*node)->left);
261 + }
262 + }
263 +
264 + newnode->left = newnode->right = NULL;
265 + *node = newnode;
266 +}
267 +
268 +/**
269 + * bbr_binary_search
270 + *
271 + * Search for a node that contains bad_sect == lsn.
272 + **/
273 +static struct bbr_runtime_remap *bbr_binary_search(
274 + struct bbr_runtime_remap *root,
275 + u64 lsn)
276 +{
277 + struct bbr_runtime_remap *node = root;
278 + while (node) {
279 + if (node->remap.bad_sect == lsn) {
280 + break;
281 + }
282 + if (lsn > node->remap.bad_sect) {
283 + node = node->right;
284 + } else {
285 + node = node->left;
286 + }
287 + }
288 + return node;
289 +}
290 +
291 +/**
292 + * bbr_insert_remap_entry
293 + *
294 + * Create a new remap entry and add it to the binary tree for this node.
295 + **/
296 +static int bbr_insert_remap_entry(struct bbr_private *bbr_id,
297 + struct bbr_table_entry *new_bbr_entry)
298 +{
299 + struct bbr_runtime_remap *newnode;
300 +
301 + newnode = kmem_cache_alloc(bbr_remap_cache, GFP_NOIO);
302 + if (!newnode) {
303 + DMERR("dm-bbr: Could not allocate from remap cache!");
304 + return -ENOMEM;
305 + }
306 + newnode->remap.bad_sect = new_bbr_entry->bad_sect;
307 + newnode->remap.replacement_sect = new_bbr_entry->replacement_sect;
308 + spin_lock_irq(&bbr_id->remap_root_lock);
309 + bbr_binary_tree_insert(&bbr_id->remap_root, newnode);
310 + spin_unlock_irq(&bbr_id->remap_root_lock);
311 + return 0;
312 +}
313 +
314 +/**
315 + * bbr_table_to_remap_list
316 + *
317 + * The on-disk bbr table is sorted by the replacement sector LBA. In order to
318 + * improve run time performance, the in memory remap list must be sorted by
319 + * the bad sector LBA. This function is called at discovery time to initialize
320 + * the remap list. This function assumes that at least one copy of meta data
321 + * is valid.
322 + **/
323 +static u32 bbr_table_to_remap_list(struct bbr_private *bbr_id)
324 +{
325 + u32 in_use_blks = 0;
326 + int i, j;
327 + struct bbr_table *p;
328 +
329 + for (i = 0, p = bbr_id->bbr_table;
330 + i < bbr_id->nr_sects_bbr_table;
331 + i++, p++) {
332 + if (!p->in_use_cnt) {
333 + break;
334 + }
335 + in_use_blks += p->in_use_cnt;
336 + for (j = 0; j < p->in_use_cnt; j++) {
337 + bbr_insert_remap_entry(bbr_id, &p->entries[j]);
338 + }
339 + }
340 + if (in_use_blks) {
341 + char b[32];
342 + DMWARN("dm-bbr: There are %u BBR entries for device %s",
343 + in_use_blks, format_dev_t(b, bbr_id->dev->bdev->bd_dev));
344 + }
345 +
346 + return in_use_blks;
347 +}
348 +
349 +/**
350 + * bbr_search_remap_entry
351 + *
352 + * Search remap entry for the specified sector. If found, return a pointer to
353 + * the table entry. Otherwise, return NULL.
354 + **/
355 +static struct bbr_table_entry *bbr_search_remap_entry(
356 + struct bbr_private *bbr_id,
357 + u64 lsn)
358 +{
359 + struct bbr_runtime_remap *p;
360 +
361 + spin_lock_irq(&bbr_id->remap_root_lock);
362 + p = bbr_binary_search(bbr_id->remap_root, lsn);
363 + spin_unlock_irq(&bbr_id->remap_root_lock);
364 + if (p) {
365 + return (&p->remap);
366 + } else {
367 + return NULL;
368 + }
369 +}
370 +
371 +/**
372 + * bbr_remap
373 + *
374 + * If *lsn is in the remap table, return TRUE and modify *lsn,
375 + * else, return FALSE.
376 + **/
377 +static inline int bbr_remap(struct bbr_private *bbr_id,
378 + u64 *lsn)
379 +{
380 + struct bbr_table_entry *e;
381 +
382 + if (atomic_read(&bbr_id->in_use_replacement_blks)) {
383 + e = bbr_search_remap_entry(bbr_id, *lsn);
384 + if (e) {
385 + *lsn = e->replacement_sect;
386 + return 1;
387 + }
388 + }
389 + return 0;
390 +}
391 +
392 +/**
393 + * bbr_remap_probe
394 + *
395 + * If any of the sectors in the range [lsn, lsn+nr_sects] are in the remap
396 + * table, return TRUE. Else, return FALSE.
397 + **/
398 +static inline int bbr_remap_probe(struct bbr_private *bbr_id,
399 + u64 lsn, u64 nr_sects)
400 +{
401 + u64 tmp, cnt;
402 +
403 + if (atomic_read(&bbr_id->in_use_replacement_blks)) {
404 + for (cnt = 0, tmp = lsn;
405 + cnt < nr_sects;
406 + cnt += bbr_id->blksize_in_sects, tmp = lsn + cnt) {
407 + if (bbr_remap(bbr_id,&tmp)) {
408 + return 1;
409 + }
410 + }
411 + }
412 + return 0;
413 +}
414 +
415 +/**
416 + * bbr_setup
417 + *
418 + * Read the remap tables from disk and set up the initial remap tree.
419 + **/
420 +static int bbr_setup(struct bbr_private *bbr_id)
421 +{
422 + struct bbr_table *table = bbr_id->bbr_table;
423 + struct io_region job;
424 + unsigned long error;
425 + int i, rc = 0;
426 +
427 + job.bdev = bbr_id->dev->bdev;
428 + job.count = 1;
429 +
430 + /* Read and verify each BBR table sector individually. */
431 + for (i = 0; i < bbr_id->nr_sects_bbr_table; i++, table++) {
432 + job.sector = bbr_id->lba_table1 + i;
433 + rc = dm_io_sync_vm(1, &job, READ, table, &error);
434 + if (rc && bbr_id->lba_table2) {
435 + job.sector = bbr_id->lba_table2 + i;
436 + rc = dm_io_sync_vm(1, &job, READ, table, &error);
437 + }
438 + if (rc) {
439 + goto out;
440 + }
441 +
442 + rc = validate_bbr_table_sector(table);
443 + if (rc) {
444 + goto out;
445 + }
446 + }
447 + atomic_set(&bbr_id->in_use_replacement_blks,
448 + bbr_table_to_remap_list(bbr_id));
449 +
450 +out:
451 + if (rc) {
452 + DMERR("dm-bbr: error during device setup: %d", rc);
453 + }
454 + return rc;
455 +}
456 +
457 +/**
458 + * bbr_io_remap_error
459 + * @bbr_id: Private data for the BBR node.
460 + * @rw: READ or WRITE.
461 + * @starting_lsn: Starting sector of request to remap.
462 + * @count: Number of sectors in the request.
463 + * @page: Page containing the data for the request.
464 + * @offset: Byte-offset of the data within the page.
465 + *
466 + * For the requested range, try to write each sector individually. For each
467 + * sector that fails, find the next available remap location and write the
468 + * data to that new location. Then update the table and write both copies
469 + * of the table to disk. Finally, update the in-memory mapping and do any
470 + * other necessary bookkeeping.
471 + **/
472 +static int bbr_io_remap_error(struct bbr_private *bbr_id,
473 + int rw,
474 + u64 starting_lsn,
475 + u64 count,
476 + struct page *page,
477 + unsigned int offset)
478 +{
479 + struct bbr_table *bbr_table;
480 + struct io_region job;
481 + struct page_list pl;
482 + unsigned long table_sector_index;
483 + unsigned long table_sector_offset;
484 + unsigned long index;
485 + unsigned long error;
486 + u64 lsn, new_lsn;
487 + char b[32];
488 + int rc;
489 +
490 + job.bdev = bbr_id->dev->bdev;
491 + job.count = 1;
492 + pl.page = page;
493 + pl.next = NULL;
494 +
495 + /* For each sector in the request. */
496 + for (lsn = 0; lsn < count; lsn++, offset += SECTOR_SIZE) {
497 + job.sector = starting_lsn + lsn;
498 + rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
499 + while (rc) {
500 + /* Find the next available relocation sector. */
501 + new_lsn = atomic_read(&bbr_id->in_use_replacement_blks);
502 + if (new_lsn >= bbr_id->nr_replacement_blks) {
503 + /* No more replacement sectors available. */
504 + return -EIO;
505 + }
506 + new_lsn += bbr_id->start_replacement_sect;
507 +
508 + /* Write the data to its new location. */
509 + DMWARN("dm-bbr: device %s: Trying to remap bad sector "PFU64" to sector "PFU64,
510 + format_dev_t(b, bbr_id->dev->bdev->bd_dev),
511 + starting_lsn + lsn, new_lsn);
512 + job.sector = new_lsn;
513 + rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
514 + if (rc) {
515 + /* This replacement sector is bad.
516 + * Try the next one.
517 + */
518 + DMERR("dm-bbr: device %s: replacement sector "PFU64" is bad. Skipping.",
519 + format_dev_t(b, bbr_id->dev->bdev->bd_dev), new_lsn);
520 + atomic_inc(&bbr_id->in_use_replacement_blks);
521 + continue;
522 + }
523 +
524 + /* Add this new entry to the on-disk table. */
525 + table_sector_index = new_lsn -
526 + bbr_id->start_replacement_sect;
527 + table_sector_offset = table_sector_index /
528 + BBR_ENTRIES_PER_SECT;
529 + index = table_sector_index % BBR_ENTRIES_PER_SECT;
530 +
531 + bbr_table = &bbr_id->bbr_table[table_sector_offset];
532 + bbr_table->entries[index].bad_sect = starting_lsn + lsn;
533 + bbr_table->entries[index].replacement_sect = new_lsn;
534 + bbr_table->in_use_cnt++;
535 + bbr_table->sequence_number++;
536 + bbr_table->crc = 0;
537 + bbr_table->crc = calculate_crc(INITIAL_CRC,
538 + bbr_table,
539 + sizeof(struct bbr_table));
540 +
541 + /* Write the table to disk. */
542 + cpu_bbr_table_sector_to_le(bbr_table, bbr_table);
543 + if (bbr_id->lba_table1) {
544 + job.sector = bbr_id->lba_table1 + table_sector_offset;
545 + rc = dm_io_sync_vm(1, &job, WRITE, bbr_table, &error);
546 + }
547 + if (bbr_id->lba_table2) {
548 + job.sector = bbr_id->lba_table2 + table_sector_offset;
549 + rc |= dm_io_sync_vm(1, &job, WRITE, bbr_table, &error);
550 + }
551 + le_bbr_table_sector_to_cpu(bbr_table);
552 +
553 + if (rc) {
554 + /* Error writing one of the tables to disk. */
555 + DMERR("dm-bbr: device %s: error updating BBR tables on disk.",
556 + format_dev_t(b, bbr_id->dev->bdev->bd_dev));
557 + return rc;
558 + }
559 +
560 + /* Insert a new entry in the remapping binary-tree. */
561 + rc = bbr_insert_remap_entry(bbr_id,
562 + &bbr_table->entries[index]);
563 + if (rc) {
564 + DMERR("dm-bbr: device %s: error adding new entry to remap tree.",
565 + format_dev_t(b, bbr_id->dev->bdev->bd_dev));
566 + return rc;
567 + }
568 +
569 + atomic_inc(&bbr_id->in_use_replacement_blks);
570 + }
571 + }
572 +
573 + return 0;
574 +}
575 +
576 +/**
577 + * bbr_io_process_request
578 + *
579 + * For each sector in this request, check if the sector has already
580 + * been remapped. If so, process all previous sectors in the request,
581 + * followed by the remapped sector. Then reset the starting lsn and
582 + * count, and keep going with the rest of the request as if it were
583 + * a whole new request. If any of the sync_io's return an error,
584 + * call the remapper to relocate the bad sector(s).
585 + *
586 + * 2.5 Note: When switching over to bio's for the I/O path, we have made
587 + * the assumption that the I/O request described by the bio is one
588 + * virtually contiguous piece of memory (even though the bio vector
589 + * describes it using a series of physical page addresses).
590 + **/
591 +static int bbr_io_process_request(struct bbr_private *bbr_id,
592 + struct bio *bio)
593 +{
594 + struct io_region job;
595 + u64 starting_lsn = bio->bi_sector;
596 + u64 count, lsn, remapped_lsn;
597 + struct page_list pl;
598 + unsigned int offset;
599 + unsigned long error;
600 + int i, rw = bio_data_dir(bio);
601 + int rc = 0;
602 +
603 + job.bdev = bbr_id->dev->bdev;
604 + pl.next = NULL;
605 +
606 + /* Each bio can contain multiple vectors, each with a different page.
607 + * Treat each vector as a separate request.
608 + */
609 + /* KMC: Is this the right way to walk the bvec list? */
610 + for (i = 0;
611 + i < bio->bi_vcnt;
612 + i++, bio->bi_idx++, starting_lsn += count) {
613 +
614 + /* Bvec info: number of sectors, page,
615 + * and byte-offset within page.
616 + */
617 + count = bio_iovec(bio)->bv_len >> SECTOR_SHIFT;
618 + pl.page = bio_iovec(bio)->bv_page;
619 + offset = bio_iovec(bio)->bv_offset;
620 +
621 + /* For each sector in this bvec, check if the sector has
622 + * already been remapped. If so, process all previous sectors
623 + * in this request, followed by the remapped sector. Then reset
624 + * the starting lsn and count and keep going with the rest of
625 + * the request as if it were a whole new request.
626 + */
627 + for (lsn = 0; lsn < count; lsn++) {
628 + remapped_lsn = starting_lsn + lsn;
629 + rc = bbr_remap(bbr_id, &remapped_lsn);
630 + if (!rc) {
631 + /* This sector is fine. */
632 + continue;
633 + }
634 +
635 + /* Process all sectors in the request up to this one. */
636 + if (lsn > 0) {
637 + job.sector = starting_lsn;
638 + job.count = lsn;
639 + rc = dm_io_sync(1, &job, rw, &pl,
640 + offset, &error);
641 + if (rc) {
642 + /* If this I/O failed, then one of the
643 + * sectors in this request needs to be
644 + * relocated.
645 + */
646 + rc = bbr_io_remap_error(bbr_id, rw,
647 + starting_lsn,
648 + lsn, pl.page,
649 + offset);
650 + if (rc) {
651 + /* KMC: Return? Or continue to next bvec? */
652 + return rc;
653 + }
654 + }
655 + offset += (lsn << SECTOR_SHIFT);
656 + }
657 +
658 + /* Process the remapped sector. */
659 + job.sector = remapped_lsn;
660 + job.count = 1;
661 + rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
662 + if (rc) {
663 + /* BUGBUG - Need more processing if this caused
664 + * an error. If this I/O failed, then the
665 + * existing remap is now bad, and we need to
666 + * find a new remap. Can't use
667 + * bbr_io_remap_error(), because the existing
668 + * map entry needs to be changed, not added
669 + * again, and the original table entry also
670 + * needs to be changed.
671 + */
672 + return rc;
673 + }
674 +
675 + starting_lsn += (lsn + 1);
676 + count -= (lsn + 1);
677 + lsn = -1;
678 + offset += SECTOR_SIZE;
679 + }
680 +
681 + /* Check for any remaining sectors after the last split. This
682 + * could potentially be the whole request, but that should be a
683 + * rare case because requests should only be processed by the
684 + * thread if we know an error occurred or they contained one or
685 + * more remapped sectors.
686 + */
687 + if (count) {
688 + job.sector = starting_lsn;
689 + job.count = count;
690 + rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
691 + if (rc) {
692 + /* If this I/O failed, then one of the sectors
693 + * in this request needs to be relocated.
694 + */
695 + rc = bbr_io_remap_error(bbr_id, rw, starting_lsn,
696 + count, pl.page, offset);
697 + if (rc) {
698 + /* KMC: Return? Or continue to next bvec? */
699 + return rc;
700 + }
701 + }
702 + }
703 + }
704 +
705 + return 0;
706 +}
707 +
708 +static void bbr_io_process_requests(struct bbr_private *bbr_id,
709 + struct bio *bio)
710 +{
711 + struct bio *next;
712 + int rc;
713 +
714 + while (bio) {
715 + next = bio->bi_next;
716 + bio->bi_next = NULL;
717 +
718 + rc = bbr_io_process_request(bbr_id, bio);
719 +
720 + bio_endio(bio, bio->bi_size, rc);
721 +
722 + bio = next;
723 + }
724 +}
725 +
726 +/**
727 + * bbr_remap_handler
728 + *
729 + * This is the handler for the bbr work-queue.
730 + *
731 + * I/O requests should only be sent to this handler if we know that:
732 + * a) the request contains at least one remapped sector.
733 + * or
734 + * b) the request caused an error on the normal I/O path.
735 + *
736 + * This function uses synchronous I/O, so sending a request to this
737 + * thread that doesn't need special processing will cause severe
738 + * performance degradation.
739 + **/
740 +static void bbr_remap_handler(void *data)
741 +{
742 + struct bbr_private *bbr_id = data;
743 + struct bio *bio;
744 + unsigned long flags;
745 +
746 + spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
747 + bio = bio_list_get(&bbr_id->remap_ios);
748 + spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);
749 +
750 + bbr_io_process_requests(bbr_id, bio);
751 +}
752 +
753 +/**
754 + * bbr_endio
755 + *
756 + * This is the callback for normal write requests. Check for an error
757 + * during the I/O, and send to the thread for processing if necessary.
758 + **/
759 +static int bbr_endio(struct dm_target *ti, struct bio *bio,
760 + int error, union map_info *map_context)
761 +{
762 + struct bbr_private *bbr_id = ti->private;
763 + struct dm_bio_details *bbr_io = map_context->ptr;
764 +
765 + if (error && bbr_io) {
766 + unsigned long flags;
767 + char b[32];
768 +
769 + dm_bio_restore(bbr_io, bio);
770 + map_context->ptr = NULL;
771 +
772 + DMERR("dm-bbr: device %s: I/O failure on sector %lu. "
773 + "Scheduling for retry.",
774 + format_dev_t(b, bbr_id->dev->bdev->bd_dev),
775 + (unsigned long)bio->bi_sector);
776 +
777 + spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
778 + bio_list_add(&bbr_id->remap_ios, bio);
779 + spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);
780 +
781 + queue_work(dm_bbr_wq, &bbr_id->remap_work);
782 +
783 + error = 1;
784 + }
785 +
786 + if (bbr_io)
787 + mempool_free(bbr_io, bbr_io_pool);
788 +
789 + return error;
790 +}
791 +
792 +/**
793 + * Construct a bbr mapping
794 + **/
795 +static int bbr_ctr(struct dm_target *ti, unsigned int argc, char **argv)
796 +{
797 + struct bbr_private *bbr_id;
798 + unsigned long block_size;
799 + char *end;
800 + int rc = -EINVAL;
801 +
802 + if (argc != 8) {
803 + ti->error = "dm-bbr requires exactly 8 arguments: "
804 + "device offset table1_lsn table2_lsn table_size start_replacement nr_replacement_blks block_size";
805 + goto out1;
806 + }
807 +
808 + bbr_id = bbr_alloc_private();
809 + if (!bbr_id) {
810 + ti->error = "dm-bbr: Error allocating bbr private data.";
811 + goto out1;
812 + }
813 +
814 + bbr_id->offset = simple_strtoull(argv[1], &end, 10);
815 + bbr_id->lba_table1 = simple_strtoull(argv[2], &end, 10);
816 + bbr_id->lba_table2 = simple_strtoull(argv[3], &end, 10);
817 + bbr_id->nr_sects_bbr_table = simple_strtoull(argv[4], &end, 10);
818 + bbr_id->start_replacement_sect = simple_strtoull(argv[5], &end, 10);
819 + bbr_id->nr_replacement_blks = simple_strtoull(argv[6], &end, 10);
820 + block_size = simple_strtoul(argv[7], &end, 10);
821 + bbr_id->blksize_in_sects = (block_size >> SECTOR_SHIFT);
822 +
823 + bbr_id->bbr_table = vmalloc(bbr_id->nr_sects_bbr_table << SECTOR_SHIFT);
824 + if (!bbr_id->bbr_table) {
825 + ti->error = "dm-bbr: Error allocating bbr table.";
826 + goto out2;
827 + }
828 +
829 + if (dm_get_device(ti, argv[0], 0, ti->len,
830 + dm_table_get_mode(ti->table), &bbr_id->dev)) {
831 + ti->error = "dm-bbr: Device lookup failed";
832 + goto out2;
833 + }
834 +
835 + rc = bbr_setup(bbr_id);
836 + if (rc) {
837 + ti->error = "dm-bbr: Device setup failed";
838 + goto out3;
839 + }
840 +
841 + ti->private = bbr_id;
842 + return 0;
843 +
844 +out3:
845 + dm_put_device(ti, bbr_id->dev);
846 +out2:
847 + bbr_free_private(bbr_id);
848 +out1:
849 + return rc;
850 +}
851 +
852 +static void bbr_dtr(struct dm_target *ti)
853 +{
854 + struct bbr_private *bbr_id = ti->private;
855 +
856 + dm_put_device(ti, bbr_id->dev);
857 + bbr_free_private(bbr_id);
858 +}
859 +
860 +static int bbr_map(struct dm_target *ti, struct bio *bio,
861 + union map_info *map_context)
862 +{
863 + struct bbr_private *bbr_id = ti->private;
864 + struct dm_bio_details *bbr_io;
865 + unsigned long flags;
866 + int rc = 1;
867 +
868 + bio->bi_sector += bbr_id->offset;
869 +
870 + if (atomic_read(&bbr_id->in_use_replacement_blks) == 0 ||
871 + !bbr_remap_probe(bbr_id, bio->bi_sector, bio_sectors(bio))) {
872 + /* No existing remaps or this request doesn't
873 + * contain any remapped sectors.
874 + */
875 + bio->bi_bdev = bbr_id->dev->bdev;
876 +
877 + bbr_io = mempool_alloc(bbr_io_pool, GFP_NOIO);
878 + dm_bio_record(bbr_io, bio);
879 + map_context->ptr = bbr_io;
880 + } else {
881 + /* This request has at least one remapped sector.
882 + * Give it to the work-queue for processing.
883 + */
884 + map_context->ptr = NULL;
885 + spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
886 + bio_list_add(&bbr_id->remap_ios, bio);
887 + spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);
888 +
889 + queue_work(dm_bbr_wq, &bbr_id->remap_work);
890 + rc = 0;
891 + }
892 +
893 + return rc;
894 +}
895 +
896 +static int bbr_status(struct dm_target *ti, status_type_t type,
897 + char *result, unsigned int maxlen)
898 +{
899 + struct bbr_private *bbr_id = ti->private;
900 + char b[BDEVNAME_SIZE];
901 +
902 + switch (type) {
903 + case STATUSTYPE_INFO:
904 + result[0] = '\0';
905 + break;
906 +
907 + case STATUSTYPE_TABLE:
908 + snprintf(result, maxlen, "%s "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" %u",
909 + format_dev_t(b, bbr_id->dev->bdev->bd_dev),
910 + bbr_id->offset, bbr_id->lba_table1, bbr_id->lba_table2,
911 + bbr_id->nr_sects_bbr_table,
912 + bbr_id->start_replacement_sect,
913 + bbr_id->nr_replacement_blks,
914 + bbr_id->blksize_in_sects << SECTOR_SHIFT);
915 + break;
916 + }
917 + return 0;
918 +}
919 +
920 +static struct target_type bbr_target = {
921 + .name = "bbr",
922 + .version= {1, 0, 1},
923 + .module = THIS_MODULE,
924 + .ctr = bbr_ctr,
925 + .dtr = bbr_dtr,
926 + .map = bbr_map,
927 + .end_io = bbr_endio,
928 + .status = bbr_status,
929 +};
930 +
931 +int __init dm_bbr_init(void)
932 +{
933 + int rc;
934 +
935 + rc = dm_register_target(&bbr_target);
936 + if (rc) {
937 + DMERR("dm-bbr: error registering target.");
938 + goto err1;
939 + }
940 +
941 + bbr_remap_cache = kmem_cache_create("bbr-remap",
942 + sizeof(struct bbr_runtime_remap),
943 + 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
944 + if (!bbr_remap_cache) {
945 + DMERR("dm-bbr: error creating remap cache.");
946 + rc = ENOMEM;
947 + goto err2;
948 + }
949 +
950 + bbr_io_cache = kmem_cache_create("bbr-io", sizeof(struct dm_bio_details),
951 + 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
952 + if (!bbr_io_cache) {
953 + DMERR("dm-bbr: error creating io cache.");
954 + rc = ENOMEM;
955 + goto err3;
956 + }
957 +
958 + bbr_io_pool = mempool_create(256, mempool_alloc_slab,
959 + mempool_free_slab, bbr_io_cache);
960 + if (!bbr_io_pool) {
961 + DMERR("dm-bbr: error creating io mempool.");
962 + rc = ENOMEM;
963 + goto err4;
964 + }
965 +
966 + dm_bbr_wq = create_workqueue("dm-bbr");
967 + if (!dm_bbr_wq) {
968 + DMERR("dm-bbr: error creating work-queue.");
969 + rc = ENOMEM;
970 + goto err5;
971 + }
972 +
973 + rc = dm_io_get(1);
974 + if (rc) {
975 + DMERR("dm-bbr: error initializing I/O service.");
976 + goto err6;
977 + }
978 +
979 + return 0;
980 +
981 +err6:
982 + destroy_workqueue(dm_bbr_wq);
983 +err5:
984 + mempool_destroy(bbr_io_pool);
985 +err4:
986 + kmem_cache_destroy(bbr_io_cache);
987 +err3:
988 + kmem_cache_destroy(bbr_remap_cache);
989 +err2:
990 + dm_unregister_target(&bbr_target);
991 +err1:
992 + return rc;
993 +}
994 +
995 +void __exit dm_bbr_exit(void)
996 +{
997 + dm_io_put(1);
998 + destroy_workqueue(dm_bbr_wq);
999 + mempool_destroy(bbr_io_pool);
1000 + kmem_cache_destroy(bbr_io_cache);
1001 + kmem_cache_destroy(bbr_remap_cache);
1002 + dm_unregister_target(&bbr_target);
1003 +}
1004 +
1005 +module_init(dm_bbr_init);
1006 +module_exit(dm_bbr_exit);
1007 +MODULE_LICENSE("GPL");
1008 diff -urNpX dontdiff linux-2.6.12-rc2-gentoo/drivers/md/dm-bbr.h linux-dsd/drivers/md/dm-bbr.h
1009 --- linux-2.6.12-rc2-gentoo/drivers/md/dm-bbr.h 1970-01-01 01:00:00.000000000 +0100
1010 +++ linux-dsd/drivers/md/dm-bbr.h 2005-04-06 10:06:16.000000000 +0100
1011 @@ -0,0 +1,125 @@
1012 +/*
1013 + * (C) Copyright IBM Corp. 2002, 2004
1014 + *
1015 + * This program is free software; you can redistribute it and/or modify
1016 + * it under the terms of the GNU General Public License as published by
1017 + * the Free Software Foundation; either version 2 of the License, or
1018 + * (at your option) any later version.
1019 + *
1020 + * This program is distributed in the hope that it will be useful,
1021 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1022 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
1023 + * the GNU General Public License for more details.
1024 + *
1025 + * You should have received a copy of the GNU General Public License
1026 + * along with this program; if not, write to the Free Software
1027 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1028 + *
1029 + * linux/drivers/md/dm-bbr.h
1030 + *
1031 + * Bad-block-relocation (BBR) target for device-mapper.
1032 + *
1033 + * The BBR target is designed to remap I/O write failures to another safe
1034 + * location on disk. Note that most disk drives have BBR built into them;
1035 + * this means that our software BBR will be only activated when all hardware
1036 + * BBR replacement sectors have been used.
1037 + */
1038 +
1039 +#define BBR_TABLE_SIGNATURE 0x42627254 /* BbrT */
1040 +#define BBR_ENTRIES_PER_SECT 31
1041 +#define INITIAL_CRC 0xFFFFFFFF
1042 +#define CRC_POLYNOMIAL 0xEDB88320L
1043 +
1044 +/**
1045 + * Macro to cleanly print 64-bit numbers on both 32-bit and 64-bit machines.
1046 + * Use this in place of %Lu when printing u64 values.
1047 + **/
1048 +#if BITS_PER_LONG > 32
1049 +#define PFU64 "%lu"
1050 +#else
1051 +#define PFU64 "%Lu"
1052 +#endif
1053 +
1054 +/**
1055 + * struct bbr_table_entry
1056 + * @bad_sect: LBA of bad location.
1057 + * @replacement_sect: LBA of new location.
1058 + *
1059 + * Structure to describe one BBR remap.
1060 + **/
1061 +struct bbr_table_entry {
1062 + u64 bad_sect;
1063 + u64 replacement_sect;
1064 +};
1065 +
1066 +/**
1067 + * struct bbr_table
1068 + * @signature: Signature on each BBR table sector.
1069 + * @crc: CRC for this table sector.
1070 + * @sequence_number: Used to resolve conflicts when primary and secondary
1071 + * tables do not match.
1072 + * @in_use_cnt: Number of in-use table entries.
1073 + * @entries: Actual table of remaps.
1074 + *
1075 + * Structure to describe each sector of the metadata table. Each sector in this
1076 + * table can describe 31 remapped sectors.
1077 + **/
1078 +struct bbr_table {
1079 + u32 signature;
1080 + u32 crc;
1081 + u32 sequence_number;
1082 + u32 in_use_cnt;
1083 + struct bbr_table_entry entries[BBR_ENTRIES_PER_SECT];
1084 +};
1085 +
1086 +/**
1087 + * struct bbr_runtime_remap
1088 + *
1089 + * Node in the binary tree used to keep track of remaps.
1090 + **/
1091 +struct bbr_runtime_remap {
1092 + struct bbr_table_entry remap;
1093 + struct bbr_runtime_remap *left;
1094 + struct bbr_runtime_remap *right;
1095 +};
1096 +
1097 +/**
1098 + * struct bbr_private
1099 + * @dev: Info about underlying device.
1100 + * @bbr_table: Copy of metadata table.
1101 + * @remap_root: Binary tree containing all remaps.
1102 + * @remap_root_lock: Lock for the binary tree.
1103 + * @remap_work: For adding work items to the work-queue.
1104 + * @remap_ios: List of I/Os for the work-queue to handle.
1105 + * @remap_ios_lock: Lock for the remap_ios list.
1106 + * @offset: LBA of data area.
1107 + * @lba_table1: LBA of primary BBR table.
1108 + * @lba_table2: LBA of secondary BBR table.
1109 + * @nr_sects_bbr_table: Size of each BBR table.
1110 + * @start_replacement_sect: LBA of start of replacement blocks.
1111 + * @nr_replacement_blks: Number of replacement blocks.
1112 + * @blksize_in_sects: Size of each block.
1113 + * @in_use_replacement_blks: Current number of remapped blocks.
1114 + *
1115 + * Private data for each BBR target.
1116 + **/
1117 +struct bbr_private {
1118 + struct dm_dev *dev;
1119 + struct bbr_table *bbr_table;
1120 + struct bbr_runtime_remap *remap_root;
1121 + spinlock_t remap_root_lock;
1122 +
1123 + struct work_struct remap_work;
1124 + struct bio_list remap_ios;
1125 + spinlock_t remap_ios_lock;
1126 +
1127 + u64 offset;
1128 + u64 lba_table1;
1129 + u64 lba_table2;
1130 + u64 nr_sects_bbr_table;
1131 + u64 start_replacement_sect;
1132 + u64 nr_replacement_blks;
1133 + u32 blksize_in_sects;
1134 + atomic_t in_use_replacement_blks;
1135 +};
1136 +
1137 diff -urNpX dontdiff linux-2.6.12-rc2-gentoo/drivers/md/Kconfig linux-dsd/drivers/md/Kconfig
1138 --- linux-2.6.12-rc2-gentoo/drivers/md/Kconfig 2005-04-06 09:46:58.000000000 +0100
1139 +++ linux-dsd/drivers/md/Kconfig 2005-04-06 10:07:02.000000000 +0100
1140 @@ -236,5 +236,16 @@ config DM_MULTIPATH_EMC
1141 ---help---
1142 Multipath support for EMC CX/AX series hardware.
1143
1144 +config BLK_DEV_DM_BBR
1145 + tristate "Bad Block Relocation Device Target (EXPERIMENTAL)"
1146 + depends on BLK_DEV_DM && EXPERIMENTAL
1147 + ---help---
1148 + Support for devices with software-based bad-block-relocation.
1149 +
1150 + To compile this as a module, choose M here: the module will be
1151 + called dm-bbr.
1152 +
1153 + If unsure, say N.
1154 +
1155 endmenu
1156
1157 diff -urNpX dontdiff linux-2.6.12-rc2-gentoo/drivers/md/Makefile linux-dsd/drivers/md/Makefile
1158 --- linux-2.6.12-rc2-gentoo/drivers/md/Makefile 2005-04-06 09:46:58.000000000 +0100
1159 +++ linux-dsd/drivers/md/Makefile 2005-04-06 10:06:16.000000000 +0100
1160 @@ -36,6 +36,7 @@ obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc
1161 obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
1162 obj-$(CONFIG_DM_MIRROR) += dm-mirror.o
1163 obj-$(CONFIG_DM_ZERO) += dm-zero.o
1164 +obj-$(CONFIG_BLK_DEV_DM_BBR) += dm-bbr.o
1165
1166 quiet_cmd_unroll = UNROLL $@
1167 cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \

  ViewVC Help
Powered by ViewVC 1.1.20