/[linux-patches]/genpatches-2.6/trunk/2.6.18-pre/4105_dm-bbr.patch
Gentoo

Contents of /genpatches-2.6/trunk/2.6.18-pre/4105_dm-bbr.patch

Parent Directory | Revision Log


Revision 376 - (show annotations) (download)
Wed Apr 12 12:20:21 2006 UTC (12 years, 7 months ago) by phreak
Original Path: genpatches-2.6/trunk/2.6.17-pre/4105_dm-bbr.patch
File size: 32133 byte(s)
Fixing file ranges in 4105_dm-bbr.patch, 4110_promise-pdc2037x.patch, 4205_vesafb-tng-1.0-rc1-r3.patch, 4300_squashfs-3.0.patch, 4400_speakup-20060103.patch and 4405_alpha-sysctl-uac.patch
1 Index: linux-git/drivers/md/Kconfig
2 ===================================================================
3 --- linux-git.orig/drivers/md/Kconfig
4 +++ linux-git/drivers/md/Kconfig
5 @@ -263,5 +263,16 @@ config DM_MULTIPATH_EMC
6 ---help---
7 Multipath support for EMC CX/AX series hardware.
8
9 +config BLK_DEV_DM_BBR
10 + tristate "Bad Block Relocation Device Target (EXPERIMENTAL)"
11 + depends on BLK_DEV_DM && EXPERIMENTAL
12 + ---help---
13 + Support for devices with software-based bad-block-relocation.
14 +
15 + To compile this as a module, choose M here: the module will be
16 + called dm-bbr.
17 +
18 + If unsure, say N.
19 +
20 endmenu
21
22 Index: linux-git/drivers/md/Makefile
23 ===================================================================
24 --- linux-git.orig/drivers/md/Makefile
25 +++ linux-git/drivers/md/Makefile
26 @@ -37,6 +37,7 @@ obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc
27 obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
28 obj-$(CONFIG_DM_MIRROR) += dm-mirror.o
29 obj-$(CONFIG_DM_ZERO) += dm-zero.o
30 +obj-$(CONFIG_BLK_DEV_DM_BBR) += dm-bbr.o
31
32 quiet_cmd_unroll = UNROLL $@
33 cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
34 Index: linux-git/drivers/md/dm-bbr.c
35 ===================================================================
36 --- /dev/null
37 +++ linux-git/drivers/md/dm-bbr.c
38 @@ -0,0 +1,1003 @@
39 +/*
40 + * (C) Copyright IBM Corp. 2002, 2004
41 + *
42 + * This program is free software; you can redistribute it and/or modify
43 + * it under the terms of the GNU General Public License as published by
44 + * the Free Software Foundation; either version 2 of the License, or
45 + * (at your option) any later version.
46 + *
47 + * This program is distributed in the hope that it will be useful,
48 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
49 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
50 + * the GNU General Public License for more details.
51 + *
52 + * You should have received a copy of the GNU General Public License
53 + * along with this program; if not, write to the Free Software
54 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
55 + *
56 + * linux/drivers/md/dm-bbr.c
57 + *
58 + * Bad-block-relocation (BBR) target for device-mapper.
59 + *
60 + * The BBR target is designed to remap I/O write failures to another safe
61 + * location on disk. Note that most disk drives have BBR built into them,
62 + * this means that our software BBR will be only activated when all hardware
63 + * BBR replacement sectors have been used.
64 + */
65 +
66 +#include <linux/module.h>
67 +#include <linux/init.h>
68 +#include <linux/bio.h>
69 +#include <linux/spinlock.h>
70 +#include <linux/slab.h>
71 +#include <linux/mempool.h>
72 +#include <linux/workqueue.h>
73 +#include <linux/vmalloc.h>
74 +
75 +#include "dm.h"
76 +#include "dm-bio-list.h"
77 +#include "dm-bio-record.h"
78 +#include "dm-bbr.h"
79 +#include "dm-io.h"
80 +
81 +#define SECTOR_SIZE (1 << SECTOR_SHIFT)
82 +
83 +static struct workqueue_struct *dm_bbr_wq = NULL;
84 +static void bbr_remap_handler(void *data);
85 +static kmem_cache_t *bbr_remap_cache;
86 +static kmem_cache_t *bbr_io_cache;
87 +static mempool_t *bbr_io_pool;
88 +
89 +/**
90 + * bbr_binary_tree_destroy
91 + *
92 + * Destroy the binary tree.
93 + **/
94 +static void bbr_binary_tree_destroy(struct bbr_runtime_remap *root)
95 +{
96 + struct bbr_runtime_remap **link = NULL;
97 + struct bbr_runtime_remap *node = root;
98 +
99 + while (node) {
100 + if (node->left) {
101 + link = &(node->left);
102 + node = node->left;
103 + continue;
104 + }
105 + if (node->right) {
106 + link = &(node->right);
107 + node = node->right;
108 + continue;
109 + }
110 +
111 + kmem_cache_free(bbr_remap_cache, node);
112 + if (node == root) {
113 + /* If root is deleted, we're done. */
114 + break;
115 + }
116 +
117 + /* Back to root. */
118 + node = root;
119 + *link = NULL;
120 + }
121 +}
122 +
/**
 * bbr_free_remap
 *
 * Tear down the runtime remap tree.  The lock is held for the whole
 * teardown so no concurrent reader can walk the tree while its nodes
 * are being freed.
 **/
static void bbr_free_remap(struct bbr_private *bbr_id)
{
	spin_lock_irq(&bbr_id->remap_root_lock);
	bbr_binary_tree_destroy(bbr_id->remap_root);
	bbr_id->remap_root = NULL;
	spin_unlock_irq(&bbr_id->remap_root_lock);
}
130 +
131 +static struct bbr_private *bbr_alloc_private(void)
132 +{
133 + struct bbr_private *bbr_id;
134 +
135 + bbr_id = kmalloc(sizeof(*bbr_id), GFP_KERNEL);
136 + if (bbr_id) {
137 + memset(bbr_id, 0, sizeof(*bbr_id));
138 + INIT_WORK(&bbr_id->remap_work, bbr_remap_handler, bbr_id);
139 + bbr_id->remap_root_lock = SPIN_LOCK_UNLOCKED;
140 + bbr_id->remap_ios_lock = SPIN_LOCK_UNLOCKED;
141 + bbr_id->in_use_replacement_blks = (atomic_t)ATOMIC_INIT(0);
142 + }
143 +
144 + return bbr_id;
145 +}
146 +
147 +static void bbr_free_private(struct bbr_private *bbr_id)
148 +{
149 + if (bbr_id->bbr_table) {
150 + vfree(bbr_id->bbr_table);
151 + }
152 + bbr_free_remap(bbr_id);
153 + kfree(bbr_id);
154 +}
155 +
156 +static u32 crc_table[256];
157 +static u32 crc_table_built = 0;
158 +
159 +static void build_crc_table(void)
160 +{
161 + u32 i, j, crc;
162 +
163 + for (i = 0; i <= 255; i++) {
164 + crc = i;
165 + for (j = 8; j > 0; j--) {
166 + if (crc & 1)
167 + crc = (crc >> 1) ^ CRC_POLYNOMIAL;
168 + else
169 + crc >>= 1;
170 + }
171 + crc_table[i] = crc;
172 + }
173 + crc_table_built = 1;
174 +}
175 +
176 +static u32 calculate_crc(u32 crc, void *buffer, u32 buffersize)
177 +{
178 + unsigned char *current_byte;
179 + u32 temp1, temp2, i;
180 +
181 + current_byte = (unsigned char *) buffer;
182 + /* Make sure the crc table is available */
183 + if (!crc_table_built)
184 + build_crc_table();
185 + /* Process each byte in the buffer. */
186 + for (i = 0; i < buffersize; i++) {
187 + temp1 = (crc >> 8) & 0x00FFFFFF;
188 + temp2 = crc_table[(crc ^ (u32) * current_byte) &
189 + (u32) 0xff];
190 + current_byte++;
191 + crc = temp1 ^ temp2;
192 + }
193 + return crc;
194 +}
195 +
196 +/**
197 + * le_bbr_table_sector_to_cpu
198 + *
199 + * Convert bbr meta data from on-disk (LE) format
200 + * to the native cpu endian format.
201 + **/
202 +static void le_bbr_table_sector_to_cpu(struct bbr_table *p)
203 +{
204 + int i;
205 + p->signature = le32_to_cpup(&p->signature);
206 + p->crc = le32_to_cpup(&p->crc);
207 + p->sequence_number = le32_to_cpup(&p->sequence_number);
208 + p->in_use_cnt = le32_to_cpup(&p->in_use_cnt);
209 + for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
210 + p->entries[i].bad_sect =
211 + le64_to_cpup(&p->entries[i].bad_sect);
212 + p->entries[i].replacement_sect =
213 + le64_to_cpup(&p->entries[i].replacement_sect);
214 + }
215 +}
216 +
217 +/**
218 + * cpu_bbr_table_sector_to_le
219 + *
220 + * Convert bbr meta data from cpu endian format to on-disk (LE) format
221 + **/
222 +static void cpu_bbr_table_sector_to_le(struct bbr_table *p,
223 + struct bbr_table *le)
224 +{
225 + int i;
226 + le->signature = cpu_to_le32p(&p->signature);
227 + le->crc = cpu_to_le32p(&p->crc);
228 + le->sequence_number = cpu_to_le32p(&p->sequence_number);
229 + le->in_use_cnt = cpu_to_le32p(&p->in_use_cnt);
230 + for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
231 + le->entries[i].bad_sect =
232 + cpu_to_le64p(&p->entries[i].bad_sect);
233 + le->entries[i].replacement_sect =
234 + cpu_to_le64p(&p->entries[i].replacement_sect);
235 + }
236 +}
237 +
238 +/**
239 + * validate_bbr_table_sector
240 + *
241 + * Check the specified BBR table sector for a valid signature and CRC. If it's
242 + * valid, endian-convert the table sector.
243 + **/
244 +static int validate_bbr_table_sector(struct bbr_table *p)
245 +{
246 + int rc = 0;
247 + int org_crc, final_crc;
248 +
249 + if (le32_to_cpup(&p->signature) != BBR_TABLE_SIGNATURE) {
250 + DMERR("dm-bbr: BBR table signature doesn't match!");
251 + DMERR("dm-bbr: Found 0x%x. Expecting 0x%x",
252 + le32_to_cpup(&p->signature), BBR_TABLE_SIGNATURE);
253 + rc = -EINVAL;
254 + goto out;
255 + }
256 +
257 + if (!p->crc) {
258 + DMERR("dm-bbr: BBR table sector has no CRC!");
259 + rc = -EINVAL;
260 + goto out;
261 + }
262 +
263 + org_crc = le32_to_cpup(&p->crc);
264 + p->crc = 0;
265 + final_crc = calculate_crc(INITIAL_CRC, (void *)p, sizeof(*p));
266 + if (final_crc != org_crc) {
267 + DMERR("dm-bbr: CRC failed!");
268 + DMERR("dm-bbr: Found 0x%x. Expecting 0x%x",
269 + org_crc, final_crc);
270 + rc = -EINVAL;
271 + goto out;
272 + }
273 +
274 + p->crc = cpu_to_le32p(&org_crc);
275 + le_bbr_table_sector_to_cpu(p);
276 +
277 +out:
278 + return rc;
279 +}
280 +
281 +/**
282 + * bbr_binary_tree_insert
283 + *
284 + * Insert a node into the binary tree.
285 + **/
286 +static void bbr_binary_tree_insert(struct bbr_runtime_remap **root,
287 + struct bbr_runtime_remap *newnode)
288 +{
289 + struct bbr_runtime_remap **node = root;
290 + while (node && *node) {
291 + if (newnode->remap.bad_sect > (*node)->remap.bad_sect) {
292 + node = &((*node)->right);
293 + } else {
294 + node = &((*node)->left);
295 + }
296 + }
297 +
298 + newnode->left = newnode->right = NULL;
299 + *node = newnode;
300 +}
301 +
302 +/**
303 + * bbr_binary_search
304 + *
305 + * Search for a node that contains bad_sect == lsn.
306 + **/
307 +static struct bbr_runtime_remap *bbr_binary_search(
308 + struct bbr_runtime_remap *root,
309 + u64 lsn)
310 +{
311 + struct bbr_runtime_remap *node = root;
312 + while (node) {
313 + if (node->remap.bad_sect == lsn) {
314 + break;
315 + }
316 + if (lsn > node->remap.bad_sect) {
317 + node = node->right;
318 + } else {
319 + node = node->left;
320 + }
321 + }
322 + return node;
323 +}
324 +
325 +/**
326 + * bbr_insert_remap_entry
327 + *
328 + * Create a new remap entry and add it to the binary tree for this node.
329 + **/
330 +static int bbr_insert_remap_entry(struct bbr_private *bbr_id,
331 + struct bbr_table_entry *new_bbr_entry)
332 +{
333 + struct bbr_runtime_remap *newnode;
334 +
335 + newnode = kmem_cache_alloc(bbr_remap_cache, GFP_NOIO);
336 + if (!newnode) {
337 + DMERR("dm-bbr: Could not allocate from remap cache!");
338 + return -ENOMEM;
339 + }
340 + newnode->remap.bad_sect = new_bbr_entry->bad_sect;
341 + newnode->remap.replacement_sect = new_bbr_entry->replacement_sect;
342 + spin_lock_irq(&bbr_id->remap_root_lock);
343 + bbr_binary_tree_insert(&bbr_id->remap_root, newnode);
344 + spin_unlock_irq(&bbr_id->remap_root_lock);
345 + return 0;
346 +}
347 +
348 +/**
349 + * bbr_table_to_remap_list
350 + *
351 + * The on-disk bbr table is sorted by the replacement sector LBA. In order to
352 + * improve run time performance, the in memory remap list must be sorted by
353 + * the bad sector LBA. This function is called at discovery time to initialize
354 + * the remap list. This function assumes that at least one copy of meta data
355 + * is valid.
356 + **/
357 +static u32 bbr_table_to_remap_list(struct bbr_private *bbr_id)
358 +{
359 + u32 in_use_blks = 0;
360 + int i, j;
361 + struct bbr_table *p;
362 +
363 + for (i = 0, p = bbr_id->bbr_table;
364 + i < bbr_id->nr_sects_bbr_table;
365 + i++, p++) {
366 + if (!p->in_use_cnt) {
367 + break;
368 + }
369 + in_use_blks += p->in_use_cnt;
370 + for (j = 0; j < p->in_use_cnt; j++) {
371 + bbr_insert_remap_entry(bbr_id, &p->entries[j]);
372 + }
373 + }
374 + if (in_use_blks) {
375 + char b[32];
376 + DMWARN("dm-bbr: There are %u BBR entries for device %s",
377 + in_use_blks, format_dev_t(b, bbr_id->dev->bdev->bd_dev));
378 + }
379 +
380 + return in_use_blks;
381 +}
382 +
383 +/**
384 + * bbr_search_remap_entry
385 + *
386 + * Search remap entry for the specified sector. If found, return a pointer to
387 + * the table entry. Otherwise, return NULL.
388 + **/
389 +static struct bbr_table_entry *bbr_search_remap_entry(
390 + struct bbr_private *bbr_id,
391 + u64 lsn)
392 +{
393 + struct bbr_runtime_remap *p;
394 +
395 + spin_lock_irq(&bbr_id->remap_root_lock);
396 + p = bbr_binary_search(bbr_id->remap_root, lsn);
397 + spin_unlock_irq(&bbr_id->remap_root_lock);
398 + if (p) {
399 + return (&p->remap);
400 + } else {
401 + return NULL;
402 + }
403 +}
404 +
405 +/**
406 + * bbr_remap
407 + *
408 + * If *lsn is in the remap table, return TRUE and modify *lsn,
409 + * else, return FALSE.
410 + **/
411 +static inline int bbr_remap(struct bbr_private *bbr_id,
412 + u64 *lsn)
413 +{
414 + struct bbr_table_entry *e;
415 +
416 + if (atomic_read(&bbr_id->in_use_replacement_blks)) {
417 + e = bbr_search_remap_entry(bbr_id, *lsn);
418 + if (e) {
419 + *lsn = e->replacement_sect;
420 + return 1;
421 + }
422 + }
423 + return 0;
424 +}
425 +
426 +/**
427 + * bbr_remap_probe
428 + *
429 + * If any of the sectors in the range [lsn, lsn+nr_sects] are in the remap
430 + * table return TRUE, Else, return FALSE.
431 + **/
432 +static inline int bbr_remap_probe(struct bbr_private *bbr_id,
433 + u64 lsn, u64 nr_sects)
434 +{
435 + u64 tmp, cnt;
436 +
437 + if (atomic_read(&bbr_id->in_use_replacement_blks)) {
438 + for (cnt = 0, tmp = lsn;
439 + cnt < nr_sects;
440 + cnt += bbr_id->blksize_in_sects, tmp = lsn + cnt) {
441 + if (bbr_remap(bbr_id,&tmp)) {
442 + return 1;
443 + }
444 + }
445 + }
446 + return 0;
447 +}
448 +
449 +/**
450 + * bbr_setup
451 + *
452 + * Read the remap tables from disk and set up the initial remap tree.
453 + **/
454 +static int bbr_setup(struct bbr_private *bbr_id)
455 +{
456 + struct bbr_table *table = bbr_id->bbr_table;
457 + struct io_region job;
458 + unsigned long error;
459 + int i, rc = 0;
460 +
461 + job.bdev = bbr_id->dev->bdev;
462 + job.count = 1;
463 +
464 + /* Read and verify each BBR table sector individually. */
465 + for (i = 0; i < bbr_id->nr_sects_bbr_table; i++, table++) {
466 + job.sector = bbr_id->lba_table1 + i;
467 + rc = dm_io_sync_vm(1, &job, READ, table, &error);
468 + if (rc && bbr_id->lba_table2) {
469 + job.sector = bbr_id->lba_table2 + i;
470 + rc = dm_io_sync_vm(1, &job, READ, table, &error);
471 + }
472 + if (rc) {
473 + goto out;
474 + }
475 +
476 + rc = validate_bbr_table_sector(table);
477 + if (rc) {
478 + goto out;
479 + }
480 + }
481 + atomic_set(&bbr_id->in_use_replacement_blks,
482 + bbr_table_to_remap_list(bbr_id));
483 +
484 +out:
485 + if (rc) {
486 + DMERR("dm-bbr: error during device setup: %d", rc);
487 + }
488 + return rc;
489 +}
490 +
/**
 * bbr_io_remap_error
 * @bbr_id:		Private data for the BBR node.
 * @rw:			READ or WRITE.
 * @starting_lsn:	Starting sector of request to remap.
 * @count:		Number of sectors in the request.
 * @page:		Page containing the data for the request.
 * @offset:		Byte-offset of the data within the page.
 *
 * For the requested range, try to write each sector individually. For each
 * sector that fails, find the next available remap location and write the
 * data to that new location. Then update the table and write both copies
 * of the table to disk. Finally, update the in-memory mapping and do any
 * other necessary bookkeeping.
 *
 * NOTE(review): this is also reached with @rw == READ (from
 * bbr_io_process_request()); in that case the current page contents for
 * the unreadable sector are what gets written to the replacement —
 * confirm that is the intended recovery behavior.
 *
 * Returns 0 on success, -EIO when the replacement pool is exhausted,
 * or the error from a failed table write / tree insert.
 **/
static int bbr_io_remap_error(struct bbr_private *bbr_id,
			      int rw,
			      u64 starting_lsn,
			      u64 count,
			      struct page *page,
			      unsigned int offset)
{
	struct bbr_table *bbr_table;
	struct io_region job;
	struct page_list pl;
	unsigned long table_sector_index;
	unsigned long table_sector_offset;
	unsigned long index;
	unsigned long error;
	u64 lsn, new_lsn;
	char b[32];
	int rc;

	job.bdev = bbr_id->dev->bdev;
	job.count = 1;
	pl.page = page;
	pl.next = NULL;

	/* For each sector in the request. */
	for (lsn = 0; lsn < count; lsn++, offset += SECTOR_SIZE) {
		job.sector = starting_lsn + lsn;
		rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
		/* Retry loop: keep trying successive replacement sectors
		 * until one I/O succeeds or the pool runs out. */
		while (rc) {
			/* Find the next available relocation sector. */
			new_lsn = atomic_read(&bbr_id->in_use_replacement_blks);
			if (new_lsn >= bbr_id->nr_replacement_blks) {
				/* No more replacement sectors available. */
				return -EIO;
			}
			new_lsn += bbr_id->start_replacement_sect;

			/* Write the data to its new location. */
			DMWARN("dm-bbr: device %s: Trying to remap bad sector "PFU64" to sector "PFU64,
			       format_dev_t(b, bbr_id->dev->bdev->bd_dev),
			       starting_lsn + lsn, new_lsn);
			job.sector = new_lsn;
			rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
			if (rc) {
				/* This replacement sector is bad.
				 * Try the next one.
				 */
				DMERR("dm-bbr: device %s: replacement sector "PFU64" is bad. Skipping.",
				      format_dev_t(b, bbr_id->dev->bdev->bd_dev), new_lsn);
				atomic_inc(&bbr_id->in_use_replacement_blks);
				continue;
			}

			/* Add this new entry to the on-disk table.
			 * Entries are laid out BBR_ENTRIES_PER_SECT per
			 * table sector, indexed by replacement slot. */
			table_sector_index = new_lsn -
					     bbr_id->start_replacement_sect;
			table_sector_offset = table_sector_index /
					      BBR_ENTRIES_PER_SECT;
			index = table_sector_index % BBR_ENTRIES_PER_SECT;

			bbr_table = &bbr_id->bbr_table[table_sector_offset];
			bbr_table->entries[index].bad_sect = starting_lsn + lsn;
			bbr_table->entries[index].replacement_sect = new_lsn;
			bbr_table->in_use_cnt++;
			bbr_table->sequence_number++;
			/* The CRC is computed with the crc field zeroed. */
			bbr_table->crc = 0;
			bbr_table->crc = calculate_crc(INITIAL_CRC,
						       bbr_table,
						       sizeof(struct bbr_table));

			/* Write the table to disk (both copies if present). */
			cpu_bbr_table_sector_to_le(bbr_table, bbr_table);
			if (bbr_id->lba_table1) {
				job.sector = bbr_id->lba_table1 + table_sector_offset;
				rc = dm_io_sync_vm(1, &job, WRITE, bbr_table, &error);
			}
			if (bbr_id->lba_table2) {
				job.sector = bbr_id->lba_table2 + table_sector_offset;
				rc |= dm_io_sync_vm(1, &job, WRITE, bbr_table, &error);
			}
			/* Convert back: the in-memory copy stays cpu-endian. */
			le_bbr_table_sector_to_cpu(bbr_table);

			if (rc) {
				/* Error writing one of the tables to disk. */
				DMERR("dm-bbr: device %s: error updating BBR tables on disk.",
				      format_dev_t(b, bbr_id->dev->bdev->bd_dev));
				return rc;
			}

			/* Insert a new entry in the remapping binary-tree. */
			rc = bbr_insert_remap_entry(bbr_id,
						    &bbr_table->entries[index]);
			if (rc) {
				DMERR("dm-bbr: device %s: error adding new entry to remap tree.",
				      format_dev_t(b, bbr_id->dev->bdev->bd_dev));
				return rc;
			}

			/* Claim the slot only after everything succeeded. */
			atomic_inc(&bbr_id->in_use_replacement_blks);
		}
	}

	return 0;
}
609 +
/**
 * bbr_io_process_request
 *
 * For each sector in this request, check if the sector has already
 * been remapped. If so, process all previous sectors in the request,
 * followed by the remapped sector. Then reset the starting lsn and
 * count, and keep going with the rest of the request as if it were
 * a whole new request. If any of the sync_io's return an error,
 * call the remapper to relocate the bad sector(s).
 *
 * 2.5 Note: When switching over to bio's for the I/O path, we have made
 * the assumption that the I/O request described by the bio is one
 * virtually contiguous piece of memory (even though the bio vector
 * describes it using a series of physical page addresses).
 *
 * Returns 0 on success or the first unrecoverable error.
 **/
static int bbr_io_process_request(struct bbr_private *bbr_id,
				  struct bio *bio)
{
	struct io_region job;
	u64 starting_lsn = bio->bi_sector;
	u64 count, lsn, remapped_lsn;
	struct page_list pl;
	unsigned int offset;
	unsigned long error;
	int i, rw = bio_data_dir(bio);
	int rc = 0;

	job.bdev = bbr_id->dev->bdev;
	pl.next = NULL;

	/* Each bio can contain multiple vectors, each with a different page.
	 * Treat each vector as a separate request.
	 */
	/* KMC: Is this the right way to walk the bvec list? */
	/* bi_idx is advanced each iteration so bio_iovec() below always
	 * yields the current vector. */
	for (i = 0;
	     i < bio->bi_vcnt;
	     i++, bio->bi_idx++, starting_lsn += count) {

		/* Bvec info: number of sectors, page,
		 * and byte-offset within page.
		 */
		count = bio_iovec(bio)->bv_len >> SECTOR_SHIFT;
		pl.page = bio_iovec(bio)->bv_page;
		offset = bio_iovec(bio)->bv_offset;

		/* For each sector in this bvec, check if the sector has
		 * already been remapped. If so, process all previous sectors
		 * in this request, followed by the remapped sector. Then reset
		 * the starting lsn and count and keep going with the rest of
		 * the request as if it were a whole new request.
		 */
		for (lsn = 0; lsn < count; lsn++) {
			remapped_lsn = starting_lsn + lsn;
			rc = bbr_remap(bbr_id, &remapped_lsn);
			if (!rc) {
				/* This sector is fine. */
				continue;
			}

			/* Process all sectors in the request up to this one. */
			if (lsn > 0) {
				job.sector = starting_lsn;
				job.count = lsn;
				rc = dm_io_sync(1, &job, rw, &pl,
						offset, &error);
				if (rc) {
					/* If this I/O failed, then one of the
					 * sectors in this request needs to be
					 * relocated.
					 */
					rc = bbr_io_remap_error(bbr_id, rw,
								starting_lsn,
								lsn, pl.page,
								offset);
					if (rc) {
						/* KMC: Return? Or continue to next bvec? */
						return rc;
					}
				}
				offset += (lsn << SECTOR_SHIFT);
			}

			/* Process the remapped sector. */
			job.sector = remapped_lsn;
			job.count = 1;
			rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
			if (rc) {
				/* BUGBUG - Need more processing if this caused
				 * an error. If this I/O failed, then the
				 * existing remap is now bad, and we need to
				 * find a new remap. Can't use
				 * bbr_io_remap_error(), because the existing
				 * map entry needs to be changed, not added
				 * again, and the original table entry also
				 * needs to be changed.
				 */
				return rc;
			}

			/* Restart the scan just past the remapped sector:
			 * lsn = -1 so the loop's lsn++ makes it 0 against
			 * the new starting_lsn/count window. */
			starting_lsn += (lsn + 1);
			count -= (lsn + 1);
			lsn = -1;
			offset += SECTOR_SIZE;
		}

		/* Check for any remaining sectors after the last split. This
		 * could potentially be the whole request, but that should be a
		 * rare case because requests should only be processed by the
		 * thread if we know an error occurred or they contained one or
		 * more remapped sectors.
		 */
		if (count) {
			job.sector = starting_lsn;
			job.count = count;
			rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
			if (rc) {
				/* If this I/O failed, then one of the sectors
				 * in this request needs to be relocated.
				 */
				rc = bbr_io_remap_error(bbr_id, rw, starting_lsn,
							count, pl.page, offset);
				if (rc) {
					/* KMC: Return? Or continue to next bvec? */
					return rc;
				}
			}
		}
	}

	return 0;
}
741 +
742 +static void bbr_io_process_requests(struct bbr_private *bbr_id,
743 + struct bio *bio)
744 +{
745 + struct bio *next;
746 + int rc;
747 +
748 + while (bio) {
749 + next = bio->bi_next;
750 + bio->bi_next = NULL;
751 +
752 + rc = bbr_io_process_request(bbr_id, bio);
753 +
754 + bio_endio(bio, bio->bi_size, rc);
755 +
756 + bio = next;
757 + }
758 +}
759 +
760 +/**
761 + * bbr_remap_handler
762 + *
763 + * This is the handler for the bbr work-queue.
764 + *
765 + * I/O requests should only be sent to this handler if we know that:
766 + * a) the request contains at least one remapped sector.
767 + * or
768 + * b) the request caused an error on the normal I/O path.
769 + *
770 + * This function uses synchronous I/O, so sending a request to this
771 + * thread that doesn't need special processing will cause severe
772 + * performance degredation.
773 + **/
774 +static void bbr_remap_handler(void *data)
775 +{
776 + struct bbr_private *bbr_id = data;
777 + struct bio *bio;
778 + unsigned long flags;
779 +
780 + spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
781 + bio = bio_list_get(&bbr_id->remap_ios);
782 + spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);
783 +
784 + bbr_io_process_requests(bbr_id, bio);
785 +}
786 +
/**
 * bbr_endio
 *
 * This is the callback for normal write requests. Check for an error
 * during the I/O, and send to the thread for processing if necessary.
 **/
static int bbr_endio(struct dm_target *ti, struct bio *bio,
		     int error, union map_info *map_context)
{
	struct bbr_private *bbr_id = ti->private;
	/* Saved pre-map bio state, recorded by bbr_map() on the fast path;
	 * NULL when the bio went through the workqueue path. */
	struct dm_bio_details *bbr_io = map_context->ptr;

	if (error && bbr_io) {
		unsigned long flags;
		char b[32];

		/* Rewind the bio to its pre-map state so the workqueue
		 * can reprocess it from scratch. */
		dm_bio_restore(bbr_io, bio);
		map_context->ptr = NULL;

		DMERR("dm-bbr: device %s: I/O failure on sector %lu. "
		      "Scheduling for retry.",
		      format_dev_t(b, bbr_id->dev->bdev->bd_dev),
		      (unsigned long)bio->bi_sector);

		spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
		bio_list_add(&bbr_id->remap_ios, bio);
		spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);

		queue_work(dm_bbr_wq, &bbr_id->remap_work);

		/* NOTE(review): the positive return appears to tell the dm
		 * core this I/O is still in flight (it was requeued above) —
		 * confirm against the dm core's end_io return convention. */
		error = 1;
	}

	/* Release the bio-details record allocated in bbr_map(). */
	if (bbr_io)
		mempool_free(bbr_io, bbr_io_pool);

	return error;
}
825 +
/**
 * Construct a bbr mapping.
 *
 * Expects 8 arguments (see the error string below for the order):
 * the underlying device, the sector offset applied to every I/O, the
 * LBAs of the two table copies, the table size in sectors, the first
 * replacement sector, the number of replacement blocks, and the block
 * size in bytes.
 **/
static int bbr_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct bbr_private *bbr_id;
	unsigned long block_size;
	char *end;
	int rc = -EINVAL;

	if (argc != 8) {
		ti->error = "dm-bbr requires exactly 8 arguments: "
			"device offset table1_lsn table2_lsn table_size start_replacement nr_replacement_blks block_size";
		goto out1;
	}

	bbr_id = bbr_alloc_private();
	if (!bbr_id) {
		ti->error = "dm-bbr: Error allocating bbr private data.";
		goto out1;
	}

	/* NOTE(review): the 'end' pointers from simple_strtoull() are never
	 * checked, so a malformed number silently parses as 0 — consider
	 * validating each argument. */
	bbr_id->offset = simple_strtoull(argv[1], &end, 10);
	bbr_id->lba_table1 = simple_strtoull(argv[2], &end, 10);
	bbr_id->lba_table2 = simple_strtoull(argv[3], &end, 10);
	bbr_id->nr_sects_bbr_table = simple_strtoull(argv[4], &end, 10);
	bbr_id->start_replacement_sect = simple_strtoull(argv[5], &end, 10);
	bbr_id->nr_replacement_blks = simple_strtoull(argv[6], &end, 10);
	block_size = simple_strtoul(argv[7], &end, 10);
	/* Cache the block size in sectors; used by bbr_remap_probe(). */
	bbr_id->blksize_in_sects = (block_size >> SECTOR_SHIFT);

	/* One in-memory struct bbr_table per on-disk table sector. */
	bbr_id->bbr_table = vmalloc(bbr_id->nr_sects_bbr_table << SECTOR_SHIFT);
	if (!bbr_id->bbr_table) {
		ti->error = "dm-bbr: Error allocating bbr table.";
		goto out2;
	}

	if (dm_get_device(ti, argv[0], 0, ti->len,
			  dm_table_get_mode(ti->table), &bbr_id->dev)) {
		ti->error = "dm-bbr: Device lookup failed";
		goto out2;
	}

	/* Load and validate the on-disk tables; build the remap tree. */
	rc = bbr_setup(bbr_id);
	if (rc) {
		ti->error = "dm-bbr: Device setup failed";
		goto out3;
	}

	ti->private = bbr_id;
	return 0;

out3:
	dm_put_device(ti, bbr_id->dev);
out2:
	bbr_free_private(bbr_id);
out1:
	return rc;
}
885 +
/**
 * bbr_dtr
 *
 * Destroy a bbr mapping: release the underlying device reference,
 * then all private state (table, remap tree, private struct).
 **/
static void bbr_dtr(struct dm_target *ti)
{
	struct bbr_private *bbr_id = ti->private;

	dm_put_device(ti, bbr_id->dev);
	bbr_free_private(bbr_id);
}
893 +
894 +static int bbr_map(struct dm_target *ti, struct bio *bio,
895 + union map_info *map_context)
896 +{
897 + struct bbr_private *bbr_id = ti->private;
898 + struct dm_bio_details *bbr_io;
899 + unsigned long flags;
900 + int rc = 1;
901 +
902 + bio->bi_sector += bbr_id->offset;
903 +
904 + if (atomic_read(&bbr_id->in_use_replacement_blks) == 0 ||
905 + !bbr_remap_probe(bbr_id, bio->bi_sector, bio_sectors(bio))) {
906 + /* No existing remaps or this request doesn't
907 + * contain any remapped sectors.
908 + */
909 + bio->bi_bdev = bbr_id->dev->bdev;
910 +
911 + bbr_io = mempool_alloc(bbr_io_pool, GFP_NOIO);
912 + dm_bio_record(bbr_io, bio);
913 + map_context->ptr = bbr_io;
914 + } else {
915 + /* This request has at least one remapped sector.
916 + * Give it to the work-queue for processing.
917 + */
918 + map_context->ptr = NULL;
919 + spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
920 + bio_list_add(&bbr_id->remap_ios, bio);
921 + spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);
922 +
923 + queue_work(dm_bbr_wq, &bbr_id->remap_work);
924 + rc = 0;
925 + }
926 +
927 + return rc;
928 +}
929 +
930 +static int bbr_status(struct dm_target *ti, status_type_t type,
931 + char *result, unsigned int maxlen)
932 +{
933 + struct bbr_private *bbr_id = ti->private;
934 + char b[BDEVNAME_SIZE];
935 +
936 + switch (type) {
937 + case STATUSTYPE_INFO:
938 + result[0] = '\0';
939 + break;
940 +
941 + case STATUSTYPE_TABLE:
942 + snprintf(result, maxlen, "%s "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" %u",
943 + format_dev_t(b, bbr_id->dev->bdev->bd_dev),
944 + bbr_id->offset, bbr_id->lba_table1, bbr_id->lba_table2,
945 + bbr_id->nr_sects_bbr_table,
946 + bbr_id->start_replacement_sect,
947 + bbr_id->nr_replacement_blks,
948 + bbr_id->blksize_in_sects << SECTOR_SHIFT);
949 + break;
950 + }
951 + return 0;
952 +}
953 +
/* Registration record for the "bbr" device-mapper target. */
static struct target_type bbr_target = {
	.name = "bbr",
	.version= {1, 0, 1},
	.module = THIS_MODULE,
	.ctr = bbr_ctr,
	.dtr = bbr_dtr,
	.map = bbr_map,
	.end_io = bbr_endio,
	.status = bbr_status,
};
964 +
965 +int __init dm_bbr_init(void)
966 +{
967 + int rc;
968 +
969 + rc = dm_register_target(&bbr_target);
970 + if (rc) {
971 + DMERR("dm-bbr: error registering target.");
972 + goto err1;
973 + }
974 +
975 + bbr_remap_cache = kmem_cache_create("bbr-remap",
976 + sizeof(struct bbr_runtime_remap),
977 + 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
978 + if (!bbr_remap_cache) {
979 + DMERR("dm-bbr: error creating remap cache.");
980 + rc = ENOMEM;
981 + goto err2;
982 + }
983 +
984 + bbr_io_cache = kmem_cache_create("bbr-io", sizeof(struct dm_bio_details),
985 + 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
986 + if (!bbr_io_cache) {
987 + DMERR("dm-bbr: error creating io cache.");
988 + rc = ENOMEM;
989 + goto err3;
990 + }
991 +
992 + bbr_io_pool = mempool_create(256, mempool_alloc_slab,
993 + mempool_free_slab, bbr_io_cache);
994 + if (!bbr_io_pool) {
995 + DMERR("dm-bbr: error creating io mempool.");
996 + rc = ENOMEM;
997 + goto err4;
998 + }
999 +
1000 + dm_bbr_wq = create_workqueue("dm-bbr");
1001 + if (!dm_bbr_wq) {
1002 + DMERR("dm-bbr: error creating work-queue.");
1003 + rc = ENOMEM;
1004 + goto err5;
1005 + }
1006 +
1007 + rc = dm_io_get(1);
1008 + if (rc) {
1009 + DMERR("dm-bbr: error initializing I/O service.");
1010 + goto err6;
1011 + }
1012 +
1013 + return 0;
1014 +
1015 +err6:
1016 + destroy_workqueue(dm_bbr_wq);
1017 +err5:
1018 + mempool_destroy(bbr_io_pool);
1019 +err4:
1020 + kmem_cache_destroy(bbr_io_cache);
1021 +err3:
1022 + kmem_cache_destroy(bbr_remap_cache);
1023 +err2:
1024 + dm_unregister_target(&bbr_target);
1025 +err1:
1026 + return rc;
1027 +}
1028 +
/**
 * dm_bbr_exit
 *
 * Module teardown: release the global resources in the reverse order
 * of their creation in dm_bbr_init().
 **/
void __exit dm_bbr_exit(void)
{
	dm_io_put(1);
	destroy_workqueue(dm_bbr_wq);
	mempool_destroy(bbr_io_pool);
	kmem_cache_destroy(bbr_io_cache);
	kmem_cache_destroy(bbr_remap_cache);
	dm_unregister_target(&bbr_target);
}
1038 +
/* Standard module entry/exit hooks and license declaration. */
module_init(dm_bbr_init);
module_exit(dm_bbr_exit);
MODULE_LICENSE("GPL");
1042 Index: linux-git/drivers/md/dm-bbr.h
1043 ===================================================================
1044 --- /dev/null
1045 +++ linux-git/drivers/md/dm-bbr.h
1046 @@ -0,0 +1,125 @@
1047 +/*
1048 + * (C) Copyright IBM Corp. 2002, 2004
1049 + *
1050 + * This program is free software; you can redistribute it and/or modify
1051 + * it under the terms of the GNU General Public License as published by
1052 + * the Free Software Foundation; either version 2 of the License, or
1053 + * (at your option) any later version.
1054 + *
1055 + * This program is distributed in the hope that it will be useful,
1056 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1057 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
1058 + * the GNU General Public License for more details.
1059 + *
1060 + * You should have received a copy of the GNU General Public License
1061 + * along with this program; if not, write to the Free Software
1062 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1063 + *
1064 + * linux/drivers/md/dm-bbr.h
1065 + *
1066 + * Bad-block-relocation (BBR) target for device-mapper.
1067 + *
1068 + * The BBR target is designed to remap I/O write failures to another safe
1069 + * location on disk. Note that most disk drives have BBR built into them,
1070 + * this means that our software BBR will be only activated when all hardware
1071 + * BBR replacement sectors have been used.
1072 + */
1073 +
1074 +#define BBR_TABLE_SIGNATURE 0x42627254 /* "BbrT" in ASCII */
1075 +#define BBR_ENTRIES_PER_SECT 31 /* sizes struct bbr_table to 16 + 31*16 = 512 bytes (one sector) */
1076 +#define INITIAL_CRC 0xFFFFFFFF /* conventional CRC-32 seed (all ones) */
1077 +#define CRC_POLYNOMIAL 0xEDB88320L /* reflected (LSB-first) CRC-32 polynomial */
1078 +
1079 +/**
1080 + * Macros to cleanly print 64-bit numbers on both 32-bit and 64-bit machines.
1081 + * Use these in place of %Ld, %Lu, and %Lx.
1082 + **/
1083 +#if BITS_PER_LONG > 32
1084 +#define PFU64 "%lu" /* 64-bit kernel: u64 is "unsigned long" */
1085 +#else
1086 +#define PFU64 "%Lu" /* 32-bit kernel: u64 needs the long-long length modifier */
1087 +#endif
1088 +
1089 +/**
1090 + * struct bbr_table_entry
1091 + * @bad_sect: LBA of bad location.
1092 + * @replacement_sect: LBA of new location.
1093 + *
1094 + * One 16-byte BBR remap record: stored on disk inside struct bbr_table and
1095 + * also embedded in the in-core struct bbr_runtime_remap tree node.
1096 + **/
1097 +struct bbr_table_entry {
1098 + u64 bad_sect;
1099 + u64 replacement_sect;
1100 +};
1100 +
1101 +/**
1102 + * struct bbr_table
1103 + * @signature: Signature on each BBR table sector.
1104 + * @crc: CRC for this table sector.
1105 + * @sequence_number: Used to resolve conflicts when primary and secondary
1106 + * tables do not match.
1107 + * @in_use_cnt: Number of in-use table entries.
1108 + * @entries: Actual table of remaps.
1109 + *
1110 + * Structure to describe each sector of the metadata table. Each sector in this
1111 + * table can describe 31 remapped sectors.
1112 + **/
1113 +struct bbr_table {
1114 + u32 signature; /* always BBR_TABLE_SIGNATURE ("BbrT") */
1115 + u32 crc;
1116 + u32 sequence_number;
1117 + u32 in_use_cnt;
1118 + struct bbr_table_entry entries[BBR_ENTRIES_PER_SECT]; /* 31 * 16 = 496 bytes; whole struct = 512 */
1119 +};
1120 +
1121 +/**
1122 + * struct bbr_runtime_remap
1123 + *
1124 + * Node in the binary tree used to keep track of remaps.
1125 + **/
1126 +struct bbr_runtime_remap { /* NOTE(review): presumably keyed by remap.bad_sect — confirm in dm-bbr.c */
1127 + struct bbr_table_entry remap;
1128 + struct bbr_runtime_remap *left;
1129 + struct bbr_runtime_remap *right;
1130 +};
1131 +
1132 +/**
1133 + * struct bbr_private
1134 + * @dev: Info about underlying device.
1135 + * @bbr_table: Copy of metadata table.
1136 + * @remap_root: Binary tree containing all remaps.
1137 + * @remap_root_lock: Lock for the binary tree.
1138 + * @remap_work: For adding work items to the work-queue.
1139 + * @remap_ios: List of I/Os for the work-queue to handle.
1140 + * @remap_ios_lock: Lock for the remap_ios list.
1141 + * @offset: LBA of data area.
1142 + * @lba_table1: LBA of primary BBR table.
1143 + * @lba_table2: LBA of secondary BBR table.
1144 + * @nr_sects_bbr_table: Size of each BBR table.
1145 + * @start_replacement_sect: LBA of start of replacement blocks.
1146 + * @nr_replacement_blks: Number of replacement blocks.
1147 + * @blksize_in_sects: Size of each block.
1148 + * @in_use_replacement_blks: Current number of remapped blocks.
1149 + *
1150 + * Private data for each BBR target.
1151 + **/
1152 +struct bbr_private {
1153 + struct dm_dev *dev;
1154 + struct bbr_table *bbr_table;
1155 + struct bbr_runtime_remap *remap_root;
1156 + spinlock_t remap_root_lock;
1157 +
1158 + struct work_struct remap_work;
1159 + struct bio_list remap_ios;
1160 + spinlock_t remap_ios_lock;
1161 +
1162 + u64 offset;
1163 + u64 lba_table1;
1164 + u64 lba_table2;
1165 + u64 nr_sects_bbr_table;
1166 + u64 start_replacement_sect;
1167 + u64 nr_replacement_blks;
1168 + u32 blksize_in_sects;
1169 + atomic_t in_use_replacement_blks;
1170 +};
1171 +

  ViewVC Help
Powered by ViewVC 1.1.20