/[linux-patches]/genpatches-2.6/trunk/2.6.19/4105_dm-bbr.patch
Gentoo

Contents of /genpatches-2.6/trunk/2.6.19/4105_dm-bbr.patch

Parent Directory | Revision Log


Revision 665 - (hide annotations) (download)
Sat Oct 7 10:34:09 2006 UTC (12 years, 2 months ago) by phreak
Original Path: genpatches-2.6/trunk/2.6.19-pre/4105_dm-bbr.patch
File size: 32199 byte(s)
Offset fixes for 2500_via-irq-quirk-revert.patch, 4000_deprecate-sk98lin.patch and 4105_dm-bbr.patch.
1 phreak 665 Index: linux-2.6.19/drivers/md/Kconfig
2 phreak 366 ===================================================================
3 phreak 665 --- linux-2.6.19.orig/drivers/md/Kconfig
4     +++ linux-2.6.19/drivers/md/Kconfig
5     @@ -261,6 +261,17 @@ config DM_MULTIPATH_EMC
6 phreak 366 ---help---
7     Multipath support for EMC CX/AX series hardware.
8    
9     +config BLK_DEV_DM_BBR
10     + tristate "Bad Block Relocation Device Target (EXPERIMENTAL)"
11     + depends on BLK_DEV_DM && EXPERIMENTAL
12     + ---help---
13     + Support for devices with software-based bad-block-relocation.
14     +
15     + To compile this as a module, choose M here: the module will be
16     + called dm-bbr.
17     +
18     + If unsure, say N.
19     +
20     endmenu
21    
22 phreak 665 endif
23     Index: linux-2.6.19/drivers/md/Makefile
24 phreak 366 ===================================================================
25 phreak 665 --- linux-2.6.19.orig/drivers/md/Makefile
26     +++ linux-2.6.19/drivers/md/Makefile
27 phreak 520 @@ -36,6 +36,7 @@ obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc
28 phreak 366 obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
29     obj-$(CONFIG_DM_MIRROR) += dm-mirror.o
30     obj-$(CONFIG_DM_ZERO) += dm-zero.o
31     +obj-$(CONFIG_BLK_DEV_DM_BBR) += dm-bbr.o
32    
33     quiet_cmd_unroll = UNROLL $@
34     cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
35 phreak 665 Index: linux-2.6.19/drivers/md/dm-bbr.c
36 phreak 274 ===================================================================
37     --- /dev/null
38 phreak 665 +++ linux-2.6.19/drivers/md/dm-bbr.c
39 phreak 534 @@ -0,0 +1,1004 @@
40 dsd 7 +/*
41     + * (C) Copyright IBM Corp. 2002, 2004
42     + *
43     + * This program is free software; you can redistribute it and/or modify
44     + * it under the terms of the GNU General Public License as published by
45     + * the Free Software Foundation; either version 2 of the License, or
46     + * (at your option) any later version.
47     + *
48     + * This program is distributed in the hope that it will be useful,
49     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
50     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
51     + * the GNU General Public License for more details.
52     + *
53     + * You should have received a copy of the GNU General Public License
54     + * along with this program; if not, write to the Free Software
55     + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
56     + *
57     + * linux/drivers/md/dm-bbr.c
58     + *
59     + * Bad-block-relocation (BBR) target for device-mapper.
60     + *
61     + * The BBR target is designed to remap I/O write failures to another safe
62     + * location on disk. Note that most disk drives have BBR built into them,
63     + * this means that our software BBR will be only activated when all hardware
64     + * BBR replacement sectors have been used.
65     + */
66     +
67     +#include <linux/module.h>
68     +#include <linux/init.h>
69     +#include <linux/bio.h>
70     +#include <linux/spinlock.h>
71     +#include <linux/slab.h>
72     +#include <linux/mempool.h>
73     +#include <linux/workqueue.h>
74     +#include <linux/vmalloc.h>
75     +
76     +#include "dm.h"
77     +#include "dm-bio-list.h"
78     +#include "dm-bio-record.h"
79     +#include "dm-bbr.h"
80     +#include "dm-io.h"
81     +
82 phreak 534 +#define DM_MSG_PREFIX "bbr"
83 dsd 7 +#define SECTOR_SIZE (1 << SECTOR_SHIFT)
84     +
85     +static struct workqueue_struct *dm_bbr_wq = NULL;
86     +static void bbr_remap_handler(void *data);
87     +static kmem_cache_t *bbr_remap_cache;
88     +static kmem_cache_t *bbr_io_cache;
89     +static mempool_t *bbr_io_pool;
90     +
91     +/**
92     + * bbr_binary_tree_destroy
93     + *
94     + * Destroy the binary tree.
95     + **/
96     +static void bbr_binary_tree_destroy(struct bbr_runtime_remap *root)
97     +{
98     + struct bbr_runtime_remap **link = NULL;
99     + struct bbr_runtime_remap *node = root;
100     +
101     + while (node) {
102     + if (node->left) {
103     + link = &(node->left);
104     + node = node->left;
105     + continue;
106     + }
107     + if (node->right) {
108     + link = &(node->right);
109     + node = node->right;
110     + continue;
111     + }
112     +
113     + kmem_cache_free(bbr_remap_cache, node);
114     + if (node == root) {
115     + /* If root is deleted, we're done. */
116     + break;
117     + }
118     +
119     + /* Back to root. */
120     + node = root;
121     + *link = NULL;
122     + }
123     +}
124     +
125     +static void bbr_free_remap(struct bbr_private *bbr_id)
126     +{
127     + spin_lock_irq(&bbr_id->remap_root_lock);
128     + bbr_binary_tree_destroy(bbr_id->remap_root);
129     + bbr_id->remap_root = NULL;
130     + spin_unlock_irq(&bbr_id->remap_root_lock);
131     +}
132     +
133     +static struct bbr_private *bbr_alloc_private(void)
134     +{
135     + struct bbr_private *bbr_id;
136     +
137     + bbr_id = kmalloc(sizeof(*bbr_id), GFP_KERNEL);
138     + if (bbr_id) {
139     + memset(bbr_id, 0, sizeof(*bbr_id));
140     + INIT_WORK(&bbr_id->remap_work, bbr_remap_handler, bbr_id);
141     + bbr_id->remap_root_lock = SPIN_LOCK_UNLOCKED;
142     + bbr_id->remap_ios_lock = SPIN_LOCK_UNLOCKED;
143     + bbr_id->in_use_replacement_blks = (atomic_t)ATOMIC_INIT(0);
144     + }
145     +
146     + return bbr_id;
147     +}
148     +
149     +static void bbr_free_private(struct bbr_private *bbr_id)
150     +{
151     + if (bbr_id->bbr_table) {
152     + vfree(bbr_id->bbr_table);
153     + }
154     + bbr_free_remap(bbr_id);
155     + kfree(bbr_id);
156     +}
157     +
158     +static u32 crc_table[256];
159     +static u32 crc_table_built = 0;
160     +
161     +static void build_crc_table(void)
162     +{
163     + u32 i, j, crc;
164     +
165     + for (i = 0; i <= 255; i++) {
166     + crc = i;
167     + for (j = 8; j > 0; j--) {
168     + if (crc & 1)
169     + crc = (crc >> 1) ^ CRC_POLYNOMIAL;
170     + else
171     + crc >>= 1;
172     + }
173     + crc_table[i] = crc;
174     + }
175     + crc_table_built = 1;
176     +}
177     +
178     +static u32 calculate_crc(u32 crc, void *buffer, u32 buffersize)
179     +{
180     + unsigned char *current_byte;
181     + u32 temp1, temp2, i;
182     +
183     + current_byte = (unsigned char *) buffer;
184     + /* Make sure the crc table is available */
185     + if (!crc_table_built)
186     + build_crc_table();
187     + /* Process each byte in the buffer. */
188     + for (i = 0; i < buffersize; i++) {
189     + temp1 = (crc >> 8) & 0x00FFFFFF;
190     + temp2 = crc_table[(crc ^ (u32) * current_byte) &
191     + (u32) 0xff];
192     + current_byte++;
193     + crc = temp1 ^ temp2;
194     + }
195     + return crc;
196     +}
197     +
198     +/**
199     + * le_bbr_table_sector_to_cpu
200     + *
201     + * Convert bbr meta data from on-disk (LE) format
202     + * to the native cpu endian format.
203     + **/
204     +static void le_bbr_table_sector_to_cpu(struct bbr_table *p)
205     +{
206     + int i;
207     + p->signature = le32_to_cpup(&p->signature);
208     + p->crc = le32_to_cpup(&p->crc);
209     + p->sequence_number = le32_to_cpup(&p->sequence_number);
210     + p->in_use_cnt = le32_to_cpup(&p->in_use_cnt);
211     + for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
212     + p->entries[i].bad_sect =
213     + le64_to_cpup(&p->entries[i].bad_sect);
214     + p->entries[i].replacement_sect =
215     + le64_to_cpup(&p->entries[i].replacement_sect);
216     + }
217     +}
218     +
219     +/**
220     + * cpu_bbr_table_sector_to_le
221     + *
222     + * Convert bbr meta data from cpu endian format to on-disk (LE) format
223     + **/
224     +static void cpu_bbr_table_sector_to_le(struct bbr_table *p,
225     + struct bbr_table *le)
226     +{
227     + int i;
228     + le->signature = cpu_to_le32p(&p->signature);
229     + le->crc = cpu_to_le32p(&p->crc);
230     + le->sequence_number = cpu_to_le32p(&p->sequence_number);
231     + le->in_use_cnt = cpu_to_le32p(&p->in_use_cnt);
232     + for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
233     + le->entries[i].bad_sect =
234     + cpu_to_le64p(&p->entries[i].bad_sect);
235     + le->entries[i].replacement_sect =
236     + cpu_to_le64p(&p->entries[i].replacement_sect);
237     + }
238     +}
239     +
240     +/**
241     + * validate_bbr_table_sector
242     + *
243     + * Check the specified BBR table sector for a valid signature and CRC. If it's
244     + * valid, endian-convert the table sector.
245     + **/
246     +static int validate_bbr_table_sector(struct bbr_table *p)
247     +{
248     + int rc = 0;
249     + int org_crc, final_crc;
250     +
251     + if (le32_to_cpup(&p->signature) != BBR_TABLE_SIGNATURE) {
252     + DMERR("dm-bbr: BBR table signature doesn't match!");
253     + DMERR("dm-bbr: Found 0x%x. Expecting 0x%x",
254     + le32_to_cpup(&p->signature), BBR_TABLE_SIGNATURE);
255     + rc = -EINVAL;
256     + goto out;
257     + }
258     +
259     + if (!p->crc) {
260     + DMERR("dm-bbr: BBR table sector has no CRC!");
261     + rc = -EINVAL;
262     + goto out;
263     + }
264     +
265     + org_crc = le32_to_cpup(&p->crc);
266     + p->crc = 0;
267     + final_crc = calculate_crc(INITIAL_CRC, (void *)p, sizeof(*p));
268     + if (final_crc != org_crc) {
269     + DMERR("dm-bbr: CRC failed!");
270     + DMERR("dm-bbr: Found 0x%x. Expecting 0x%x",
271     + org_crc, final_crc);
272     + rc = -EINVAL;
273     + goto out;
274     + }
275     +
276     + p->crc = cpu_to_le32p(&org_crc);
277     + le_bbr_table_sector_to_cpu(p);
278     +
279     +out:
280     + return rc;
281     +}
282     +
283     +/**
284     + * bbr_binary_tree_insert
285     + *
286     + * Insert a node into the binary tree.
287     + **/
288     +static void bbr_binary_tree_insert(struct bbr_runtime_remap **root,
289     + struct bbr_runtime_remap *newnode)
290     +{
291     + struct bbr_runtime_remap **node = root;
292     + while (node && *node) {
293     + if (newnode->remap.bad_sect > (*node)->remap.bad_sect) {
294     + node = &((*node)->right);
295     + } else {
296     + node = &((*node)->left);
297     + }
298     + }
299     +
300     + newnode->left = newnode->right = NULL;
301     + *node = newnode;
302     +}
303     +
304     +/**
305     + * bbr_binary_search
306     + *
307     + * Search for a node that contains bad_sect == lsn.
308     + **/
309     +static struct bbr_runtime_remap *bbr_binary_search(
310     + struct bbr_runtime_remap *root,
311     + u64 lsn)
312     +{
313     + struct bbr_runtime_remap *node = root;
314     + while (node) {
315     + if (node->remap.bad_sect == lsn) {
316     + break;
317     + }
318     + if (lsn > node->remap.bad_sect) {
319     + node = node->right;
320     + } else {
321     + node = node->left;
322     + }
323     + }
324     + return node;
325     +}
326     +
327     +/**
328     + * bbr_insert_remap_entry
329     + *
330     + * Create a new remap entry and add it to the binary tree for this node.
331     + **/
332     +static int bbr_insert_remap_entry(struct bbr_private *bbr_id,
333     + struct bbr_table_entry *new_bbr_entry)
334     +{
335     + struct bbr_runtime_remap *newnode;
336     +
337     + newnode = kmem_cache_alloc(bbr_remap_cache, GFP_NOIO);
338     + if (!newnode) {
339     + DMERR("dm-bbr: Could not allocate from remap cache!");
340     + return -ENOMEM;
341     + }
342     + newnode->remap.bad_sect = new_bbr_entry->bad_sect;
343     + newnode->remap.replacement_sect = new_bbr_entry->replacement_sect;
344     + spin_lock_irq(&bbr_id->remap_root_lock);
345     + bbr_binary_tree_insert(&bbr_id->remap_root, newnode);
346     + spin_unlock_irq(&bbr_id->remap_root_lock);
347     + return 0;
348     +}
349     +
350     +/**
351     + * bbr_table_to_remap_list
352     + *
353     + * The on-disk bbr table is sorted by the replacement sector LBA. In order to
354     + * improve run time performance, the in memory remap list must be sorted by
355     + * the bad sector LBA. This function is called at discovery time to initialize
356     + * the remap list. This function assumes that at least one copy of meta data
357     + * is valid.
358     + **/
359     +static u32 bbr_table_to_remap_list(struct bbr_private *bbr_id)
360     +{
361     + u32 in_use_blks = 0;
362     + int i, j;
363     + struct bbr_table *p;
364     +
365     + for (i = 0, p = bbr_id->bbr_table;
366     + i < bbr_id->nr_sects_bbr_table;
367     + i++, p++) {
368     + if (!p->in_use_cnt) {
369     + break;
370     + }
371     + in_use_blks += p->in_use_cnt;
372     + for (j = 0; j < p->in_use_cnt; j++) {
373     + bbr_insert_remap_entry(bbr_id, &p->entries[j]);
374     + }
375     + }
376     + if (in_use_blks) {
377     + char b[32];
378     + DMWARN("dm-bbr: There are %u BBR entries for device %s",
379     + in_use_blks, format_dev_t(b, bbr_id->dev->bdev->bd_dev));
380     + }
381     +
382     + return in_use_blks;
383     +}
384     +
385     +/**
386     + * bbr_search_remap_entry
387     + *
388     + * Search remap entry for the specified sector. If found, return a pointer to
389     + * the table entry. Otherwise, return NULL.
390     + **/
391     +static struct bbr_table_entry *bbr_search_remap_entry(
392     + struct bbr_private *bbr_id,
393     + u64 lsn)
394     +{
395     + struct bbr_runtime_remap *p;
396     +
397     + spin_lock_irq(&bbr_id->remap_root_lock);
398     + p = bbr_binary_search(bbr_id->remap_root, lsn);
399     + spin_unlock_irq(&bbr_id->remap_root_lock);
400     + if (p) {
401     + return (&p->remap);
402     + } else {
403     + return NULL;
404     + }
405     +}
406     +
407     +/**
408     + * bbr_remap
409     + *
410     + * If *lsn is in the remap table, return TRUE and modify *lsn,
411     + * else, return FALSE.
412     + **/
413     +static inline int bbr_remap(struct bbr_private *bbr_id,
414     + u64 *lsn)
415     +{
416     + struct bbr_table_entry *e;
417     +
418     + if (atomic_read(&bbr_id->in_use_replacement_blks)) {
419     + e = bbr_search_remap_entry(bbr_id, *lsn);
420     + if (e) {
421     + *lsn = e->replacement_sect;
422     + return 1;
423     + }
424     + }
425     + return 0;
426     +}
427     +
428     +/**
429     + * bbr_remap_probe
430     + *
431     + * If any of the sectors in the range [lsn, lsn+nr_sects] are in the remap
432     + * table return TRUE, Else, return FALSE.
433     + **/
434     +static inline int bbr_remap_probe(struct bbr_private *bbr_id,
435     + u64 lsn, u64 nr_sects)
436     +{
437     + u64 tmp, cnt;
438     +
439     + if (atomic_read(&bbr_id->in_use_replacement_blks)) {
440     + for (cnt = 0, tmp = lsn;
441     + cnt < nr_sects;
442     + cnt += bbr_id->blksize_in_sects, tmp = lsn + cnt) {
443     + if (bbr_remap(bbr_id,&tmp)) {
444     + return 1;
445     + }
446     + }
447     + }
448     + return 0;
449     +}
450     +
451     +/**
452     + * bbr_setup
453     + *
454     + * Read the remap tables from disk and set up the initial remap tree.
455     + **/
456     +static int bbr_setup(struct bbr_private *bbr_id)
457     +{
458     + struct bbr_table *table = bbr_id->bbr_table;
459     + struct io_region job;
460     + unsigned long error;
461     + int i, rc = 0;
462     +
463     + job.bdev = bbr_id->dev->bdev;
464     + job.count = 1;
465     +
466     + /* Read and verify each BBR table sector individually. */
467     + for (i = 0; i < bbr_id->nr_sects_bbr_table; i++, table++) {
468     + job.sector = bbr_id->lba_table1 + i;
469     + rc = dm_io_sync_vm(1, &job, READ, table, &error);
470     + if (rc && bbr_id->lba_table2) {
471     + job.sector = bbr_id->lba_table2 + i;
472     + rc = dm_io_sync_vm(1, &job, READ, table, &error);
473     + }
474     + if (rc) {
475     + goto out;
476     + }
477     +
478     + rc = validate_bbr_table_sector(table);
479     + if (rc) {
480     + goto out;
481     + }
482     + }
483     + atomic_set(&bbr_id->in_use_replacement_blks,
484     + bbr_table_to_remap_list(bbr_id));
485     +
486     +out:
487     + if (rc) {
488     + DMERR("dm-bbr: error during device setup: %d", rc);
489     + }
490     + return rc;
491     +}
492     +
493     +/**
494     + * bbr_io_remap_error
495     + * @bbr_id: Private data for the BBR node.
496     + * @rw: READ or WRITE.
497     + * @starting_lsn: Starting sector of request to remap.
498     + * @count: Number of sectors in the request.
499     + * @page: Page containing the data for the request.
500     + * @offset: Byte-offset of the data within the page.
501     + *
502     + * For the requested range, try to write each sector individually. For each
503     + * sector that fails, find the next available remap location and write the
504     + * data to that new location. Then update the table and write both copies
505     + * of the table to disk. Finally, update the in-memory mapping and do any
506     + * other necessary bookkeeping.
507     + **/
508     +static int bbr_io_remap_error(struct bbr_private *bbr_id,
509     + int rw,
510     + u64 starting_lsn,
511     + u64 count,
512     + struct page *page,
513     + unsigned int offset)
514     +{
515     + struct bbr_table *bbr_table;
516     + struct io_region job;
517     + struct page_list pl;
518     + unsigned long table_sector_index;
519     + unsigned long table_sector_offset;
520     + unsigned long index;
521     + unsigned long error;
522     + u64 lsn, new_lsn;
523     + char b[32];
524     + int rc;
525     +
526     + job.bdev = bbr_id->dev->bdev;
527     + job.count = 1;
528     + pl.page = page;
529     + pl.next = NULL;
530     +
531     + /* For each sector in the request. */
532     + for (lsn = 0; lsn < count; lsn++, offset += SECTOR_SIZE) {
533     + job.sector = starting_lsn + lsn;
534     + rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
535     + while (rc) {
536     + /* Find the next available relocation sector. */
537     + new_lsn = atomic_read(&bbr_id->in_use_replacement_blks);
538     + if (new_lsn >= bbr_id->nr_replacement_blks) {
539     + /* No more replacement sectors available. */
540     + return -EIO;
541     + }
542     + new_lsn += bbr_id->start_replacement_sect;
543     +
544     + /* Write the data to its new location. */
545     + DMWARN("dm-bbr: device %s: Trying to remap bad sector "PFU64" to sector "PFU64,
546     + format_dev_t(b, bbr_id->dev->bdev->bd_dev),
547     + starting_lsn + lsn, new_lsn);
548     + job.sector = new_lsn;
549     + rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
550     + if (rc) {
551     + /* This replacement sector is bad.
552     + * Try the next one.
553     + */
554     + DMERR("dm-bbr: device %s: replacement sector "PFU64" is bad. Skipping.",
555     + format_dev_t(b, bbr_id->dev->bdev->bd_dev), new_lsn);
556     + atomic_inc(&bbr_id->in_use_replacement_blks);
557     + continue;
558     + }
559     +
560     + /* Add this new entry to the on-disk table. */
561     + table_sector_index = new_lsn -
562     + bbr_id->start_replacement_sect;
563     + table_sector_offset = table_sector_index /
564     + BBR_ENTRIES_PER_SECT;
565     + index = table_sector_index % BBR_ENTRIES_PER_SECT;
566     +
567     + bbr_table = &bbr_id->bbr_table[table_sector_offset];
568     + bbr_table->entries[index].bad_sect = starting_lsn + lsn;
569     + bbr_table->entries[index].replacement_sect = new_lsn;
570     + bbr_table->in_use_cnt++;
571     + bbr_table->sequence_number++;
572     + bbr_table->crc = 0;
573     + bbr_table->crc = calculate_crc(INITIAL_CRC,
574     + bbr_table,
575     + sizeof(struct bbr_table));
576     +
577     + /* Write the table to disk. */
578     + cpu_bbr_table_sector_to_le(bbr_table, bbr_table);
579     + if (bbr_id->lba_table1) {
580     + job.sector = bbr_id->lba_table1 + table_sector_offset;
581     + rc = dm_io_sync_vm(1, &job, WRITE, bbr_table, &error);
582     + }
583     + if (bbr_id->lba_table2) {
584     + job.sector = bbr_id->lba_table2 + table_sector_offset;
585     + rc |= dm_io_sync_vm(1, &job, WRITE, bbr_table, &error);
586     + }
587     + le_bbr_table_sector_to_cpu(bbr_table);
588     +
589     + if (rc) {
590     + /* Error writing one of the tables to disk. */
591     + DMERR("dm-bbr: device %s: error updating BBR tables on disk.",
592     + format_dev_t(b, bbr_id->dev->bdev->bd_dev));
593     + return rc;
594     + }
595     +
596     + /* Insert a new entry in the remapping binary-tree. */
597     + rc = bbr_insert_remap_entry(bbr_id,
598     + &bbr_table->entries[index]);
599     + if (rc) {
600     + DMERR("dm-bbr: device %s: error adding new entry to remap tree.",
601     + format_dev_t(b, bbr_id->dev->bdev->bd_dev));
602     + return rc;
603     + }
604     +
605     + atomic_inc(&bbr_id->in_use_replacement_blks);
606     + }
607     + }
608     +
609     + return 0;
610     +}
611     +
612     +/**
613     + * bbr_io_process_request
614     + *
615     + * For each sector in this request, check if the sector has already
616     + * been remapped. If so, process all previous sectors in the request,
617     + * followed by the remapped sector. Then reset the starting lsn and
618     + * count, and keep going with the rest of the request as if it were
619     + * a whole new request. If any of the sync_io's return an error,
620     + * call the remapper to relocate the bad sector(s).
621     + *
622     + * 2.5 Note: When switching over to bio's for the I/O path, we have made
623     + * the assumption that the I/O request described by the bio is one
624     + * virtually contiguous piece of memory (even though the bio vector
625     + * describes it using a series of physical page addresses).
626     + **/
627     +static int bbr_io_process_request(struct bbr_private *bbr_id,
628     + struct bio *bio)
629     +{
630     + struct io_region job;
631     + u64 starting_lsn = bio->bi_sector;
632     + u64 count, lsn, remapped_lsn;
633     + struct page_list pl;
634     + unsigned int offset;
635     + unsigned long error;
636     + int i, rw = bio_data_dir(bio);
637     + int rc = 0;
638     +
639     + job.bdev = bbr_id->dev->bdev;
640     + pl.next = NULL;
641     +
642     + /* Each bio can contain multiple vectors, each with a different page.
643     + * Treat each vector as a separate request.
644     + */
645     + /* KMC: Is this the right way to walk the bvec list? */
646     + for (i = 0;
647     + i < bio->bi_vcnt;
648     + i++, bio->bi_idx++, starting_lsn += count) {
649     +
650     + /* Bvec info: number of sectors, page,
651     + * and byte-offset within page.
652     + */
653     + count = bio_iovec(bio)->bv_len >> SECTOR_SHIFT;
654     + pl.page = bio_iovec(bio)->bv_page;
655     + offset = bio_iovec(bio)->bv_offset;
656     +
657     + /* For each sector in this bvec, check if the sector has
658     + * already been remapped. If so, process all previous sectors
659     + * in this request, followed by the remapped sector. Then reset
660     + * the starting lsn and count and keep going with the rest of
661     + * the request as if it were a whole new request.
662     + */
663     + for (lsn = 0; lsn < count; lsn++) {
664     + remapped_lsn = starting_lsn + lsn;
665     + rc = bbr_remap(bbr_id, &remapped_lsn);
666     + if (!rc) {
667     + /* This sector is fine. */
668     + continue;
669     + }
670     +
671     + /* Process all sectors in the request up to this one. */
672     + if (lsn > 0) {
673     + job.sector = starting_lsn;
674     + job.count = lsn;
675     + rc = dm_io_sync(1, &job, rw, &pl,
676     + offset, &error);
677     + if (rc) {
678     + /* If this I/O failed, then one of the
679     + * sectors in this request needs to be
680     + * relocated.
681     + */
682     + rc = bbr_io_remap_error(bbr_id, rw,
683     + starting_lsn,
684     + lsn, pl.page,
685     + offset);
686     + if (rc) {
687     + /* KMC: Return? Or continue to next bvec? */
688     + return rc;
689     + }
690     + }
691     + offset += (lsn << SECTOR_SHIFT);
692     + }
693 phreak 274 +
694 dsd 7 + /* Process the remapped sector. */
695     + job.sector = remapped_lsn;
696     + job.count = 1;
697     + rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
698     + if (rc) {
699     + /* BUGBUG - Need more processing if this caused
700     + * an error. If this I/O failed, then the
701     + * existing remap is now bad, and we need to
702     + * find a new remap. Can't use
703     + * bbr_io_remap_error(), because the existing
704     + * map entry needs to be changed, not added
705     + * again, and the original table entry also
706     + * needs to be changed.
707     + */
708     + return rc;
709     + }
710     +
711     + starting_lsn += (lsn + 1);
712     + count -= (lsn + 1);
713     + lsn = -1;
714     + offset += SECTOR_SIZE;
715     + }
716     +
717     + /* Check for any remaining sectors after the last split. This
718     + * could potentially be the whole request, but that should be a
719     + * rare case because requests should only be processed by the
720     + * thread if we know an error occurred or they contained one or
721     + * more remapped sectors.
722     + */
723     + if (count) {
724     + job.sector = starting_lsn;
725     + job.count = count;
726     + rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
727     + if (rc) {
728     + /* If this I/O failed, then one of the sectors
729     + * in this request needs to be relocated.
730     + */
731     + rc = bbr_io_remap_error(bbr_id, rw, starting_lsn,
732     + count, pl.page, offset);
733     + if (rc) {
734     + /* KMC: Return? Or continue to next bvec? */
735     + return rc;
736     + }
737     + }
738     + }
739     + }
740     +
741     + return 0;
742     +}
743     +
744     +static void bbr_io_process_requests(struct bbr_private *bbr_id,
745     + struct bio *bio)
746     +{
747     + struct bio *next;
748     + int rc;
749     +
750     + while (bio) {
751     + next = bio->bi_next;
752     + bio->bi_next = NULL;
753     +
754     + rc = bbr_io_process_request(bbr_id, bio);
755     +
756     + bio_endio(bio, bio->bi_size, rc);
757     +
758     + bio = next;
759     + }
760     +}
761     +
762     +/**
763     + * bbr_remap_handler
764     + *
765     + * This is the handler for the bbr work-queue.
766     + *
767     + * I/O requests should only be sent to this handler if we know that:
768     + * a) the request contains at least one remapped sector.
769     + * or
770     + * b) the request caused an error on the normal I/O path.
771     + *
772     + * This function uses synchronous I/O, so sending a request to this
773     + * thread that doesn't need special processing will cause severe
774     + * performance degredation.
775     + **/
776     +static void bbr_remap_handler(void *data)
777     +{
778     + struct bbr_private *bbr_id = data;
779     + struct bio *bio;
780     + unsigned long flags;
781     +
782     + spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
783     + bio = bio_list_get(&bbr_id->remap_ios);
784     + spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);
785     +
786     + bbr_io_process_requests(bbr_id, bio);
787     +}
788     +
789     +/**
790     + * bbr_endio
791     + *
792     + * This is the callback for normal write requests. Check for an error
793     + * during the I/O, and send to the thread for processing if necessary.
794     + **/
795     +static int bbr_endio(struct dm_target *ti, struct bio *bio,
796     + int error, union map_info *map_context)
797     +{
798     + struct bbr_private *bbr_id = ti->private;
799     + struct dm_bio_details *bbr_io = map_context->ptr;
800     +
801     + if (error && bbr_io) {
802     + unsigned long flags;
803     + char b[32];
804     +
805     + dm_bio_restore(bbr_io, bio);
806     + map_context->ptr = NULL;
807     +
808     + DMERR("dm-bbr: device %s: I/O failure on sector %lu. "
809     + "Scheduling for retry.",
810     + format_dev_t(b, bbr_id->dev->bdev->bd_dev),
811     + (unsigned long)bio->bi_sector);
812     +
813     + spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
814     + bio_list_add(&bbr_id->remap_ios, bio);
815     + spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);
816     +
817     + queue_work(dm_bbr_wq, &bbr_id->remap_work);
818     +
819     + error = 1;
820     + }
821     +
822     + if (bbr_io)
823     + mempool_free(bbr_io, bbr_io_pool);
824     +
825     + return error;
826     +}
827     +
828     +/**
829     + * Construct a bbr mapping
830     + **/
831     +static int bbr_ctr(struct dm_target *ti, unsigned int argc, char **argv)
832     +{
833     + struct bbr_private *bbr_id;
834     + unsigned long block_size;
835     + char *end;
836     + int rc = -EINVAL;
837     +
838     + if (argc != 8) {
839     + ti->error = "dm-bbr requires exactly 8 arguments: "
840     + "device offset table1_lsn table2_lsn table_size start_replacement nr_replacement_blks block_size";
841     + goto out1;
842     + }
843     +
844     + bbr_id = bbr_alloc_private();
845     + if (!bbr_id) {
846     + ti->error = "dm-bbr: Error allocating bbr private data.";
847     + goto out1;
848     + }
849     +
850     + bbr_id->offset = simple_strtoull(argv[1], &end, 10);
851     + bbr_id->lba_table1 = simple_strtoull(argv[2], &end, 10);
852     + bbr_id->lba_table2 = simple_strtoull(argv[3], &end, 10);
853     + bbr_id->nr_sects_bbr_table = simple_strtoull(argv[4], &end, 10);
854     + bbr_id->start_replacement_sect = simple_strtoull(argv[5], &end, 10);
855     + bbr_id->nr_replacement_blks = simple_strtoull(argv[6], &end, 10);
856     + block_size = simple_strtoul(argv[7], &end, 10);
857     + bbr_id->blksize_in_sects = (block_size >> SECTOR_SHIFT);
858     +
859     + bbr_id->bbr_table = vmalloc(bbr_id->nr_sects_bbr_table << SECTOR_SHIFT);
860     + if (!bbr_id->bbr_table) {
861     + ti->error = "dm-bbr: Error allocating bbr table.";
862     + goto out2;
863     + }
864     +
865     + if (dm_get_device(ti, argv[0], 0, ti->len,
866     + dm_table_get_mode(ti->table), &bbr_id->dev)) {
867     + ti->error = "dm-bbr: Device lookup failed";
868     + goto out2;
869     + }
870     +
871     + rc = bbr_setup(bbr_id);
872     + if (rc) {
873     + ti->error = "dm-bbr: Device setup failed";
874     + goto out3;
875     + }
876     +
877     + ti->private = bbr_id;
878     + return 0;
879     +
880     +out3:
881     + dm_put_device(ti, bbr_id->dev);
882     +out2:
883     + bbr_free_private(bbr_id);
884     +out1:
885     + return rc;
886     +}
887     +
888     +static void bbr_dtr(struct dm_target *ti)
889     +{
890     + struct bbr_private *bbr_id = ti->private;
891     +
892     + dm_put_device(ti, bbr_id->dev);
893     + bbr_free_private(bbr_id);
894     +}
895     +
896     +static int bbr_map(struct dm_target *ti, struct bio *bio,
897     + union map_info *map_context)
898     +{
899     + struct bbr_private *bbr_id = ti->private;
900     + struct dm_bio_details *bbr_io;
901     + unsigned long flags;
902     + int rc = 1;
903     +
904     + bio->bi_sector += bbr_id->offset;
905     +
906     + if (atomic_read(&bbr_id->in_use_replacement_blks) == 0 ||
907     + !bbr_remap_probe(bbr_id, bio->bi_sector, bio_sectors(bio))) {
908     + /* No existing remaps or this request doesn't
909     + * contain any remapped sectors.
910     + */
911     + bio->bi_bdev = bbr_id->dev->bdev;
912     +
913     + bbr_io = mempool_alloc(bbr_io_pool, GFP_NOIO);
914     + dm_bio_record(bbr_io, bio);
915     + map_context->ptr = bbr_io;
916     + } else {
917     + /* This request has at least one remapped sector.
918     + * Give it to the work-queue for processing.
919     + */
920     + map_context->ptr = NULL;
921     + spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
922     + bio_list_add(&bbr_id->remap_ios, bio);
923     + spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);
924     +
925     + queue_work(dm_bbr_wq, &bbr_id->remap_work);
926     + rc = 0;
927     + }
928     +
929     + return rc;
930     +}
931     +
932     +static int bbr_status(struct dm_target *ti, status_type_t type,
933     + char *result, unsigned int maxlen)
934     +{
935     + struct bbr_private *bbr_id = ti->private;
936     + char b[BDEVNAME_SIZE];
937     +
938     + switch (type) {
939     + case STATUSTYPE_INFO:
940     + result[0] = '\0';
941     + break;
942     +
943     + case STATUSTYPE_TABLE:
944     + snprintf(result, maxlen, "%s "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" %u",
945     + format_dev_t(b, bbr_id->dev->bdev->bd_dev),
946     + bbr_id->offset, bbr_id->lba_table1, bbr_id->lba_table2,
947     + bbr_id->nr_sects_bbr_table,
948     + bbr_id->start_replacement_sect,
949     + bbr_id->nr_replacement_blks,
950     + bbr_id->blksize_in_sects << SECTOR_SHIFT);
951     + break;
952     + }
953     + return 0;
954     +}
955     +
956     +static struct target_type bbr_target = {
957     + .name = "bbr",
958     + .version= {1, 0, 1},
959     + .module = THIS_MODULE,
960     + .ctr = bbr_ctr,
961     + .dtr = bbr_dtr,
962     + .map = bbr_map,
963     + .end_io = bbr_endio,
964     + .status = bbr_status,
965     +};
966     +
967     +int __init dm_bbr_init(void)
968     +{
969     + int rc;
970     +
971     + rc = dm_register_target(&bbr_target);
972     + if (rc) {
973     + DMERR("dm-bbr: error registering target.");
974     + goto err1;
975     + }
976     +
977     + bbr_remap_cache = kmem_cache_create("bbr-remap",
978     + sizeof(struct bbr_runtime_remap),
979     + 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
980     + if (!bbr_remap_cache) {
981     + DMERR("dm-bbr: error creating remap cache.");
982     + rc = ENOMEM;
983     + goto err2;
984     + }
985     +
986     + bbr_io_cache = kmem_cache_create("bbr-io", sizeof(struct dm_bio_details),
987     + 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
988     + if (!bbr_io_cache) {
989     + DMERR("dm-bbr: error creating io cache.");
990     + rc = ENOMEM;
991     + goto err3;
992     + }
993     +
994     + bbr_io_pool = mempool_create(256, mempool_alloc_slab,
995     + mempool_free_slab, bbr_io_cache);
996     + if (!bbr_io_pool) {
997     + DMERR("dm-bbr: error creating io mempool.");
998     + rc = ENOMEM;
999     + goto err4;
1000     + }
1001     +
1002     + dm_bbr_wq = create_workqueue("dm-bbr");
1003     + if (!dm_bbr_wq) {
1004     + DMERR("dm-bbr: error creating work-queue.");
1005     + rc = ENOMEM;
1006     + goto err5;
1007     + }
1008     +
1009     + rc = dm_io_get(1);
1010     + if (rc) {
1011     + DMERR("dm-bbr: error initializing I/O service.");
1012     + goto err6;
1013     + }
1014     +
1015     + return 0;
1016     +
1017     +err6:
1018     + destroy_workqueue(dm_bbr_wq);
1019     +err5:
1020     + mempool_destroy(bbr_io_pool);
1021     +err4:
1022     + kmem_cache_destroy(bbr_io_cache);
1023     +err3:
1024     + kmem_cache_destroy(bbr_remap_cache);
1025     +err2:
1026     + dm_unregister_target(&bbr_target);
1027     +err1:
1028     + return rc;
1029     +}
1030     +
1031     +void __exit dm_bbr_exit(void)
1032     +{
1033     + dm_io_put(1);
1034     + destroy_workqueue(dm_bbr_wq);
1035     + mempool_destroy(bbr_io_pool);
1036     + kmem_cache_destroy(bbr_io_cache);
1037     + kmem_cache_destroy(bbr_remap_cache);
1038     + dm_unregister_target(&bbr_target);
1039     +}
1040     +
1041     +module_init(dm_bbr_init);
1042     +module_exit(dm_bbr_exit);
1043     +MODULE_LICENSE("GPL");
1044 phreak 665 Index: linux-2.6.19/drivers/md/dm-bbr.h
1045 phreak 274 ===================================================================
1046     --- /dev/null
1047 phreak 665 +++ linux-2.6.19/drivers/md/dm-bbr.h
1048 dsd 7 @@ -0,0 +1,125 @@
1049     +/*
1050     + * (C) Copyright IBM Corp. 2002, 2004
1051     + *
1052     + * This program is free software; you can redistribute it and/or modify
1053     + * it under the terms of the GNU General Public License as published by
1054     + * the Free Software Foundation; either version 2 of the License, or
1055     + * (at your option) any later version.
1056     + *
1057     + * This program is distributed in the hope that it will be useful,
1058     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1059     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
1060     + * the GNU General Public License for more details.
1061     + *
1062     + * You should have received a copy of the GNU General Public License
1063     + * along with this program; if not, write to the Free Software
1064     + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1065     + *
1066     + * linux/drivers/md/dm-bbr.h
1067     + *
1068     + * Bad-block-relocation (BBR) target for device-mapper.
1069     + *
1070     + * The BBR target is designed to remap I/O write failures to another safe
1071     + * location on disk. Note that most disk drives have BBR built into them;
1072     + * this means that our software BBR will only be activated when all hardware
1073     + * BBR replacement sectors have been used.
1074     + */
1075     +
1076     +#define BBR_TABLE_SIGNATURE 0x42627254 /* BbrT */
1077     +#define BBR_ENTRIES_PER_SECT 31
1078     +#define INITIAL_CRC 0xFFFFFFFF
1079     +#define CRC_POLYNOMIAL 0xEDB88320L
1080     +
1081     +/**
1082     + * Macros to cleanly print 64-bit numbers on both 32-bit and 64-bit machines.
1083     + * Use these in place of %Ld, %Lu, and %Lx.
1084     + **/
1085     +#if BITS_PER_LONG > 32
1086     +#define PFU64 "%lu"
1087     +#else
1088     +#define PFU64 "%Lu"
1089     +#endif
1090     +
1091     +/**
1092     + * struct bbr_table_entry
1093     + * @bad_sect: LBA of bad location.
1094     + * @replacement_sect: LBA of new location.
1095     + *
1096     + * Structure to describe one BBR remap.
1097     + **/
1098     +struct bbr_table_entry {
1099     + u64 bad_sect;
1100     + u64 replacement_sect;
1101     +};
1102     +
1103     +/**
1104     + * struct bbr_table
1105     + * @signature: Signature on each BBR table sector.
1106     + * @crc: CRC for this table sector.
1107     + * @sequence_number: Used to resolve conflicts when primary and secondary
1108     + * tables do not match.
1109     + * @in_use_cnt: Number of in-use table entries.
1110     + * @entries: Actual table of remaps.
1111     + *
1112     + * Structure to describe each sector of the metadata table. Each sector in this
1113     + * table can describe 31 remapped sectors.
1114     + **/
1115     +struct bbr_table {
1116     + u32 signature;
1117     + u32 crc;
1118     + u32 sequence_number;
1119     + u32 in_use_cnt;
1120     + struct bbr_table_entry entries[BBR_ENTRIES_PER_SECT];
1121     +};
1122     +
1123     +/**
1124     + * struct bbr_runtime_remap
1125     + *
1126     + * Node in the binary tree used to keep track of remaps.
1127     + **/
1128     +struct bbr_runtime_remap {
1129     + struct bbr_table_entry remap;
1130     + struct bbr_runtime_remap *left;
1131     + struct bbr_runtime_remap *right;
1132     +};
1133     +
1134     +/**
1135     + * struct bbr_private
1136     + * @dev: Info about underlying device.
1137     + * @bbr_table: Copy of metadata table.
1138     + * @remap_root: Binary tree containing all remaps.
1139     + * @remap_root_lock: Lock for the binary tree.
1140     + * @remap_work: For adding work items to the work-queue.
1141     + * @remap_ios: List of I/Os for the work-queue to handle.
1142     + * @remap_ios_lock: Lock for the remap_ios list.
1143     + * @offset: LBA of data area.
1144     + * @lba_table1: LBA of primary BBR table.
1145     + * @lba_table2: LBA of secondary BBR table.
1146     + * @nr_sects_bbr_table: Size of each BBR table.
1147     + * @nr_replacement_blks: Number of replacement blocks.
1148     + * @start_replacement_sect: LBA of start of replacement blocks.
1149     + * @blksize_in_sects: Size of each block.
1150     + * @in_use_replacement_blks: Current number of remapped blocks.
1151     + *
1152     + * Private data for each BBR target.
1153     + **/
1154     +struct bbr_private {
1155     + struct dm_dev *dev;
1156     + struct bbr_table *bbr_table;
1157     + struct bbr_runtime_remap *remap_root;
1158     + spinlock_t remap_root_lock;
1159     +
1160     + struct work_struct remap_work;
1161     + struct bio_list remap_ios;
1162     + spinlock_t remap_ios_lock;
1163     +
1164     + u64 offset;
1165     + u64 lba_table1;
1166     + u64 lba_table2;
1167     + u64 nr_sects_bbr_table;
1168     + u64 start_replacement_sect;
1169     + u64 nr_replacement_blks;
1170     + u32 blksize_in_sects;
1171     + atomic_t in_use_replacement_blks;
1172     +};
1173     +

  ViewVC Help
Powered by ViewVC 1.1.20