Contents of /genpatches-2.6/trunk/2.6.18-pre/4105_dm-bbr.patch

Revision 376
Wed Apr 12 12:20:21 2006 UTC by phreak
Original Path: genpatches-2.6/trunk/2.6.17-pre/4105_dm-bbr.patch
File size: 32133 byte(s)
Fixing file ranges in 4105_dm-bbr.patch, 4110_promise-pdc2037x.patch, 4205_vesafb-tng-1.0-rc1-r3.patch, 4300_squashfs-3.0.patch, 4400_speakup-20060103.patch and 4405_alpha-sysctl-uac.patch
1 phreak 366 Index: linux-git/drivers/md/Kconfig
2     ===================================================================
3     --- linux-git.orig/drivers/md/Kconfig
4     +++ linux-git/drivers/md/Kconfig
5 phreak 376 @@ -263,5 +263,16 @@ config DM_MULTIPATH_EMC
6 phreak 366 ---help---
7     Multipath support for EMC CX/AX series hardware.
8    
9     +config BLK_DEV_DM_BBR
10     + tristate "Bad Block Relocation Device Target (EXPERIMENTAL)"
11     + depends on BLK_DEV_DM && EXPERIMENTAL
12     + ---help---
13     + Support for devices with software-based bad-block-relocation.
14     +
15     + To compile this as a module, choose M here: the module will be
16     + called dm-bbr.
17     +
18     + If unsure, say N.
19     +
20     endmenu
21    
22     Index: linux-git/drivers/md/Makefile
23     ===================================================================
24     --- linux-git.orig/drivers/md/Makefile
25     +++ linux-git/drivers/md/Makefile
26     @@ -37,6 +37,7 @@ obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc
27     obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
28     obj-$(CONFIG_DM_MIRROR) += dm-mirror.o
29     obj-$(CONFIG_DM_ZERO) += dm-zero.o
30     +obj-$(CONFIG_BLK_DEV_DM_BBR) += dm-bbr.o
31    
32     quiet_cmd_unroll = UNROLL $@
33     cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
34 phreak 274 Index: linux-git/drivers/md/dm-bbr.c
35     ===================================================================
36     --- /dev/null
37     +++ linux-git/drivers/md/dm-bbr.c
38 dsd 7 @@ -0,0 +1,1003 @@
39     +/*
40     + * (C) Copyright IBM Corp. 2002, 2004
41     + *
42     + * This program is free software; you can redistribute it and/or modify
43     + * it under the terms of the GNU General Public License as published by
44     + * the Free Software Foundation; either version 2 of the License, or
45     + * (at your option) any later version.
46     + *
47     + * This program is distributed in the hope that it will be useful,
48     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
49     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
50     + * the GNU General Public License for more details.
51     + *
52     + * You should have received a copy of the GNU General Public License
53     + * along with this program; if not, write to the Free Software
54     + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
55     + *
56     + * linux/drivers/md/dm-bbr.c
57     + *
58     + * Bad-block-relocation (BBR) target for device-mapper.
59     + *
60     + * The BBR target is designed to remap I/O write failures to another safe
61     + * location on disk. Note that most disk drives have BBR built into them;
62     + * this means that our software BBR will only be activated when all hardware
63     + * BBR replacement sectors have been used.
64     + */
65     +
66     +#include <linux/module.h>
67     +#include <linux/init.h>
68     +#include <linux/bio.h>
69     +#include <linux/spinlock.h>
70     +#include <linux/slab.h>
71     +#include <linux/mempool.h>
72     +#include <linux/workqueue.h>
73     +#include <linux/vmalloc.h>
74     +
75     +#include "dm.h"
76     +#include "dm-bio-list.h"
77     +#include "dm-bio-record.h"
78     +#include "dm-bbr.h"
79     +#include "dm-io.h"
80     +
81     +#define SECTOR_SIZE (1 << SECTOR_SHIFT)
82     +
83     +static struct workqueue_struct *dm_bbr_wq = NULL;
84     +static void bbr_remap_handler(void *data);
85     +static kmem_cache_t *bbr_remap_cache;
86     +static kmem_cache_t *bbr_io_cache;
87     +static mempool_t *bbr_io_pool;
88     +
89     +/**
90     + * bbr_binary_tree_destroy
91     + *
92     + * Destroy the binary tree.
93     + **/
94     +static void bbr_binary_tree_destroy(struct bbr_runtime_remap *root)
95     +{
96     + struct bbr_runtime_remap **link = NULL;
97     + struct bbr_runtime_remap *node = root;
98     +
99     + while (node) {
100     + if (node->left) {
101     + link = &(node->left);
102     + node = node->left;
103     + continue;
104     + }
105     + if (node->right) {
106     + link = &(node->right);
107     + node = node->right;
108     + continue;
109     + }
110     +
111     + kmem_cache_free(bbr_remap_cache, node);
112     + if (node == root) {
113     + /* If root is deleted, we're done. */
114     + break;
115     + }
116     +
117     + /* Back to root. */
118     + node = root;
119     + *link = NULL;
120     + }
121     +}
122     +
123     +static void bbr_free_remap(struct bbr_private *bbr_id)
124     +{
125     + spin_lock_irq(&bbr_id->remap_root_lock);
126     + bbr_binary_tree_destroy(bbr_id->remap_root);
127     + bbr_id->remap_root = NULL;
128     + spin_unlock_irq(&bbr_id->remap_root_lock);
129     +}
130     +
131     +static struct bbr_private *bbr_alloc_private(void)
132     +{
133     + struct bbr_private *bbr_id;
134     +
135     + bbr_id = kmalloc(sizeof(*bbr_id), GFP_KERNEL);
136     + if (bbr_id) {
137     + memset(bbr_id, 0, sizeof(*bbr_id));
138     + INIT_WORK(&bbr_id->remap_work, bbr_remap_handler, bbr_id);
139     + bbr_id->remap_root_lock = SPIN_LOCK_UNLOCKED;
140     + bbr_id->remap_ios_lock = SPIN_LOCK_UNLOCKED;
141     + bbr_id->in_use_replacement_blks = (atomic_t)ATOMIC_INIT(0);
142     + }
143     +
144     + return bbr_id;
145     +}
146     +
147     +static void bbr_free_private(struct bbr_private *bbr_id)
148     +{
149     + if (bbr_id->bbr_table) {
150     + vfree(bbr_id->bbr_table);
151     + }
152     + bbr_free_remap(bbr_id);
153     + kfree(bbr_id);
154     +}
155     +
156     +static u32 crc_table[256];
157     +static u32 crc_table_built = 0;
158     +
159     +static void build_crc_table(void)
160     +{
161     + u32 i, j, crc;
162     +
163     + for (i = 0; i <= 255; i++) {
164     + crc = i;
165     + for (j = 8; j > 0; j--) {
166     + if (crc & 1)
167     + crc = (crc >> 1) ^ CRC_POLYNOMIAL;
168     + else
169     + crc >>= 1;
170     + }
171     + crc_table[i] = crc;
172     + }
173     + crc_table_built = 1;
174     +}
175     +
176     +static u32 calculate_crc(u32 crc, void *buffer, u32 buffersize)
177     +{
178     + unsigned char *current_byte;
179     + u32 temp1, temp2, i;
180     +
181     + current_byte = (unsigned char *) buffer;
182     + /* Make sure the crc table is available */
183     + if (!crc_table_built)
184     + build_crc_table();
185     + /* Process each byte in the buffer. */
186     + for (i = 0; i < buffersize; i++) {
187     + temp1 = (crc >> 8) & 0x00FFFFFF;
188     + temp2 = crc_table[(crc ^ (u32) * current_byte) &
189     + (u32) 0xff];
190     + current_byte++;
191     + crc = temp1 ^ temp2;
192     + }
193     + return crc;
194     +}
195     +
196     +/**
197     + * le_bbr_table_sector_to_cpu
198     + *
199     + * Convert bbr meta data from on-disk (LE) format
200     + * to the native cpu endian format.
201     + **/
202     +static void le_bbr_table_sector_to_cpu(struct bbr_table *p)
203     +{
204     + int i;
205     + p->signature = le32_to_cpup(&p->signature);
206     + p->crc = le32_to_cpup(&p->crc);
207     + p->sequence_number = le32_to_cpup(&p->sequence_number);
208     + p->in_use_cnt = le32_to_cpup(&p->in_use_cnt);
209     + for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
210     + p->entries[i].bad_sect =
211     + le64_to_cpup(&p->entries[i].bad_sect);
212     + p->entries[i].replacement_sect =
213     + le64_to_cpup(&p->entries[i].replacement_sect);
214     + }
215     +}
216     +
217     +/**
218     + * cpu_bbr_table_sector_to_le
219     + *
220     + * Convert bbr meta data from cpu endian format to on-disk (LE) format
221     + **/
222     +static void cpu_bbr_table_sector_to_le(struct bbr_table *p,
223     + struct bbr_table *le)
224     +{
225     + int i;
226     + le->signature = cpu_to_le32p(&p->signature);
227     + le->crc = cpu_to_le32p(&p->crc);
228     + le->sequence_number = cpu_to_le32p(&p->sequence_number);
229     + le->in_use_cnt = cpu_to_le32p(&p->in_use_cnt);
230     + for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
231     + le->entries[i].bad_sect =
232     + cpu_to_le64p(&p->entries[i].bad_sect);
233     + le->entries[i].replacement_sect =
234     + cpu_to_le64p(&p->entries[i].replacement_sect);
235     + }
236     +}
237     +
238     +/**
239     + * validate_bbr_table_sector
240     + *
241     + * Check the specified BBR table sector for a valid signature and CRC. If it's
242     + * valid, endian-convert the table sector.
243     + **/
244     +static int validate_bbr_table_sector(struct bbr_table *p)
245     +{
246     + int rc = 0;
247     + int org_crc, final_crc;
248     +
249     + if (le32_to_cpup(&p->signature) != BBR_TABLE_SIGNATURE) {
250     + DMERR("dm-bbr: BBR table signature doesn't match!");
251     + DMERR("dm-bbr: Found 0x%x. Expecting 0x%x",
252     + le32_to_cpup(&p->signature), BBR_TABLE_SIGNATURE);
253     + rc = -EINVAL;
254     + goto out;
255     + }
256     +
257     + if (!p->crc) {
258     + DMERR("dm-bbr: BBR table sector has no CRC!");
259     + rc = -EINVAL;
260     + goto out;
261     + }
262     +
263     + org_crc = le32_to_cpup(&p->crc);
264     + p->crc = 0;
265     + final_crc = calculate_crc(INITIAL_CRC, (void *)p, sizeof(*p));
266     + if (final_crc != org_crc) {
267     + DMERR("dm-bbr: CRC failed!");
268     + DMERR("dm-bbr: Found 0x%x. Expecting 0x%x",
269     + org_crc, final_crc);
270     + rc = -EINVAL;
271     + goto out;
272     + }
273     +
274     + p->crc = cpu_to_le32p(&org_crc);
275     + le_bbr_table_sector_to_cpu(p);
276     +
277     +out:
278     + return rc;
279     +}
280     +
281     +/**
282     + * bbr_binary_tree_insert
283     + *
284     + * Insert a node into the binary tree.
285     + **/
286     +static void bbr_binary_tree_insert(struct bbr_runtime_remap **root,
287     + struct bbr_runtime_remap *newnode)
288     +{
289     + struct bbr_runtime_remap **node = root;
290     + while (node && *node) {
291     + if (newnode->remap.bad_sect > (*node)->remap.bad_sect) {
292     + node = &((*node)->right);
293     + } else {
294     + node = &((*node)->left);
295     + }
296     + }
297     +
298     + newnode->left = newnode->right = NULL;
299     + *node = newnode;
300     +}
301     +
302     +/**
303     + * bbr_binary_search
304     + *
305     + * Search for a node that contains bad_sect == lsn.
306     + **/
307     +static struct bbr_runtime_remap *bbr_binary_search(
308     + struct bbr_runtime_remap *root,
309     + u64 lsn)
310     +{
311     + struct bbr_runtime_remap *node = root;
312     + while (node) {
313     + if (node->remap.bad_sect == lsn) {
314     + break;
315     + }
316     + if (lsn > node->remap.bad_sect) {
317     + node = node->right;
318     + } else {
319     + node = node->left;
320     + }
321     + }
322     + return node;
323     +}
324     +
325     +/**
326     + * bbr_insert_remap_entry
327     + *
328     + * Create a new remap entry and add it to the binary tree for this node.
329     + **/
330     +static int bbr_insert_remap_entry(struct bbr_private *bbr_id,
331     + struct bbr_table_entry *new_bbr_entry)
332     +{
333     + struct bbr_runtime_remap *newnode;
334     +
335     + newnode = kmem_cache_alloc(bbr_remap_cache, GFP_NOIO);
336     + if (!newnode) {
337     + DMERR("dm-bbr: Could not allocate from remap cache!");
338     + return -ENOMEM;
339     + }
340     + newnode->remap.bad_sect = new_bbr_entry->bad_sect;
341     + newnode->remap.replacement_sect = new_bbr_entry->replacement_sect;
342     + spin_lock_irq(&bbr_id->remap_root_lock);
343     + bbr_binary_tree_insert(&bbr_id->remap_root, newnode);
344     + spin_unlock_irq(&bbr_id->remap_root_lock);
345     + return 0;
346     +}
347     +
348     +/**
349     + * bbr_table_to_remap_list
350     + *
351     + * The on-disk bbr table is sorted by the replacement sector LBA. In order to
352     + * improve run-time performance, the in-memory remap list must be sorted by
353     + * the bad sector LBA. This function is called at discovery time to initialize
354     + * the remap list. This function assumes that at least one copy of meta data
355     + * is valid.
356     + **/
357     +static u32 bbr_table_to_remap_list(struct bbr_private *bbr_id)
358     +{
359     + u32 in_use_blks = 0;
360     + int i, j;
361     + struct bbr_table *p;
362     +
363     + for (i = 0, p = bbr_id->bbr_table;
364     + i < bbr_id->nr_sects_bbr_table;
365     + i++, p++) {
366     + if (!p->in_use_cnt) {
367     + break;
368     + }
369     + in_use_blks += p->in_use_cnt;
370     + for (j = 0; j < p->in_use_cnt; j++) {
371     + bbr_insert_remap_entry(bbr_id, &p->entries[j]);
372     + }
373     + }
374     + if (in_use_blks) {
375     + char b[32];
376     + DMWARN("dm-bbr: There are %u BBR entries for device %s",
377     + in_use_blks, format_dev_t(b, bbr_id->dev->bdev->bd_dev));
378     + }
379     +
380     + return in_use_blks;
381     +}
382     +
383     +/**
384     + * bbr_search_remap_entry
385     + *
386     + * Search remap entry for the specified sector. If found, return a pointer to
387     + * the table entry. Otherwise, return NULL.
388     + **/
389     +static struct bbr_table_entry *bbr_search_remap_entry(
390     + struct bbr_private *bbr_id,
391     + u64 lsn)
392     +{
393     + struct bbr_runtime_remap *p;
394     +
395     + spin_lock_irq(&bbr_id->remap_root_lock);
396     + p = bbr_binary_search(bbr_id->remap_root, lsn);
397     + spin_unlock_irq(&bbr_id->remap_root_lock);
398     + if (p) {
399     + return (&p->remap);
400     + } else {
401     + return NULL;
402     + }
403     +}
404     +
405     +/**
406     + * bbr_remap
407     + *
408     + * If *lsn is in the remap table, modify *lsn and return TRUE;
409     + * otherwise return FALSE.
410     + **/
411     +static inline int bbr_remap(struct bbr_private *bbr_id,
412     + u64 *lsn)
413     +{
414     + struct bbr_table_entry *e;
415     +
416     + if (atomic_read(&bbr_id->in_use_replacement_blks)) {
417     + e = bbr_search_remap_entry(bbr_id, *lsn);
418     + if (e) {
419     + *lsn = e->replacement_sect;
420     + return 1;
421     + }
422     + }
423     + return 0;
424     +}
425     +
426     +/**
427     + * bbr_remap_probe
428     + *
429     + * If any of the sectors in the range [lsn, lsn+nr_sects] are in the remap
430     + * table, return TRUE; else return FALSE.
431     + **/
432     +static inline int bbr_remap_probe(struct bbr_private *bbr_id,
433     + u64 lsn, u64 nr_sects)
434     +{
435     + u64 tmp, cnt;
436     +
437     + if (atomic_read(&bbr_id->in_use_replacement_blks)) {
438     + for (cnt = 0, tmp = lsn;
439     + cnt < nr_sects;
440     + cnt += bbr_id->blksize_in_sects, tmp = lsn + cnt) {
441     + if (bbr_remap(bbr_id,&tmp)) {
442     + return 1;
443     + }
444     + }
445     + }
446     + return 0;
447     +}
448     +
449     +/**
450     + * bbr_setup
451     + *
452     + * Read the remap tables from disk and set up the initial remap tree.
453     + **/
454     +static int bbr_setup(struct bbr_private *bbr_id)
455     +{
456     + struct bbr_table *table = bbr_id->bbr_table;
457     + struct io_region job;
458     + unsigned long error;
459     + int i, rc = 0;
460     +
461     + job.bdev = bbr_id->dev->bdev;
462     + job.count = 1;
463     +
464     + /* Read and verify each BBR table sector individually. */
465     + for (i = 0; i < bbr_id->nr_sects_bbr_table; i++, table++) {
466     + job.sector = bbr_id->lba_table1 + i;
467     + rc = dm_io_sync_vm(1, &job, READ, table, &error);
468     + if (rc && bbr_id->lba_table2) {
469     + job.sector = bbr_id->lba_table2 + i;
470     + rc = dm_io_sync_vm(1, &job, READ, table, &error);
471     + }
472     + if (rc) {
473     + goto out;
474     + }
475     +
476     + rc = validate_bbr_table_sector(table);
477     + if (rc) {
478     + goto out;
479     + }
480     + }
481     + atomic_set(&bbr_id->in_use_replacement_blks,
482     + bbr_table_to_remap_list(bbr_id));
483     +
484     +out:
485     + if (rc) {
486     + DMERR("dm-bbr: error during device setup: %d", rc);
487     + }
488     + return rc;
489     +}
490     +
491     +/**
492     + * bbr_io_remap_error
493     + * @bbr_id: Private data for the BBR node.
494     + * @rw: READ or WRITE.
495     + * @starting_lsn: Starting sector of request to remap.
496     + * @count: Number of sectors in the request.
497     + * @page: Page containing the data for the request.
498     + * @offset: Byte-offset of the data within the page.
499     + *
500     + * For the requested range, try to write each sector individually. For each
501     + * sector that fails, find the next available remap location and write the
502     + * data to that new location. Then update the table and write both copies
503     + * of the table to disk. Finally, update the in-memory mapping and do any
504     + * other necessary bookkeeping.
505     + **/
506     +static int bbr_io_remap_error(struct bbr_private *bbr_id,
507     + int rw,
508     + u64 starting_lsn,
509     + u64 count,
510     + struct page *page,
511     + unsigned int offset)
512     +{
513     + struct bbr_table *bbr_table;
514     + struct io_region job;
515     + struct page_list pl;
516     + unsigned long table_sector_index;
517     + unsigned long table_sector_offset;
518     + unsigned long index;
519     + unsigned long error;
520     + u64 lsn, new_lsn;
521     + char b[32];
522     + int rc;
523     +
524     + job.bdev = bbr_id->dev->bdev;
525     + job.count = 1;
526     + pl.page = page;
527     + pl.next = NULL;
528     +
529     + /* For each sector in the request. */
530     + for (lsn = 0; lsn < count; lsn++, offset += SECTOR_SIZE) {
531     + job.sector = starting_lsn + lsn;
532     + rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
533     + while (rc) {
534     + /* Find the next available relocation sector. */
535     + new_lsn = atomic_read(&bbr_id->in_use_replacement_blks);
536     + if (new_lsn >= bbr_id->nr_replacement_blks) {
537     + /* No more replacement sectors available. */
538     + return -EIO;
539     + }
540     + new_lsn += bbr_id->start_replacement_sect;
541     +
542     + /* Write the data to its new location. */
543     + DMWARN("dm-bbr: device %s: Trying to remap bad sector "PFU64" to sector "PFU64,
544     + format_dev_t(b, bbr_id->dev->bdev->bd_dev),
545     + starting_lsn + lsn, new_lsn);
546     + job.sector = new_lsn;
547     + rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
548     + if (rc) {
549     + /* This replacement sector is bad.
550     + * Try the next one.
551     + */
552     + DMERR("dm-bbr: device %s: replacement sector "PFU64" is bad. Skipping.",
553     + format_dev_t(b, bbr_id->dev->bdev->bd_dev), new_lsn);
554     + atomic_inc(&bbr_id->in_use_replacement_blks);
555     + continue;
556     + }
557     +
558     + /* Add this new entry to the on-disk table. */
559     + table_sector_index = new_lsn -
560     + bbr_id->start_replacement_sect;
561     + table_sector_offset = table_sector_index /
562     + BBR_ENTRIES_PER_SECT;
563     + index = table_sector_index % BBR_ENTRIES_PER_SECT;
564     +
565     + bbr_table = &bbr_id->bbr_table[table_sector_offset];
566     + bbr_table->entries[index].bad_sect = starting_lsn + lsn;
567     + bbr_table->entries[index].replacement_sect = new_lsn;
568     + bbr_table->in_use_cnt++;
569     + bbr_table->sequence_number++;
570     + bbr_table->crc = 0;
571     + bbr_table->crc = calculate_crc(INITIAL_CRC,
572     + bbr_table,
573     + sizeof(struct bbr_table));
574     +
575     + /* Write the table to disk. */
576     + cpu_bbr_table_sector_to_le(bbr_table, bbr_table);
577     + if (bbr_id->lba_table1) {
578     + job.sector = bbr_id->lba_table1 + table_sector_offset;
579     + rc = dm_io_sync_vm(1, &job, WRITE, bbr_table, &error);
580     + }
581     + if (bbr_id->lba_table2) {
582     + job.sector = bbr_id->lba_table2 + table_sector_offset;
583     + rc |= dm_io_sync_vm(1, &job, WRITE, bbr_table, &error);
584     + }
585     + le_bbr_table_sector_to_cpu(bbr_table);
586     +
587     + if (rc) {
588     + /* Error writing one of the tables to disk. */
589     + DMERR("dm-bbr: device %s: error updating BBR tables on disk.",
590     + format_dev_t(b, bbr_id->dev->bdev->bd_dev));
591     + return rc;
592     + }
593     +
594     + /* Insert a new entry in the remapping binary-tree. */
595     + rc = bbr_insert_remap_entry(bbr_id,
596     + &bbr_table->entries[index]);
597     + if (rc) {
598     + DMERR("dm-bbr: device %s: error adding new entry to remap tree.",
599     + format_dev_t(b, bbr_id->dev->bdev->bd_dev));
600     + return rc;
601     + }
602     +
603     + atomic_inc(&bbr_id->in_use_replacement_blks);
604     + }
605     + }
606     +
607     + return 0;
608     +}
609     +
610     +/**
611     + * bbr_io_process_request
612     + *
613     + * For each sector in this request, check if the sector has already
614     + * been remapped. If so, process all previous sectors in the request,
615     + * followed by the remapped sector. Then reset the starting lsn and
616     + * count, and keep going with the rest of the request as if it were
617     + * a whole new request. If any of the sync_io's return an error,
618     + * call the remapper to relocate the bad sector(s).
619     + *
620     + * 2.5 Note: When switching over to bio's for the I/O path, we have made
621     + * the assumption that the I/O request described by the bio is one
622     + * virtually contiguous piece of memory (even though the bio vector
623     + * describes it using a series of physical page addresses).
624     + **/
625     +static int bbr_io_process_request(struct bbr_private *bbr_id,
626     + struct bio *bio)
627     +{
628     + struct io_region job;
629     + u64 starting_lsn = bio->bi_sector;
630     + u64 count, lsn, remapped_lsn;
631     + struct page_list pl;
632     + unsigned int offset;
633     + unsigned long error;
634     + int i, rw = bio_data_dir(bio);
635     + int rc = 0;
636     +
637     + job.bdev = bbr_id->dev->bdev;
638     + pl.next = NULL;
639     +
640     + /* Each bio can contain multiple vectors, each with a different page.
641     + * Treat each vector as a separate request.
642     + */
643     + /* KMC: Is this the right way to walk the bvec list? */
644     + for (i = 0;
645     + i < bio->bi_vcnt;
646     + i++, bio->bi_idx++, starting_lsn += count) {
647     +
648     + /* Bvec info: number of sectors, page,
649     + * and byte-offset within page.
650     + */
651     + count = bio_iovec(bio)->bv_len >> SECTOR_SHIFT;
652     + pl.page = bio_iovec(bio)->bv_page;
653     + offset = bio_iovec(bio)->bv_offset;
654     +
655     + /* For each sector in this bvec, check if the sector has
656     + * already been remapped. If so, process all previous sectors
657     + * in this request, followed by the remapped sector. Then reset
658     + * the starting lsn and count and keep going with the rest of
659     + * the request as if it were a whole new request.
660     + */
661     + for (lsn = 0; lsn < count; lsn++) {
662     + remapped_lsn = starting_lsn + lsn;
663     + rc = bbr_remap(bbr_id, &remapped_lsn);
664     + if (!rc) {
665     + /* This sector is fine. */
666     + continue;
667     + }
668     +
669     + /* Process all sectors in the request up to this one. */
670     + if (lsn > 0) {
671     + job.sector = starting_lsn;
672     + job.count = lsn;
673     + rc = dm_io_sync(1, &job, rw, &pl,
674     + offset, &error);
675     + if (rc) {
676     + /* If this I/O failed, then one of the
677     + * sectors in this request needs to be
678     + * relocated.
679     + */
680     + rc = bbr_io_remap_error(bbr_id, rw,
681     + starting_lsn,
682     + lsn, pl.page,
683     + offset);
684     + if (rc) {
685     + /* KMC: Return? Or continue to next bvec? */
686     + return rc;
687     + }
688     + }
689     + offset += (lsn << SECTOR_SHIFT);
690     + }
691 phreak 274 +
692 dsd 7 + /* Process the remapped sector. */
693     + job.sector = remapped_lsn;
694     + job.count = 1;
695     + rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
696     + if (rc) {
697     + /* BUGBUG - Need more processing if this caused
698     + * an error. If this I/O failed, then the
699     + * existing remap is now bad, and we need to
700     + * find a new remap. Can't use
701     + * bbr_io_remap_error(), because the existing
702     + * map entry needs to be changed, not added
703     + * again, and the original table entry also
704     + * needs to be changed.
705     + */
706     + return rc;
707     + }
708     +
709     + starting_lsn += (lsn + 1);
710     + count -= (lsn + 1);
711     + lsn = -1;
712     + offset += SECTOR_SIZE;
713     + }
714     +
715     + /* Check for any remaining sectors after the last split. This
716     + * could potentially be the whole request, but that should be a
717     + * rare case because requests should only be processed by the
718     + * thread if we know an error occurred or they contained one or
719     + * more remapped sectors.
720     + */
721     + if (count) {
722     + job.sector = starting_lsn;
723     + job.count = count;
724     + rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
725     + if (rc) {
726     + /* If this I/O failed, then one of the sectors
727     + * in this request needs to be relocated.
728     + */
729     + rc = bbr_io_remap_error(bbr_id, rw, starting_lsn,
730     + count, pl.page, offset);
731     + if (rc) {
732     + /* KMC: Return? Or continue to next bvec? */
733     + return rc;
734     + }
735     + }
736     + }
737     + }
738     +
739     + return 0;
740     +}
741     +
742     +static void bbr_io_process_requests(struct bbr_private *bbr_id,
743     + struct bio *bio)
744     +{
745     + struct bio *next;
746     + int rc;
747     +
748     + while (bio) {
749     + next = bio->bi_next;
750     + bio->bi_next = NULL;
751     +
752     + rc = bbr_io_process_request(bbr_id, bio);
753     +
754     + bio_endio(bio, bio->bi_size, rc);
755     +
756     + bio = next;
757     + }
758     +}
759     +
760     +/**
761     + * bbr_remap_handler
762     + *
763     + * This is the handler for the bbr work-queue.
764     + *
765     + * I/O requests should only be sent to this handler if we know that:
766     + * a) the request contains at least one remapped sector.
767     + * or
768     + * b) the request caused an error on the normal I/O path.
769     + *
770     + * This function uses synchronous I/O, so sending a request to this
771     + * thread that doesn't need special processing will cause severe
772     + * performance degradation.
773     + **/
774     +static void bbr_remap_handler(void *data)
775     +{
776     + struct bbr_private *bbr_id = data;
777     + struct bio *bio;
778     + unsigned long flags;
779     +
780     + spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
781     + bio = bio_list_get(&bbr_id->remap_ios);
782     + spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);
783     +
784     + bbr_io_process_requests(bbr_id, bio);
785     +}
786     +
787     +/**
788     + * bbr_endio
789     + *
790     + * This is the callback for normal write requests. Check for an error
791     + * during the I/O, and send to the thread for processing if necessary.
792     + **/
793     +static int bbr_endio(struct dm_target *ti, struct bio *bio,
794     + int error, union map_info *map_context)
795     +{
796     + struct bbr_private *bbr_id = ti->private;
797     + struct dm_bio_details *bbr_io = map_context->ptr;
798     +
799     + if (error && bbr_io) {
800     + unsigned long flags;
801     + char b[32];
802     +
803     + dm_bio_restore(bbr_io, bio);
804     + map_context->ptr = NULL;
805     +
806     + DMERR("dm-bbr: device %s: I/O failure on sector %lu. "
807     + "Scheduling for retry.",
808     + format_dev_t(b, bbr_id->dev->bdev->bd_dev),
809     + (unsigned long)bio->bi_sector);
810     +
811     + spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
812     + bio_list_add(&bbr_id->remap_ios, bio);
813     + spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);
814     +
815     + queue_work(dm_bbr_wq, &bbr_id->remap_work);
816     +
817     + error = 1;
818     + }
819     +
820     + if (bbr_io)
821     + mempool_free(bbr_io, bbr_io_pool);
822     +
823     + return error;
824     +}
825     +
826     +/**
827     + * Construct a bbr mapping
828     + **/
829     +static int bbr_ctr(struct dm_target *ti, unsigned int argc, char **argv)
830     +{
831     + struct bbr_private *bbr_id;
832     + unsigned long block_size;
833     + char *end;
834     + int rc = -EINVAL;
835     +
836     + if (argc != 8) {
837     + ti->error = "dm-bbr requires exactly 8 arguments: "
838     + "device offset table1_lsn table2_lsn table_size start_replacement nr_replacement_blks block_size";
839     + goto out1;
840     + }
841     +
842     + bbr_id = bbr_alloc_private();
843     + if (!bbr_id) {
844     + ti->error = "dm-bbr: Error allocating bbr private data.";
845     + goto out1;
846     + }
847     +
848     + bbr_id->offset = simple_strtoull(argv[1], &end, 10);
849     + bbr_id->lba_table1 = simple_strtoull(argv[2], &end, 10);
850     + bbr_id->lba_table2 = simple_strtoull(argv[3], &end, 10);
851     + bbr_id->nr_sects_bbr_table = simple_strtoull(argv[4], &end, 10);
852     + bbr_id->start_replacement_sect = simple_strtoull(argv[5], &end, 10);
853     + bbr_id->nr_replacement_blks = simple_strtoull(argv[6], &end, 10);
854     + block_size = simple_strtoul(argv[7], &end, 10);
855     + bbr_id->blksize_in_sects = (block_size >> SECTOR_SHIFT);
856     +
857     + bbr_id->bbr_table = vmalloc(bbr_id->nr_sects_bbr_table << SECTOR_SHIFT);
858     + if (!bbr_id->bbr_table) {
859     + ti->error = "dm-bbr: Error allocating bbr table.";
860     + goto out2;
861     + }
862     +
863     + if (dm_get_device(ti, argv[0], 0, ti->len,
864     + dm_table_get_mode(ti->table), &bbr_id->dev)) {
865     + ti->error = "dm-bbr: Device lookup failed";
866     + goto out2;
867     + }
868     +
869     + rc = bbr_setup(bbr_id);
870     + if (rc) {
871     + ti->error = "dm-bbr: Device setup failed";
872     + goto out3;
873     + }
874     +
875     + ti->private = bbr_id;
876     + return 0;
877     +
878     +out3:
879     + dm_put_device(ti, bbr_id->dev);
880     +out2:
881     + bbr_free_private(bbr_id);
882     +out1:
883     + return rc;
884     +}
885     +
886     +static void bbr_dtr(struct dm_target *ti)
887     +{
888     + struct bbr_private *bbr_id = ti->private;
889     +
890     + dm_put_device(ti, bbr_id->dev);
891     + bbr_free_private(bbr_id);
892     +}
893     +
894     +static int bbr_map(struct dm_target *ti, struct bio *bio,
895     + union map_info *map_context)
896     +{
897     + struct bbr_private *bbr_id = ti->private;
898     + struct dm_bio_details *bbr_io;
899     + unsigned long flags;
900     + int rc = 1;
901     +
902     + bio->bi_sector += bbr_id->offset;
903     +
904     + if (atomic_read(&bbr_id->in_use_replacement_blks) == 0 ||
905     + !bbr_remap_probe(bbr_id, bio->bi_sector, bio_sectors(bio))) {
906     + /* No existing remaps or this request doesn't
907     + * contain any remapped sectors.
908     + */
909     + bio->bi_bdev = bbr_id->dev->bdev;
910     +
911     + bbr_io = mempool_alloc(bbr_io_pool, GFP_NOIO);
912     + dm_bio_record(bbr_io, bio);
913     + map_context->ptr = bbr_io;
914     + } else {
915     + /* This request has at least one remapped sector.
916     + * Give it to the work-queue for processing.
917     + */
918     + map_context->ptr = NULL;
919     + spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
920     + bio_list_add(&bbr_id->remap_ios, bio);
921     + spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);
922     +
923     + queue_work(dm_bbr_wq, &bbr_id->remap_work);
924     + rc = 0;
925     + }
926     +
927     + return rc;
928     +}
929     +
930     +static int bbr_status(struct dm_target *ti, status_type_t type,
931     + char *result, unsigned int maxlen)
932     +{
933     + struct bbr_private *bbr_id = ti->private;
934     + char b[BDEVNAME_SIZE];
935     +
936     + switch (type) {
937     + case STATUSTYPE_INFO:
938     + result[0] = '\0';
939     + break;
940     +
941     + case STATUSTYPE_TABLE:
942     + snprintf(result, maxlen, "%s "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" %u",
943     + format_dev_t(b, bbr_id->dev->bdev->bd_dev),
944     + bbr_id->offset, bbr_id->lba_table1, bbr_id->lba_table2,
945     + bbr_id->nr_sects_bbr_table,
946     + bbr_id->start_replacement_sect,
947     + bbr_id->nr_replacement_blks,
948     + bbr_id->blksize_in_sects << SECTOR_SHIFT);
949     + break;
950     + }
951     + return 0;
952     +}
953     +
954     +static struct target_type bbr_target = {
955     + .name = "bbr",
956     + .version= {1, 0, 1},
957     + .module = THIS_MODULE,
958     + .ctr = bbr_ctr,
959     + .dtr = bbr_dtr,
960     + .map = bbr_map,
961     + .end_io = bbr_endio,
962     + .status = bbr_status,
963     +};
964     +
965     +int __init dm_bbr_init(void)
966     +{
967     + int rc;
968     +
969     + rc = dm_register_target(&bbr_target);
970     + if (rc) {
971     + DMERR("dm-bbr: error registering target.");
972     + goto err1;
973     + }
974     +
975     + bbr_remap_cache = kmem_cache_create("bbr-remap",
976     + sizeof(struct bbr_runtime_remap),
977     + 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
978     + if (!bbr_remap_cache) {
979     + DMERR("dm-bbr: error creating remap cache.");
980     + rc = -ENOMEM;
981     + goto err2;
982     + }
983     +
984     + bbr_io_cache = kmem_cache_create("bbr-io", sizeof(struct dm_bio_details),
985     + 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
986     + if (!bbr_io_cache) {
987     + DMERR("dm-bbr: error creating io cache.");
988     + rc = -ENOMEM;
989     + goto err3;
990     + }
991     +
992     + bbr_io_pool = mempool_create(256, mempool_alloc_slab,
993     + mempool_free_slab, bbr_io_cache);
994     + if (!bbr_io_pool) {
995     + DMERR("dm-bbr: error creating io mempool.");
996     + rc = -ENOMEM;
997     + goto err4;
998     + }
999     +
1000     + dm_bbr_wq = create_workqueue("dm-bbr");
1001     + if (!dm_bbr_wq) {
1002     + DMERR("dm-bbr: error creating work-queue.");
1003     + rc = -ENOMEM;
1004     + goto err5;
1005     + }
1006     +
1007     + rc = dm_io_get(1);
1008     + if (rc) {
1009     + DMERR("dm-bbr: error initializing I/O service.");
1010     + goto err6;
1011     + }
1012     +
1013     + return 0;
1014     +
1015     +err6:
1016     + destroy_workqueue(dm_bbr_wq);
1017     +err5:
1018     + mempool_destroy(bbr_io_pool);
1019     +err4:
1020     + kmem_cache_destroy(bbr_io_cache);
1021     +err3:
1022     + kmem_cache_destroy(bbr_remap_cache);
1023     +err2:
1024     + dm_unregister_target(&bbr_target);
1025     +err1:
1026     + return rc;
1027     +}
1028     +
1029     +void __exit dm_bbr_exit(void)
1030     +{
1031     + dm_io_put(1);
1032     + destroy_workqueue(dm_bbr_wq);
1033     + mempool_destroy(bbr_io_pool);
1034     + kmem_cache_destroy(bbr_io_cache);
1035     + kmem_cache_destroy(bbr_remap_cache);
1036     + dm_unregister_target(&bbr_target);
1037     +}
1038     +
1039     +module_init(dm_bbr_init);
1040     +module_exit(dm_bbr_exit);
1041     +MODULE_LICENSE("GPL");
1042 phreak 274 Index: linux-git/drivers/md/dm-bbr.h
1043     ===================================================================
1044     --- /dev/null
1045     +++ linux-git/drivers/md/dm-bbr.h
1046 dsd 7 @@ -0,0 +1,125 @@
1047     +/*
1048     + * (C) Copyright IBM Corp. 2002, 2004
1049     + *
1050     + * This program is free software; you can redistribute it and/or modify
1051     + * it under the terms of the GNU General Public License as published by
1052     + * the Free Software Foundation; either version 2 of the License, or
1053     + * (at your option) any later version.
1054     + *
1055     + * This program is distributed in the hope that it will be useful,
1056     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1057     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
1058     + * the GNU General Public License for more details.
1059     + *
1060     + * You should have received a copy of the GNU General Public License
1061     + * along with this program; if not, write to the Free Software
1062     + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1063     + *
1064     + * linux/drivers/md/dm-bbr.h
1065     + *
1066     + * Bad-block-relocation (BBR) target for device-mapper.
1067     + *
1068     + * The BBR target is designed to remap I/O write failures to another safe
1069     + * location on disk. Note that most disk drives have BBR built into them;
1070     + * this means that our software BBR will only be activated when all hardware
1071     + * BBR replacement sectors have been used.
1072     + */
1073     +
1074     +#define BBR_TABLE_SIGNATURE 0x42627254 /* BbrT */
1075     +#define BBR_ENTRIES_PER_SECT 31
1076     +#define INITIAL_CRC 0xFFFFFFFF
1077     +#define CRC_POLYNOMIAL 0xEDB88320L
1078     +
1079     +/**
1080     + * Macros to cleanly print 64-bit numbers on both 32-bit and 64-bit machines.
1081     + * Use these in place of %Ld, %Lu, and %Lx.
1082     + **/
1083     +#if BITS_PER_LONG > 32
1084     +#define PFU64 "%lu"
1085     +#else
1086     +#define PFU64 "%Lu"
1087     +#endif
1088     +
1089     +/**
1090     + * struct bbr_table_entry
1091     + * @bad_sect: LBA of bad location.
1092     + * @replacement_sect: LBA of new location.
1093     + *
1094     + * Structure to describe one BBR remap.
1095     + **/
1096     +struct bbr_table_entry {
1097     + u64 bad_sect;
1098     + u64 replacement_sect;
1099     +};
1100     +
1101     +/**
1102     + * struct bbr_table
1103     + * @signature: Signature on each BBR table sector.
1104     + * @crc: CRC for this table sector.
1105     + * @sequence_number: Used to resolve conflicts when primary and secondary
1106     + * tables do not match.
1107     + * @in_use_cnt: Number of in-use table entries.
1108     + * @entries: Actual table of remaps.
1109     + *
1110     + * Structure to describe each sector of the metadata table. Each sector in this
1111     + * table can describe 31 remapped sectors.
1112     + **/
1113     +struct bbr_table {
1114     + u32 signature;
1115     + u32 crc;
1116     + u32 sequence_number;
1117     + u32 in_use_cnt;
1118     + struct bbr_table_entry entries[BBR_ENTRIES_PER_SECT];
1119     +};
1120     +
1121     +/**
1122     + * struct bbr_runtime_remap
1123     + *
1124     + * Node in the binary tree used to keep track of remaps.
1125     + **/
1126     +struct bbr_runtime_remap {
1127     + struct bbr_table_entry remap;
1128     + struct bbr_runtime_remap *left;
1129     + struct bbr_runtime_remap *right;
1130     +};
1131     +
1132     +/**
1133     + * struct bbr_private
1134     + * @dev: Info about underlying device.
1135     + * @bbr_table: Copy of metadata table.
1136     + * @remap_root: Binary tree containing all remaps.
1137     + * @remap_root_lock: Lock for the binary tree.
1138     + * @remap_work: For adding work items to the work-queue.
1139     + * @remap_ios: List of I/Os for the work-queue to handle.
1140     + * @remap_ios_lock: Lock for the remap_ios list.
1141     + * @offset: LBA of data area.
1142     + * @lba_table1: LBA of primary BBR table.
1143     + * @lba_table2: LBA of secondary BBR table.
1144     + * @nr_sects_bbr_table: Size of each BBR table.
1145     + * @nr_replacement_blks: Number of replacement blocks.
1146     + * @start_replacement_sect: LBA of start of replacement blocks.
1147     + * @blksize_in_sects: Size of each block.
1148     + * @in_use_replacement_blks: Current number of remapped blocks.
1149     + *
1150     + * Private data for each BBR target.
1151     + **/
1152     +struct bbr_private {
1153     + struct dm_dev *dev;
1154     + struct bbr_table *bbr_table;
1155     + struct bbr_runtime_remap *remap_root;
1156     + spinlock_t remap_root_lock;
1157     +
1158     + struct work_struct remap_work;
1159     + struct bio_list remap_ios;
1160     + spinlock_t remap_ios_lock;
1161     +
1162     + u64 offset;
1163     + u64 lba_table1;
1164     + u64 lba_table2;
1165     + u64 nr_sects_bbr_table;
1166     + u64 start_replacement_sect;
1167     + u64 nr_replacement_blks;
1168     + u32 blksize_in_sects;
1169     + atomic_t in_use_replacement_blks;
1170     +};
1171     +
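
The constants and structures in dm-bbr.h above imply that one bbr_table fills a 512-byte metadata sector exactly: a 16-byte header (four u32 fields) followed by BBR_ENTRIES_PER_SECT = 31 entries of 16 bytes each. Below is a minimal user-space sketch of that arithmetic; the <stdint.h> typedefs and the standalone main() are assumptions of the sketch (they stand in for the kernel's u32/u64) and are not part of the patch.

/* Layout check for the on-disk BBR table sector (user-space sketch).
 * Mirrors struct bbr_table from dm-bbr.h using <stdint.h> types. */
#include <stdint.h>
#include <stdio.h>

#define BBR_ENTRIES_PER_SECT 31
#define SECTOR_SIZE 512

struct bbr_table_entry {
	uint64_t bad_sect;		/* LBA of the bad location */
	uint64_t replacement_sect;	/* LBA of the relocated copy */
};

struct bbr_table {
	uint32_t signature;		/* BBR_TABLE_SIGNATURE, "BbrT" */
	uint32_t crc;			/* CRC of the sector, computed with this field zeroed */
	uint32_t sequence_number;
	uint32_t in_use_cnt;
	struct bbr_table_entry entries[BBR_ENTRIES_PER_SECT];
};

int main(void)
{
	/* 16-byte header + 31 * 16-byte entries = 512 bytes, one sector. */
	printf("header %zu + entries %zu = %zu bytes\n",
	       4 * sizeof(uint32_t),
	       BBR_ENTRIES_PER_SECT * sizeof(struct bbr_table_entry),
	       sizeof(struct bbr_table));
	return sizeof(struct bbr_table) == SECTOR_SIZE ? 0 : 1;
}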

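validate_bbr_table_sector() only accepts a table sector whose stored CRC matches a table-driven CRC-32 computed with the reflected polynomial 0xEDB88320, an initial value of 0xFFFFFFFF and no final XOR, over the whole sector with the crc field zeroed. The user-space sketch below re-implements that routine so a dumped metadata sector can be checked offline; it is an independent illustration written against those constants, not code taken from the patch.

/* CRC-32 as used by dm-bbr metadata (user-space sketch).
 * Same polynomial, initial value and update step as calculate_crc(). */
#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define CRC_POLYNOMIAL	0xEDB88320u
#define INITIAL_CRC	0xFFFFFFFFu

static uint32_t crc_table[256];

static void build_crc_table(void)
{
	for (uint32_t i = 0; i < 256; i++) {
		uint32_t crc = i;
		for (int j = 0; j < 8; j++)
			crc = (crc & 1) ? (crc >> 1) ^ CRC_POLYNOMIAL : crc >> 1;
		crc_table[i] = crc;
	}
}

static uint32_t calculate_crc(uint32_t crc, const void *buf, size_t len)
{
	const unsigned char *p = buf;

	while (len--)
		crc = (crc >> 8) ^ crc_table[(crc ^ *p++) & 0xff];
	return crc;
}

int main(void)
{
	/* To verify a real sector: copy the stored crc aside, zero the crc
	 * field, then compare the stored value with the result below, as
	 * validate_bbr_table_sector() does. Here we just CRC a zeroed sector. */
	unsigned char sector[512] = { 0 };

	build_crc_table();
	printf("crc of an all-zero sector: 0x%08" PRIx32 "\n",
	       calculate_crc(INITIAL_CRC, sector, sizeof(sector)));
	return 0;
}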
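bbr_table_to_remap_list() exists because the on-disk table is ordered by replacement sector, while the I/O path has to ask whether a given bad sector is remapped. The sketch below illustrates that runtime index in user space: an unbalanced binary tree keyed by bad_sect, in the spirit of bbr_binary_tree_insert() and bbr_binary_search(). The function names, the sample (bad, replacement) pairs and the use of malloc() are assumptions of the sketch.

/* Runtime remap index, illustrated in user space: an unbalanced binary
 * tree keyed by the bad sector, looked up on every remapped I/O. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct remap_node {
	uint64_t bad_sect;
	uint64_t replacement_sect;
	struct remap_node *left, *right;
};

static void remap_insert(struct remap_node **root, struct remap_node *n)
{
	struct remap_node **link = root;

	while (*link)
		link = (n->bad_sect > (*link)->bad_sect) ?
			&(*link)->right : &(*link)->left;
	n->left = n->right = NULL;
	*link = n;
}

static struct remap_node *remap_search(struct remap_node *node, uint64_t lsn)
{
	while (node && node->bad_sect != lsn)
		node = (lsn > node->bad_sect) ? node->right : node->left;
	return node;
}

int main(void)
{
	/* Pretend the on-disk table held these (bad, replacement) pairs. */
	static const uint64_t pairs[][2] = { {900, 5000}, {17, 5001}, {333, 5002} };
	struct remap_node *root = NULL;
	struct remap_node *hit;

	for (size_t i = 0; i < sizeof(pairs) / sizeof(pairs[0]); i++) {
		struct remap_node *n = malloc(sizeof(*n));

		if (!n)
			return 1;
		n->bad_sect = pairs[i][0];
		n->replacement_sect = pairs[i][1];
		remap_insert(&root, n);
	}

	hit = remap_search(root, 333);
	if (hit)
		printf("sector 333 is remapped to sector %" PRIu64 "\n",
		       hit->replacement_sect);
	return 0;
}

An unbalanced tree is presumably acceptable here because entries only accumulate once the drive's own spare sectors are exhausted, so the index is expected to stay small.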