/[linux-patches]/genpatches-2.6/trunk/2.6.18-pre/4105_dm-bbr.patch
Gentoo

Contents of /genpatches-2.6/trunk/2.6.18-pre/4105_dm-bbr.patch

Parent Directory Parent Directory | Revision Log Revision Log


Revision 210 - (hide annotations) (download)
Mon Nov 21 13:08:30 2005 UTC (12 years, 8 months ago) by dsd
Original Path: genpatches-2.6/trunk/2.6.15-pre/4305_dm-bbr.patch
File size: 32467 byte(s)
Creating 2.6.15-pre branch from 2.6.14
1 dsd 7 diff -urNpX dontdiff linux-2.6.12-rc2-gentoo/drivers/md/dm-bbr.c linux-dsd/drivers/md/dm-bbr.c
2     --- linux-2.6.12-rc2-gentoo/drivers/md/dm-bbr.c 1970-01-01 01:00:00.000000000 +0100
3     +++ linux-dsd/drivers/md/dm-bbr.c 2005-04-06 10:06:16.000000000 +0100
4     @@ -0,0 +1,1003 @@
5     +/*
6     + * (C) Copyright IBM Corp. 2002, 2004
7     + *
8     + * This program is free software; you can redistribute it and/or modify
9     + * it under the terms of the GNU General Public License as published by
10     + * the Free Software Foundation; either version 2 of the License, or
11     + * (at your option) any later version.
12     + *
13     + * This program is distributed in the hope that it will be useful,
14     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
15     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
16     + * the GNU General Public License for more details.
17     + *
18     + * You should have received a copy of the GNU General Public License
19     + * along with this program; if not, write to the Free Software
20     + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21     + *
22     + * linux/drivers/md/dm-bbr.c
23     + *
24     + * Bad-block-relocation (BBR) target for device-mapper.
25     + *
26     + * The BBR target is designed to remap I/O write failures to another safe
27     + * location on disk. Note that most disk drives have BBR built into them,
28     + * this means that our software BBR will be only activated when all hardware
29     + * BBR replacement sectors have been used.
30     + */
31     +
32     +#include <linux/module.h>
33     +#include <linux/init.h>
34     +#include <linux/bio.h>
35     +#include <linux/spinlock.h>
36     +#include <linux/slab.h>
37     +#include <linux/mempool.h>
38     +#include <linux/workqueue.h>
39     +#include <linux/vmalloc.h>
40     +
41     +#include "dm.h"
42     +#include "dm-bio-list.h"
43     +#include "dm-bio-record.h"
44     +#include "dm-bbr.h"
45     +#include "dm-io.h"
46     +
47     +#define SECTOR_SIZE (1 << SECTOR_SHIFT)
48     +
49     +static struct workqueue_struct *dm_bbr_wq = NULL;
50     +static void bbr_remap_handler(void *data);
51     +static kmem_cache_t *bbr_remap_cache;
52     +static kmem_cache_t *bbr_io_cache;
53     +static mempool_t *bbr_io_pool;
54     +
55     +/**
56     + * bbr_binary_tree_destroy
57     + *
58     + * Destroy the binary tree.
59     + **/
60     +static void bbr_binary_tree_destroy(struct bbr_runtime_remap *root)
61     +{
62     + struct bbr_runtime_remap **link = NULL;
63     + struct bbr_runtime_remap *node = root;
64     +
65     + while (node) {
66     + if (node->left) {
67     + link = &(node->left);
68     + node = node->left;
69     + continue;
70     + }
71     + if (node->right) {
72     + link = &(node->right);
73     + node = node->right;
74     + continue;
75     + }
76     +
77     + kmem_cache_free(bbr_remap_cache, node);
78     + if (node == root) {
79     + /* If root is deleted, we're done. */
80     + break;
81     + }
82     +
83     + /* Back to root. */
84     + node = root;
85     + *link = NULL;
86     + }
87     +}
88     +
89     +static void bbr_free_remap(struct bbr_private *bbr_id)
90     +{
91     + spin_lock_irq(&bbr_id->remap_root_lock);
92     + bbr_binary_tree_destroy(bbr_id->remap_root);
93     + bbr_id->remap_root = NULL;
94     + spin_unlock_irq(&bbr_id->remap_root_lock);
95     +}
96     +
97     +static struct bbr_private *bbr_alloc_private(void)
98     +{
99     + struct bbr_private *bbr_id;
100     +
101     + bbr_id = kmalloc(sizeof(*bbr_id), GFP_KERNEL);
102     + if (bbr_id) {
103     + memset(bbr_id, 0, sizeof(*bbr_id));
104     + INIT_WORK(&bbr_id->remap_work, bbr_remap_handler, bbr_id);
105     + bbr_id->remap_root_lock = SPIN_LOCK_UNLOCKED;
106     + bbr_id->remap_ios_lock = SPIN_LOCK_UNLOCKED;
107     + bbr_id->in_use_replacement_blks = (atomic_t)ATOMIC_INIT(0);
108     + }
109     +
110     + return bbr_id;
111     +}
112     +
113     +static void bbr_free_private(struct bbr_private *bbr_id)
114     +{
115     + if (bbr_id->bbr_table) {
116     + vfree(bbr_id->bbr_table);
117     + }
118     + bbr_free_remap(bbr_id);
119     + kfree(bbr_id);
120     +}
121     +
122     +static u32 crc_table[256];
123     +static u32 crc_table_built = 0;
124     +
125     +static void build_crc_table(void)
126     +{
127     + u32 i, j, crc;
128     +
129     + for (i = 0; i <= 255; i++) {
130     + crc = i;
131     + for (j = 8; j > 0; j--) {
132     + if (crc & 1)
133     + crc = (crc >> 1) ^ CRC_POLYNOMIAL;
134     + else
135     + crc >>= 1;
136     + }
137     + crc_table[i] = crc;
138     + }
139     + crc_table_built = 1;
140     +}
141     +
142     +static u32 calculate_crc(u32 crc, void *buffer, u32 buffersize)
143     +{
144     + unsigned char *current_byte;
145     + u32 temp1, temp2, i;
146     +
147     + current_byte = (unsigned char *) buffer;
148     + /* Make sure the crc table is available */
149     + if (!crc_table_built)
150     + build_crc_table();
151     + /* Process each byte in the buffer. */
152     + for (i = 0; i < buffersize; i++) {
153     + temp1 = (crc >> 8) & 0x00FFFFFF;
154     + temp2 = crc_table[(crc ^ (u32) * current_byte) &
155     + (u32) 0xff];
156     + current_byte++;
157     + crc = temp1 ^ temp2;
158     + }
159     + return crc;
160     +}
161     +
162     +/**
163     + * le_bbr_table_sector_to_cpu
164     + *
165     + * Convert bbr meta data from on-disk (LE) format
166     + * to the native cpu endian format.
167     + **/
168     +static void le_bbr_table_sector_to_cpu(struct bbr_table *p)
169     +{
170     + int i;
171     + p->signature = le32_to_cpup(&p->signature);
172     + p->crc = le32_to_cpup(&p->crc);
173     + p->sequence_number = le32_to_cpup(&p->sequence_number);
174     + p->in_use_cnt = le32_to_cpup(&p->in_use_cnt);
175     + for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
176     + p->entries[i].bad_sect =
177     + le64_to_cpup(&p->entries[i].bad_sect);
178     + p->entries[i].replacement_sect =
179     + le64_to_cpup(&p->entries[i].replacement_sect);
180     + }
181     +}
182     +
183     +/**
184     + * cpu_bbr_table_sector_to_le
185     + *
186     + * Convert bbr meta data from cpu endian format to on-disk (LE) format
187     + **/
188     +static void cpu_bbr_table_sector_to_le(struct bbr_table *p,
189     + struct bbr_table *le)
190     +{
191     + int i;
192     + le->signature = cpu_to_le32p(&p->signature);
193     + le->crc = cpu_to_le32p(&p->crc);
194     + le->sequence_number = cpu_to_le32p(&p->sequence_number);
195     + le->in_use_cnt = cpu_to_le32p(&p->in_use_cnt);
196     + for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
197     + le->entries[i].bad_sect =
198     + cpu_to_le64p(&p->entries[i].bad_sect);
199     + le->entries[i].replacement_sect =
200     + cpu_to_le64p(&p->entries[i].replacement_sect);
201     + }
202     +}
203     +
204     +/**
205     + * validate_bbr_table_sector
206     + *
207     + * Check the specified BBR table sector for a valid signature and CRC. If it's
208     + * valid, endian-convert the table sector.
209     + **/
210     +static int validate_bbr_table_sector(struct bbr_table *p)
211     +{
212     + int rc = 0;
213     + int org_crc, final_crc;
214     +
215     + if (le32_to_cpup(&p->signature) != BBR_TABLE_SIGNATURE) {
216     + DMERR("dm-bbr: BBR table signature doesn't match!");
217     + DMERR("dm-bbr: Found 0x%x. Expecting 0x%x",
218     + le32_to_cpup(&p->signature), BBR_TABLE_SIGNATURE);
219     + rc = -EINVAL;
220     + goto out;
221     + }
222     +
223     + if (!p->crc) {
224     + DMERR("dm-bbr: BBR table sector has no CRC!");
225     + rc = -EINVAL;
226     + goto out;
227     + }
228     +
229     + org_crc = le32_to_cpup(&p->crc);
230     + p->crc = 0;
231     + final_crc = calculate_crc(INITIAL_CRC, (void *)p, sizeof(*p));
232     + if (final_crc != org_crc) {
233     + DMERR("dm-bbr: CRC failed!");
234     + DMERR("dm-bbr: Found 0x%x. Expecting 0x%x",
235     + org_crc, final_crc);
236     + rc = -EINVAL;
237     + goto out;
238     + }
239     +
240     + p->crc = cpu_to_le32p(&org_crc);
241     + le_bbr_table_sector_to_cpu(p);
242     +
243     +out:
244     + return rc;
245     +}
246     +
247     +/**
248     + * bbr_binary_tree_insert
249     + *
250     + * Insert a node into the binary tree.
251     + **/
252     +static void bbr_binary_tree_insert(struct bbr_runtime_remap **root,
253     + struct bbr_runtime_remap *newnode)
254     +{
255     + struct bbr_runtime_remap **node = root;
256     + while (node && *node) {
257     + if (newnode->remap.bad_sect > (*node)->remap.bad_sect) {
258     + node = &((*node)->right);
259     + } else {
260     + node = &((*node)->left);
261     + }
262     + }
263     +
264     + newnode->left = newnode->right = NULL;
265     + *node = newnode;
266     +}
267     +
268     +/**
269     + * bbr_binary_search
270     + *
271     + * Search for a node that contains bad_sect == lsn.
272     + **/
273     +static struct bbr_runtime_remap *bbr_binary_search(
274     + struct bbr_runtime_remap *root,
275     + u64 lsn)
276     +{
277     + struct bbr_runtime_remap *node = root;
278     + while (node) {
279     + if (node->remap.bad_sect == lsn) {
280     + break;
281     + }
282     + if (lsn > node->remap.bad_sect) {
283     + node = node->right;
284     + } else {
285     + node = node->left;
286     + }
287     + }
288     + return node;
289     +}
290     +
291     +/**
292     + * bbr_insert_remap_entry
293     + *
294     + * Create a new remap entry and add it to the binary tree for this node.
295     + **/
296     +static int bbr_insert_remap_entry(struct bbr_private *bbr_id,
297     + struct bbr_table_entry *new_bbr_entry)
298     +{
299     + struct bbr_runtime_remap *newnode;
300     +
301     + newnode = kmem_cache_alloc(bbr_remap_cache, GFP_NOIO);
302     + if (!newnode) {
303     + DMERR("dm-bbr: Could not allocate from remap cache!");
304     + return -ENOMEM;
305     + }
306     + newnode->remap.bad_sect = new_bbr_entry->bad_sect;
307     + newnode->remap.replacement_sect = new_bbr_entry->replacement_sect;
308     + spin_lock_irq(&bbr_id->remap_root_lock);
309     + bbr_binary_tree_insert(&bbr_id->remap_root, newnode);
310     + spin_unlock_irq(&bbr_id->remap_root_lock);
311     + return 0;
312     +}
313     +
314     +/**
315     + * bbr_table_to_remap_list
316     + *
317     + * The on-disk bbr table is sorted by the replacement sector LBA. In order to
318     + * improve run time performance, the in memory remap list must be sorted by
319     + * the bad sector LBA. This function is called at discovery time to initialize
320     + * the remap list. This function assumes that at least one copy of meta data
321     + * is valid.
322     + **/
323     +static u32 bbr_table_to_remap_list(struct bbr_private *bbr_id)
324     +{
325     + u32 in_use_blks = 0;
326     + int i, j;
327     + struct bbr_table *p;
328     +
329     + for (i = 0, p = bbr_id->bbr_table;
330     + i < bbr_id->nr_sects_bbr_table;
331     + i++, p++) {
332     + if (!p->in_use_cnt) {
333     + break;
334     + }
335     + in_use_blks += p->in_use_cnt;
336     + for (j = 0; j < p->in_use_cnt; j++) {
337     + bbr_insert_remap_entry(bbr_id, &p->entries[j]);
338     + }
339     + }
340     + if (in_use_blks) {
341     + char b[32];
342     + DMWARN("dm-bbr: There are %u BBR entries for device %s",
343     + in_use_blks, format_dev_t(b, bbr_id->dev->bdev->bd_dev));
344     + }
345     +
346     + return in_use_blks;
347     +}
348     +
349     +/**
350     + * bbr_search_remap_entry
351     + *
352     + * Search remap entry for the specified sector. If found, return a pointer to
353     + * the table entry. Otherwise, return NULL.
354     + **/
355     +static struct bbr_table_entry *bbr_search_remap_entry(
356     + struct bbr_private *bbr_id,
357     + u64 lsn)
358     +{
359     + struct bbr_runtime_remap *p;
360     +
361     + spin_lock_irq(&bbr_id->remap_root_lock);
362     + p = bbr_binary_search(bbr_id->remap_root, lsn);
363     + spin_unlock_irq(&bbr_id->remap_root_lock);
364     + if (p) {
365     + return (&p->remap);
366     + } else {
367     + return NULL;
368     + }
369     +}
370     +
371     +/**
372     + * bbr_remap
373     + *
374     + * If *lsn is in the remap table, return TRUE and modify *lsn,
375     + * else, return FALSE.
376     + **/
377     +static inline int bbr_remap(struct bbr_private *bbr_id,
378     + u64 *lsn)
379     +{
380     + struct bbr_table_entry *e;
381     +
382     + if (atomic_read(&bbr_id->in_use_replacement_blks)) {
383     + e = bbr_search_remap_entry(bbr_id, *lsn);
384     + if (e) {
385     + *lsn = e->replacement_sect;
386     + return 1;
387     + }
388     + }
389     + return 0;
390     +}
391     +
392     +/**
393     + * bbr_remap_probe
394     + *
395     + * If any of the sectors in the range [lsn, lsn+nr_sects] are in the remap
396     + * table, return TRUE. Else, return FALSE.
397     + **/
398     +static inline int bbr_remap_probe(struct bbr_private *bbr_id,
399     + u64 lsn, u64 nr_sects)
400     +{
401     + u64 tmp, cnt;
402     +
403     + if (atomic_read(&bbr_id->in_use_replacement_blks)) {
404     + for (cnt = 0, tmp = lsn;
405     + cnt < nr_sects;
406     + cnt += bbr_id->blksize_in_sects, tmp = lsn + cnt) {
407     + if (bbr_remap(bbr_id,&tmp)) {
408     + return 1;
409     + }
410     + }
411     + }
412     + return 0;
413     +}
414     +
415     +/**
416     + * bbr_setup
417     + *
418     + * Read the remap tables from disk and set up the initial remap tree.
419     + **/
420     +static int bbr_setup(struct bbr_private *bbr_id)
421     +{
422     + struct bbr_table *table = bbr_id->bbr_table;
423     + struct io_region job;
424     + unsigned long error;
425     + int i, rc = 0;
426     +
427     + job.bdev = bbr_id->dev->bdev;
428     + job.count = 1;
429     +
430     + /* Read and verify each BBR table sector individually. */
431     + for (i = 0; i < bbr_id->nr_sects_bbr_table; i++, table++) {
432     + job.sector = bbr_id->lba_table1 + i;
433     + rc = dm_io_sync_vm(1, &job, READ, table, &error);
434     + if (rc && bbr_id->lba_table2) {
435     + job.sector = bbr_id->lba_table2 + i;
436     + rc = dm_io_sync_vm(1, &job, READ, table, &error);
437     + }
438     + if (rc) {
439     + goto out;
440     + }
441     +
442     + rc = validate_bbr_table_sector(table);
443     + if (rc) {
444     + goto out;
445     + }
446     + }
447     + atomic_set(&bbr_id->in_use_replacement_blks,
448     + bbr_table_to_remap_list(bbr_id));
449     +
450     +out:
451     + if (rc) {
452     + DMERR("dm-bbr: error during device setup: %d", rc);
453     + }
454     + return rc;
455     +}
456     +
457     +/**
458     + * bbr_io_remap_error
459     + * @bbr_id: Private data for the BBR node.
460     + * @rw: READ or WRITE.
461     + * @starting_lsn: Starting sector of request to remap.
462     + * @count: Number of sectors in the request.
463     + * @page: Page containing the data for the request.
464     + * @offset: Byte-offset of the data within the page.
465     + *
466     + * For the requested range, try to write each sector individually. For each
467     + * sector that fails, find the next available remap location and write the
468     + * data to that new location. Then update the table and write both copies
469     + * of the table to disk. Finally, update the in-memory mapping and do any
470     + * other necessary bookkeeping.
471     + **/
472     +static int bbr_io_remap_error(struct bbr_private *bbr_id,
473     + int rw,
474     + u64 starting_lsn,
475     + u64 count,
476     + struct page *page,
477     + unsigned int offset)
478     +{
479     + struct bbr_table *bbr_table;
480     + struct io_region job;
481     + struct page_list pl;
482     + unsigned long table_sector_index;
483     + unsigned long table_sector_offset;
484     + unsigned long index;
485     + unsigned long error;
486     + u64 lsn, new_lsn;
487     + char b[32];
488     + int rc;
489     +
490     + job.bdev = bbr_id->dev->bdev;
491     + job.count = 1;
492     + pl.page = page;
493     + pl.next = NULL;
494     +
495     + /* For each sector in the request. */
496     + for (lsn = 0; lsn < count; lsn++, offset += SECTOR_SIZE) {
497     + job.sector = starting_lsn + lsn;
498     + rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
499     + while (rc) {
500     + /* Find the next available relocation sector. */
501     + new_lsn = atomic_read(&bbr_id->in_use_replacement_blks);
502     + if (new_lsn >= bbr_id->nr_replacement_blks) {
503     + /* No more replacement sectors available. */
504     + return -EIO;
505     + }
506     + new_lsn += bbr_id->start_replacement_sect;
507     +
508     + /* Write the data to its new location. */
509     + DMWARN("dm-bbr: device %s: Trying to remap bad sector "PFU64" to sector "PFU64,
510     + format_dev_t(b, bbr_id->dev->bdev->bd_dev),
511     + starting_lsn + lsn, new_lsn);
512     + job.sector = new_lsn;
513     + rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
514     + if (rc) {
515     + /* This replacement sector is bad.
516     + * Try the next one.
517     + */
518     + DMERR("dm-bbr: device %s: replacement sector "PFU64" is bad. Skipping.",
519     + format_dev_t(b, bbr_id->dev->bdev->bd_dev), new_lsn);
520     + atomic_inc(&bbr_id->in_use_replacement_blks);
521     + continue;
522     + }
523     +
524     + /* Add this new entry to the on-disk table. */
525     + table_sector_index = new_lsn -
526     + bbr_id->start_replacement_sect;
527     + table_sector_offset = table_sector_index /
528     + BBR_ENTRIES_PER_SECT;
529     + index = table_sector_index % BBR_ENTRIES_PER_SECT;
530     +
531     + bbr_table = &bbr_id->bbr_table[table_sector_offset];
532     + bbr_table->entries[index].bad_sect = starting_lsn + lsn;
533     + bbr_table->entries[index].replacement_sect = new_lsn;
534     + bbr_table->in_use_cnt++;
535     + bbr_table->sequence_number++;
536     + bbr_table->crc = 0;
537     + bbr_table->crc = calculate_crc(INITIAL_CRC,
538     + bbr_table,
539     + sizeof(struct bbr_table));
540     +
541     + /* Write the table to disk. */
542     + cpu_bbr_table_sector_to_le(bbr_table, bbr_table);
543     + if (bbr_id->lba_table1) {
544     + job.sector = bbr_id->lba_table1 + table_sector_offset;
545     + rc = dm_io_sync_vm(1, &job, WRITE, bbr_table, &error);
546     + }
547     + if (bbr_id->lba_table2) {
548     + job.sector = bbr_id->lba_table2 + table_sector_offset;
549     + rc |= dm_io_sync_vm(1, &job, WRITE, bbr_table, &error);
550     + }
551     + le_bbr_table_sector_to_cpu(bbr_table);
552     +
553     + if (rc) {
554     + /* Error writing one of the tables to disk. */
555     + DMERR("dm-bbr: device %s: error updating BBR tables on disk.",
556     + format_dev_t(b, bbr_id->dev->bdev->bd_dev));
557     + return rc;
558     + }
559     +
560     + /* Insert a new entry in the remapping binary-tree. */
561     + rc = bbr_insert_remap_entry(bbr_id,
562     + &bbr_table->entries[index]);
563     + if (rc) {
564     + DMERR("dm-bbr: device %s: error adding new entry to remap tree.",
565     + format_dev_t(b, bbr_id->dev->bdev->bd_dev));
566     + return rc;
567     + }
568     +
569     + atomic_inc(&bbr_id->in_use_replacement_blks);
570     + }
571     + }
572     +
573     + return 0;
574     +}
575     +
576     +/**
577     + * bbr_io_process_request
578     + *
579     + * For each sector in this request, check if the sector has already
580     + * been remapped. If so, process all previous sectors in the request,
581     + * followed by the remapped sector. Then reset the starting lsn and
582     + * count, and keep going with the rest of the request as if it were
583     + * a whole new request. If any of the sync_io's return an error,
584     + * call the remapper to relocate the bad sector(s).
585     + *
586     + * 2.5 Note: When switching over to bio's for the I/O path, we have made
587     + * the assumption that the I/O request described by the bio is one
588     + * virtually contiguous piece of memory (even though the bio vector
589     + * describes it using a series of physical page addresses).
590     + **/
591     +static int bbr_io_process_request(struct bbr_private *bbr_id,
592     + struct bio *bio)
593     +{
594     + struct io_region job;
595     + u64 starting_lsn = bio->bi_sector;
596     + u64 count, lsn, remapped_lsn;
597     + struct page_list pl;
598     + unsigned int offset;
599     + unsigned long error;
600     + int i, rw = bio_data_dir(bio);
601     + int rc = 0;
602     +
603     + job.bdev = bbr_id->dev->bdev;
604     + pl.next = NULL;
605     +
606     + /* Each bio can contain multiple vectors, each with a different page.
607     + * Treat each vector as a separate request.
608     + */
609     + /* KMC: Is this the right way to walk the bvec list? */
610     + for (i = 0;
611     + i < bio->bi_vcnt;
612     + i++, bio->bi_idx++, starting_lsn += count) {
613     +
614     + /* Bvec info: number of sectors, page,
615     + * and byte-offset within page.
616     + */
617     + count = bio_iovec(bio)->bv_len >> SECTOR_SHIFT;
618     + pl.page = bio_iovec(bio)->bv_page;
619     + offset = bio_iovec(bio)->bv_offset;
620     +
621     + /* For each sector in this bvec, check if the sector has
622     + * already been remapped. If so, process all previous sectors
623     + * in this request, followed by the remapped sector. Then reset
624     + * the starting lsn and count and keep going with the rest of
625     + * the request as if it were a whole new request.
626     + */
627     + for (lsn = 0; lsn < count; lsn++) {
628     + remapped_lsn = starting_lsn + lsn;
629     + rc = bbr_remap(bbr_id, &remapped_lsn);
630     + if (!rc) {
631     + /* This sector is fine. */
632     + continue;
633     + }
634     +
635     + /* Process all sectors in the request up to this one. */
636     + if (lsn > 0) {
637     + job.sector = starting_lsn;
638     + job.count = lsn;
639     + rc = dm_io_sync(1, &job, rw, &pl,
640     + offset, &error);
641     + if (rc) {
642     + /* If this I/O failed, then one of the
643     + * sectors in this request needs to be
644     + * relocated.
645     + */
646     + rc = bbr_io_remap_error(bbr_id, rw,
647     + starting_lsn,
648     + lsn, pl.page,
649     + offset);
650     + if (rc) {
651     + /* KMC: Return? Or continue to next bvec? */
652     + return rc;
653     + }
654     + }
655     + offset += (lsn << SECTOR_SHIFT);
656     + }
657     +
658     + /* Process the remapped sector. */
659     + job.sector = remapped_lsn;
660     + job.count = 1;
661     + rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
662     + if (rc) {
663     + /* BUGBUG - Need more processing if this caused
664     + * an error. If this I/O failed, then the
665     + * existing remap is now bad, and we need to
666     + * find a new remap. Can't use
667     + * bbr_io_remap_error(), because the existing
668     + * map entry needs to be changed, not added
669     + * again, and the original table entry also
670     + * needs to be changed.
671     + */
672     + return rc;
673     + }
674     +
675     + starting_lsn += (lsn + 1);
676     + count -= (lsn + 1);
677     + lsn = -1;
678     + offset += SECTOR_SIZE;
679     + }
680     +
681     + /* Check for any remaining sectors after the last split. This
682     + * could potentially be the whole request, but that should be a
683     + * rare case because requests should only be processed by the
684     + * thread if we know an error occurred or they contained one or
685     + * more remapped sectors.
686     + */
687     + if (count) {
688     + job.sector = starting_lsn;
689     + job.count = count;
690     + rc = dm_io_sync(1, &job, rw, &pl, offset, &error);
691     + if (rc) {
692     + /* If this I/O failed, then one of the sectors
693     + * in this request needs to be relocated.
694     + */
695     + rc = bbr_io_remap_error(bbr_id, rw, starting_lsn,
696     + count, pl.page, offset);
697     + if (rc) {
698     + /* KMC: Return? Or continue to next bvec? */
699     + return rc;
700     + }
701     + }
702     + }
703     + }
704     +
705     + return 0;
706     +}
707     +
708     +static void bbr_io_process_requests(struct bbr_private *bbr_id,
709     + struct bio *bio)
710     +{
711     + struct bio *next;
712     + int rc;
713     +
714     + while (bio) {
715     + next = bio->bi_next;
716     + bio->bi_next = NULL;
717     +
718     + rc = bbr_io_process_request(bbr_id, bio);
719     +
720     + bio_endio(bio, bio->bi_size, rc);
721     +
722     + bio = next;
723     + }
724     +}
725     +
726     +/**
727     + * bbr_remap_handler
728     + *
729     + * This is the handler for the bbr work-queue.
730     + *
731     + * I/O requests should only be sent to this handler if we know that:
732     + * a) the request contains at least one remapped sector.
733     + * or
734     + * b) the request caused an error on the normal I/O path.
735     + *
736     + * This function uses synchronous I/O, so sending a request to this
737     + * thread that doesn't need special processing will cause severe
738     + * performance degradation.
739     + **/
740     +static void bbr_remap_handler(void *data)
741     +{
742     + struct bbr_private *bbr_id = data;
743     + struct bio *bio;
744     + unsigned long flags;
745     +
746     + spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
747     + bio = bio_list_get(&bbr_id->remap_ios);
748     + spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);
749     +
750     + bbr_io_process_requests(bbr_id, bio);
751     +}
752     +
753     +/**
754     + * bbr_endio
755     + *
756     + * This is the callback for normal write requests. Check for an error
757     + * during the I/O, and send to the thread for processing if necessary.
758     + **/
759     +static int bbr_endio(struct dm_target *ti, struct bio *bio,
760     + int error, union map_info *map_context)
761     +{
762     + struct bbr_private *bbr_id = ti->private;
763     + struct dm_bio_details *bbr_io = map_context->ptr;
764     +
765     + if (error && bbr_io) {
766     + unsigned long flags;
767     + char b[32];
768     +
769     + dm_bio_restore(bbr_io, bio);
770     + map_context->ptr = NULL;
771     +
772     + DMERR("dm-bbr: device %s: I/O failure on sector %lu. "
773     + "Scheduling for retry.",
774     + format_dev_t(b, bbr_id->dev->bdev->bd_dev),
775     + (unsigned long)bio->bi_sector);
776     +
777     + spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
778     + bio_list_add(&bbr_id->remap_ios, bio);
779     + spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);
780     +
781     + queue_work(dm_bbr_wq, &bbr_id->remap_work);
782     +
783     + error = 1;
784     + }
785     +
786     + if (bbr_io)
787     + mempool_free(bbr_io, bbr_io_pool);
788     +
789     + return error;
790     +}
791     +
792     +/**
793     + * Construct a bbr mapping
794     + **/
795     +static int bbr_ctr(struct dm_target *ti, unsigned int argc, char **argv)
796     +{
797     + struct bbr_private *bbr_id;
798     + unsigned long block_size;
799     + char *end;
800     + int rc = -EINVAL;
801     +
802     + if (argc != 8) {
803     + ti->error = "dm-bbr requires exactly 8 arguments: "
804     + "device offset table1_lsn table2_lsn table_size start_replacement nr_replacement_blks block_size";
805     + goto out1;
806     + }
807     +
808     + bbr_id = bbr_alloc_private();
809     + if (!bbr_id) {
810     + ti->error = "dm-bbr: Error allocating bbr private data.";
811     + goto out1;
812     + }
813     +
814     + bbr_id->offset = simple_strtoull(argv[1], &end, 10);
815     + bbr_id->lba_table1 = simple_strtoull(argv[2], &end, 10);
816     + bbr_id->lba_table2 = simple_strtoull(argv[3], &end, 10);
817     + bbr_id->nr_sects_bbr_table = simple_strtoull(argv[4], &end, 10);
818     + bbr_id->start_replacement_sect = simple_strtoull(argv[5], &end, 10);
819     + bbr_id->nr_replacement_blks = simple_strtoull(argv[6], &end, 10);
820     + block_size = simple_strtoul(argv[7], &end, 10);
821     + bbr_id->blksize_in_sects = (block_size >> SECTOR_SHIFT);
822     +
823     + bbr_id->bbr_table = vmalloc(bbr_id->nr_sects_bbr_table << SECTOR_SHIFT);
824     + if (!bbr_id->bbr_table) {
825     + ti->error = "dm-bbr: Error allocating bbr table.";
826     + goto out2;
827     + }
828     +
829     + if (dm_get_device(ti, argv[0], 0, ti->len,
830     + dm_table_get_mode(ti->table), &bbr_id->dev)) {
831     + ti->error = "dm-bbr: Device lookup failed";
832     + goto out2;
833     + }
834     +
835     + rc = bbr_setup(bbr_id);
836     + if (rc) {
837     + ti->error = "dm-bbr: Device setup failed";
838     + goto out3;
839     + }
840     +
841     + ti->private = bbr_id;
842     + return 0;
843     +
844     +out3:
845     + dm_put_device(ti, bbr_id->dev);
846     +out2:
847     + bbr_free_private(bbr_id);
848     +out1:
849     + return rc;
850     +}
851     +
852     +static void bbr_dtr(struct dm_target *ti)
853     +{
854     + struct bbr_private *bbr_id = ti->private;
855     +
856     + dm_put_device(ti, bbr_id->dev);
857     + bbr_free_private(bbr_id);
858     +}
859     +
860     +static int bbr_map(struct dm_target *ti, struct bio *bio,
861     + union map_info *map_context)
862     +{
863     + struct bbr_private *bbr_id = ti->private;
864     + struct dm_bio_details *bbr_io;
865     + unsigned long flags;
866     + int rc = 1;
867     +
868     + bio->bi_sector += bbr_id->offset;
869     +
870     + if (atomic_read(&bbr_id->in_use_replacement_blks) == 0 ||
871     + !bbr_remap_probe(bbr_id, bio->bi_sector, bio_sectors(bio))) {
872     + /* No existing remaps or this request doesn't
873     + * contain any remapped sectors.
874     + */
875     + bio->bi_bdev = bbr_id->dev->bdev;
876     +
877     + bbr_io = mempool_alloc(bbr_io_pool, GFP_NOIO);
878     + dm_bio_record(bbr_io, bio);
879     + map_context->ptr = bbr_io;
880     + } else {
881     + /* This request has at least one remapped sector.
882     + * Give it to the work-queue for processing.
883     + */
884     + map_context->ptr = NULL;
885     + spin_lock_irqsave(&bbr_id->remap_ios_lock, flags);
886     + bio_list_add(&bbr_id->remap_ios, bio);
887     + spin_unlock_irqrestore(&bbr_id->remap_ios_lock, flags);
888     +
889     + queue_work(dm_bbr_wq, &bbr_id->remap_work);
890     + rc = 0;
891     + }
892     +
893     + return rc;
894     +}
895     +
896     +static int bbr_status(struct dm_target *ti, status_type_t type,
897     + char *result, unsigned int maxlen)
898     +{
899     + struct bbr_private *bbr_id = ti->private;
900     + char b[BDEVNAME_SIZE];
901     +
902     + switch (type) {
903     + case STATUSTYPE_INFO:
904     + result[0] = '\0';
905     + break;
906     +
907     + case STATUSTYPE_TABLE:
908     + snprintf(result, maxlen, "%s "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" %u",
909     + format_dev_t(b, bbr_id->dev->bdev->bd_dev),
910     + bbr_id->offset, bbr_id->lba_table1, bbr_id->lba_table2,
911     + bbr_id->nr_sects_bbr_table,
912     + bbr_id->start_replacement_sect,
913     + bbr_id->nr_replacement_blks,
914     + bbr_id->blksize_in_sects << SECTOR_SHIFT);
915     + break;
916     + }
917     + return 0;
918     +}
919     +
920     +static struct target_type bbr_target = {
921     + .name = "bbr",
922     + .version= {1, 0, 1},
923     + .module = THIS_MODULE,
924     + .ctr = bbr_ctr,
925     + .dtr = bbr_dtr,
926     + .map = bbr_map,
927     + .end_io = bbr_endio,
928     + .status = bbr_status,
929     +};
930     +
931     +int __init dm_bbr_init(void)
932     +{
933     + int rc;
934     +
935     + rc = dm_register_target(&bbr_target);
936     + if (rc) {
937     + DMERR("dm-bbr: error registering target.");
938     + goto err1;
939     + }
940     +
941     + bbr_remap_cache = kmem_cache_create("bbr-remap",
942     + sizeof(struct bbr_runtime_remap),
943     + 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
944     + if (!bbr_remap_cache) {
945     + DMERR("dm-bbr: error creating remap cache.");
946     + rc = -ENOMEM;
947     + goto err2;
948     + }
949     +
950     + bbr_io_cache = kmem_cache_create("bbr-io", sizeof(struct dm_bio_details),
951     + 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
952     + if (!bbr_io_cache) {
953     + DMERR("dm-bbr: error creating io cache.");
954     + rc = -ENOMEM;
955     + goto err3;
956     + }
957     +
958     + bbr_io_pool = mempool_create(256, mempool_alloc_slab,
959     + mempool_free_slab, bbr_io_cache);
960     + if (!bbr_io_pool) {
961     + DMERR("dm-bbr: error creating io mempool.");
962     + rc = -ENOMEM;
963     + goto err4;
964     + }
965     +
966     + dm_bbr_wq = create_workqueue("dm-bbr");
967     + if (!dm_bbr_wq) {
968     + DMERR("dm-bbr: error creating work-queue.");
969     + rc = -ENOMEM;
970     + goto err5;
971     + }
972     +
973     + rc = dm_io_get(1);
974     + if (rc) {
975     + DMERR("dm-bbr: error initializing I/O service.");
976     + goto err6;
977     + }
978     +
979     + return 0;
980     +
981     +err6:
982     + destroy_workqueue(dm_bbr_wq);
983     +err5:
984     + mempool_destroy(bbr_io_pool);
985     +err4:
986     + kmem_cache_destroy(bbr_io_cache);
987     +err3:
988     + kmem_cache_destroy(bbr_remap_cache);
989     +err2:
990     + dm_unregister_target(&bbr_target);
991     +err1:
992     + return rc;
993     +}
994     +
995     +void __exit dm_bbr_exit(void)
996     +{
997     + dm_io_put(1);
998     + destroy_workqueue(dm_bbr_wq);
999     + mempool_destroy(bbr_io_pool);
1000     + kmem_cache_destroy(bbr_io_cache);
1001     + kmem_cache_destroy(bbr_remap_cache);
1002     + dm_unregister_target(&bbr_target);
1003     +}
1004     +
1005     +module_init(dm_bbr_init);
1006     +module_exit(dm_bbr_exit);
1007     +MODULE_LICENSE("GPL");
1008     diff -urNpX dontdiff linux-2.6.12-rc2-gentoo/drivers/md/dm-bbr.h linux-dsd/drivers/md/dm-bbr.h
1009     --- linux-2.6.12-rc2-gentoo/drivers/md/dm-bbr.h 1970-01-01 01:00:00.000000000 +0100
1010     +++ linux-dsd/drivers/md/dm-bbr.h 2005-04-06 10:06:16.000000000 +0100
1011     @@ -0,0 +1,125 @@
1012     +/*
1013     + * (C) Copyright IBM Corp. 2002, 2004
1014     + *
1015     + * This program is free software; you can redistribute it and/or modify
1016     + * it under the terms of the GNU General Public License as published by
1017     + * the Free Software Foundation; either version 2 of the License, or
1018     + * (at your option) any later version.
1019     + *
1020     + * This program is distributed in the hope that it will be useful,
1021     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
1022     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
1023     + * the GNU General Public License for more details.
1024     + *
1025     + * You should have received a copy of the GNU General Public License
1026     + * along with this program; if not, write to the Free Software
1027     + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1028     + *
1029     + * linux/drivers/md/dm-bbr.h
1030     + *
1031     + * Bad-block-relocation (BBR) target for device-mapper.
1032     + *
1033     + * The BBR target is designed to remap I/O write failures to another safe
1034     + * location on disk. Note that most disk drives have BBR built into them;
1035     + * this means that our software BBR will only be activated when all hardware
1036     + * BBR replacement sectors have been used.
1037     + */
1038     +
1039     +#define BBR_TABLE_SIGNATURE 0x42627254 /* BbrT */
1040     +#define BBR_ENTRIES_PER_SECT 31
1041     +#define INITIAL_CRC 0xFFFFFFFF
1042     +#define CRC_POLYNOMIAL 0xEDB88320L
1043     +
1044     +/**
1045     + * Macros to cleanly print 64-bit numbers on both 32-bit and 64-bit machines.
1046     + * Use these in place of %Ld, %Lu, and %Lx.
1047     + **/
1048     +#if BITS_PER_LONG > 32
1049     +#define PFU64 "%lu"
1050     +#else
1051     +#define PFU64 "%Lu"
1052     +#endif
1053     +
1054     +/**
1055     + * struct bbr_table_entry
1056     + * @bad_sect: LBA of bad location.
1057     + * @replacement_sect: LBA of new location.
1058     + *
1059     + * Structure to describe one BBR remap.
1060     + **/
1061     +struct bbr_table_entry {
1062     + u64 bad_sect;
1063     + u64 replacement_sect;
1064     +};
1065     +
1066     +/**
1067     + * struct bbr_table
1068     + * @signature: Signature on each BBR table sector.
1069     + * @crc: CRC for this table sector.
1070     + * @sequence_number: Used to resolve conflicts when primary and secondary
1071     + * tables do not match.
1072     + * @in_use_cnt: Number of in-use table entries.
1073     + * @entries: Actual table of remaps.
1074     + *
1075     + * Structure to describe each sector of the metadata table. Each sector in this
1076     + * table can describe 31 remapped sectors.
1077     + **/
1078     +struct bbr_table {
1079     + u32 signature;
1080     + u32 crc;
1081     + u32 sequence_number;
1082     + u32 in_use_cnt;
1083     + struct bbr_table_entry entries[BBR_ENTRIES_PER_SECT];
1084     +};
1085     +
1086     +/**
1087     + * struct bbr_runtime_remap
1088     + *
1089     + * Node in the binary tree used to keep track of remaps.
1090     + **/
1091     +struct bbr_runtime_remap {
1092     + struct bbr_table_entry remap;
1093     + struct bbr_runtime_remap *left;
1094     + struct bbr_runtime_remap *right;
1095     +};
1096     +
1097     +/**
1098     + * struct bbr_private
1099     + * @dev: Info about underlying device.
1100     + * @bbr_table: Copy of metadata table.
1101     + * @remap_root: Binary tree containing all remaps.
1102     + * @remap_root_lock: Lock for the binary tree.
1103     + * @remap_work: For adding work items to the work-queue.
1104     + * @remap_ios: List of I/Os for the work-queue to handle.
1105     + * @remap_ios_lock: Lock for the remap_ios list.
1106     + * @offset: LBA of data area.
1107     + * @lba_table1: LBA of primary BBR table.
1108     + * @lba_table2: LBA of secondary BBR table.
1109     + * @nr_sects_bbr_table: Size of each BBR table.
1110     + * @nr_replacement_blks: Number of replacement blocks.
1111     + * @start_replacement_sect: LBA of start of replacement blocks.
1112     + * @blksize_in_sects: Size of each block.
1113     + * @in_use_replacement_blks: Current number of remapped blocks.
1114     + *
1115     + * Private data for each BBR target.
1116     + **/
1117     +struct bbr_private {
1118     + struct dm_dev *dev;
1119     + struct bbr_table *bbr_table;
1120     + struct bbr_runtime_remap *remap_root;
1121     + spinlock_t remap_root_lock;
1122     +
1123     + struct work_struct remap_work;
1124     + struct bio_list remap_ios;
1125     + spinlock_t remap_ios_lock;
1126     +
1127     + u64 offset;
1128     + u64 lba_table1;
1129     + u64 lba_table2;
1130     + u64 nr_sects_bbr_table;
1131     + u64 start_replacement_sect;
1132     + u64 nr_replacement_blks;
1133     + u32 blksize_in_sects;
1134     + atomic_t in_use_replacement_blks;
1135     +};
1136     +
1137     diff -urNpX dontdiff linux-2.6.12-rc2-gentoo/drivers/md/Kconfig linux-dsd/drivers/md/Kconfig
1138     --- linux-2.6.12-rc2-gentoo/drivers/md/Kconfig 2005-04-06 09:46:58.000000000 +0100
1139     +++ linux-dsd/drivers/md/Kconfig 2005-04-06 10:07:02.000000000 +0100
1140     @@ -236,5 +236,16 @@ config DM_MULTIPATH_EMC
1141     ---help---
1142     Multipath support for EMC CX/AX series hardware.
1143    
1144     +config BLK_DEV_DM_BBR
1145     + tristate "Bad Block Relocation Device Target (EXPERIMENTAL)"
1146     + depends on BLK_DEV_DM && EXPERIMENTAL
1147     + ---help---
1148     + Support for devices with software-based bad-block-relocation.
1149     +
1150     + To compile this as a module, choose M here: the module will be
1151     + called dm-bbr.
1152     +
1153     + If unsure, say N.
1154     +
1155     endmenu
1156    
1157     diff -urNpX dontdiff linux-2.6.12-rc2-gentoo/drivers/md/Makefile linux-dsd/drivers/md/Makefile
1158     --- linux-2.6.12-rc2-gentoo/drivers/md/Makefile 2005-04-06 09:46:58.000000000 +0100
1159     +++ linux-dsd/drivers/md/Makefile 2005-04-06 10:06:16.000000000 +0100
1160     @@ -36,6 +36,7 @@ obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc
1161     obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
1162     obj-$(CONFIG_DM_MIRROR) += dm-mirror.o
1163     obj-$(CONFIG_DM_ZERO) += dm-zero.o
1164     +obj-$(CONFIG_BLK_DEV_DM_BBR) += dm-bbr.o
1165    
1166     quiet_cmd_unroll = UNROLL $@
1167     cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \

  ViewVC Help
Powered by ViewVC 1.1.20