| 1 |
From: David S. Miller <davem@sunset.davemloft.net> |
| 2 |
Date: Thu, 12 Apr 2007 04:38:45 +0000 (-0700) |
| 3 |
Subject: [SPARC64]: Fix SBUS IOMMU allocation code. |
| 4 |
X-Git-Tag: v2.6.21-rc7~10^2~5 |
| 5 |
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=2f3a2efd85b698e51e90f06a37d85066725fb4c4 |
| 6 |
|
| 7 |
[SPARC64]: Fix SBUS IOMMU allocation code. |
| 8 |
|
| 9 |
There are several IOMMU allocator bugs. Instead of trying to fix this |
| 10 |
overly complicated code, just mirror the PCI IOMMU arena allocator |
| 11 |
which is very stable and well stress tested. |
| 12 |
|
| 13 |
I tried to make the code as identical as possible so we can switch |
| 14 |
sun4u PCI and SBUS over to a common piece of IOMMU code. All that |
| 15 |
will be needed are two callbacks, one to do a full IOMMU flush and one |
| 16 |
to do a streaming buffer flush. |
| 17 |
|
| 18 |
This patch gets rid of a lot of hangs and mysterious crashes on SBUS |
| 19 |
sparc64 systems, at least for me. |
| 20 |
|
| 21 |
Signed-off-by: David S. Miller <davem@davemloft.net> |
| 22 |
--- |
| 23 |
|
| 24 |
diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c |
| 25 |
index 01d6d86..14f78fb 100644 |
| 26 |
--- a/arch/sparc64/kernel/sbus.c |
| 27 |
+++ b/arch/sparc64/kernel/sbus.c |
| 28 |
@@ -24,48 +24,25 @@ |
| 29 |
|
| 30 |
#include "iommu_common.h" |
| 31 |
|
| 32 |
-/* These should be allocated on an SMP_CACHE_BYTES |
| 33 |
- * aligned boundary for optimal performance. |
| 34 |
- * |
| 35 |
- * On SYSIO, using an 8K page size we have 1GB of SBUS |
| 36 |
- * DMA space mapped. We divide this space into equally |
| 37 |
- * sized clusters. We allocate a DMA mapping from the |
| 38 |
- * cluster that matches the order of the allocation, or |
| 39 |
- * if the order is greater than the number of clusters, |
| 40 |
- * we try to allocate from the last cluster. |
| 41 |
- */ |
| 42 |
- |
| 43 |
-#define NCLUSTERS 8UL |
| 44 |
-#define ONE_GIG (1UL * 1024UL * 1024UL * 1024UL) |
| 45 |
-#define CLUSTER_SIZE (ONE_GIG / NCLUSTERS) |
| 46 |
-#define CLUSTER_MASK (CLUSTER_SIZE - 1) |
| 47 |
-#define CLUSTER_NPAGES (CLUSTER_SIZE >> IO_PAGE_SHIFT) |
| 48 |
#define MAP_BASE ((u32)0xc0000000) |
| 49 |
|
| 50 |
+struct sbus_iommu_arena { |
| 51 |
+ unsigned long *map; |
| 52 |
+ unsigned int hint; |
| 53 |
+ unsigned int limit; |
| 54 |
+}; |
| 55 |
+ |
| 56 |
struct sbus_iommu { |
| 57 |
-/*0x00*/spinlock_t lock; |
| 58 |
+ spinlock_t lock; |
| 59 |
|
| 60 |
-/*0x08*/iopte_t *page_table; |
| 61 |
-/*0x10*/unsigned long strbuf_regs; |
| 62 |
-/*0x18*/unsigned long iommu_regs; |
| 63 |
-/*0x20*/unsigned long sbus_control_reg; |
| 64 |
+ struct sbus_iommu_arena arena; |
| 65 |
|
| 66 |
-/*0x28*/volatile unsigned long strbuf_flushflag; |
| 67 |
+ iopte_t *page_table; |
| 68 |
+ unsigned long strbuf_regs; |
| 69 |
+ unsigned long iommu_regs; |
| 70 |
+ unsigned long sbus_control_reg; |
| 71 |
|
| 72 |
- /* If NCLUSTERS is ever decresed to 4 or lower, |
| 73 |
- * you must increase the size of the type of |
| 74 |
- * these counters. You have been duly warned. -DaveM |
| 75 |
- */ |
| 76 |
-/*0x30*/struct { |
| 77 |
- u16 next; |
| 78 |
- u16 flush; |
| 79 |
- } alloc_info[NCLUSTERS]; |
| 80 |
- |
| 81 |
- /* The lowest used consistent mapping entry. Since |
| 82 |
- * we allocate consistent maps out of cluster 0 this |
| 83 |
- * is relative to the beginning of closter 0. |
| 84 |
- */ |
| 85 |
-/*0x50*/u32 lowest_consistent_map; |
| 86 |
+ volatile unsigned long strbuf_flushflag; |
| 87 |
}; |
| 88 |
|
| 89 |
/* Offsets from iommu_regs */ |
| 90 |
@@ -91,19 +68,6 @@ static void __iommu_flushall(struct sbus_iommu *iommu) |
| 91 |
tag += 8UL; |
| 92 |
} |
| 93 |
upa_readq(iommu->sbus_control_reg); |
| 94 |
- |
| 95 |
- for (entry = 0; entry < NCLUSTERS; entry++) { |
| 96 |
- iommu->alloc_info[entry].flush = |
| 97 |
- iommu->alloc_info[entry].next; |
| 98 |
- } |
| 99 |
-} |
| 100 |
- |
| 101 |
-static void iommu_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages) |
| 102 |
-{ |
| 103 |
- while (npages--) |
| 104 |
- upa_writeq(base + (npages << IO_PAGE_SHIFT), |
| 105 |
- iommu->iommu_regs + IOMMU_FLUSH); |
| 106 |
- upa_readq(iommu->sbus_control_reg); |
| 107 |
} |
| 108 |
|
| 109 |
/* Offsets from strbuf_regs */ |
| 110 |
@@ -156,178 +120,115 @@ static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long |
| 111 |
base, npages); |
| 112 |
} |
| 113 |
|
| 114 |
-static iopte_t *alloc_streaming_cluster(struct sbus_iommu *iommu, unsigned long npages) |
| 115 |
+/* Based largely upon the ppc64 iommu allocator. */ |
| 116 |
+static long sbus_arena_alloc(struct sbus_iommu *iommu, unsigned long npages) |
| 117 |
{ |
| 118 |
- iopte_t *iopte, *limit, *first, *cluster; |
| 119 |
- unsigned long cnum, ent, nent, flush_point, found; |
| 120 |
- |
| 121 |
- cnum = 0; |
| 122 |
- nent = 1; |
| 123 |
- while ((1UL << cnum) < npages) |
| 124 |
- cnum++; |
| 125 |
- if(cnum >= NCLUSTERS) { |
| 126 |
- nent = 1UL << (cnum - NCLUSTERS); |
| 127 |
- cnum = NCLUSTERS - 1; |
| 128 |
- } |
| 129 |
- iopte = iommu->page_table + (cnum * CLUSTER_NPAGES); |
| 130 |
- |
| 131 |
- if (cnum == 0) |
| 132 |
- limit = (iommu->page_table + |
| 133 |
- iommu->lowest_consistent_map); |
| 134 |
- else |
| 135 |
- limit = (iopte + CLUSTER_NPAGES); |
| 136 |
- |
| 137 |
- iopte += ((ent = iommu->alloc_info[cnum].next) << cnum); |
| 138 |
- flush_point = iommu->alloc_info[cnum].flush; |
| 139 |
- |
| 140 |
- first = iopte; |
| 141 |
- cluster = NULL; |
| 142 |
- found = 0; |
| 143 |
- for (;;) { |
| 144 |
- if (iopte_val(*iopte) == 0UL) { |
| 145 |
- found++; |
| 146 |
- if (!cluster) |
| 147 |
- cluster = iopte; |
| 148 |
+ struct sbus_iommu_arena *arena = &iommu->arena; |
| 149 |
+ unsigned long n, i, start, end, limit; |
| 150 |
+ int pass; |
| 151 |
+ |
| 152 |
+ limit = arena->limit; |
| 153 |
+ start = arena->hint; |
| 154 |
+ pass = 0; |
| 155 |
+ |
| 156 |
+again: |
| 157 |
+ n = find_next_zero_bit(arena->map, limit, start); |
| 158 |
+ end = n + npages; |
| 159 |
+ if (unlikely(end >= limit)) { |
| 160 |
+ if (likely(pass < 1)) { |
| 161 |
+ limit = start; |
| 162 |
+ start = 0; |
| 163 |
+ __iommu_flushall(iommu); |
| 164 |
+ pass++; |
| 165 |
+ goto again; |
| 166 |
} else { |
| 167 |
- /* Used cluster in the way */ |
| 168 |
- cluster = NULL; |
| 169 |
- found = 0; |
| 170 |
+ /* Scanned the whole thing, give up. */ |
| 171 |
+ return -1; |
| 172 |
} |
| 173 |
+ } |
| 174 |
|
| 175 |
- if (found == nent) |
| 176 |
- break; |
| 177 |
- |
| 178 |
- iopte += (1 << cnum); |
| 179 |
- ent++; |
| 180 |
- if (iopte >= limit) { |
| 181 |
- iopte = (iommu->page_table + (cnum * CLUSTER_NPAGES)); |
| 182 |
- ent = 0; |
| 183 |
- |
| 184 |
- /* Multiple cluster allocations must not wrap */ |
| 185 |
- cluster = NULL; |
| 186 |
- found = 0; |
| 187 |
+ for (i = n; i < end; i++) { |
| 188 |
+ if (test_bit(i, arena->map)) { |
| 189 |
+ start = i + 1; |
| 190 |
+ goto again; |
| 191 |
} |
| 192 |
- if (ent == flush_point) |
| 193 |
- __iommu_flushall(iommu); |
| 194 |
- if (iopte == first) |
| 195 |
- goto bad; |
| 196 |
} |
| 197 |
|
| 198 |
- /* ent/iopte points to the last cluster entry we're going to use, |
| 199 |
- * so save our place for the next allocation. |
| 200 |
- */ |
| 201 |
- if ((iopte + (1 << cnum)) >= limit) |
| 202 |
- ent = 0; |
| 203 |
- else |
| 204 |
- ent = ent + 1; |
| 205 |
- iommu->alloc_info[cnum].next = ent; |
| 206 |
- if (ent == flush_point) |
| 207 |
- __iommu_flushall(iommu); |
| 208 |
- |
| 209 |
- /* I've got your streaming cluster right here buddy boy... */ |
| 210 |
- return cluster; |
| 211 |
- |
| 212 |
-bad: |
| 213 |
- printk(KERN_EMERG "sbus: alloc_streaming_cluster of npages(%ld) failed!\n", |
| 214 |
- npages); |
| 215 |
- return NULL; |
| 216 |
+ for (i = n; i < end; i++) |
| 217 |
+ __set_bit(i, arena->map); |
| 218 |
+ |
| 219 |
+ arena->hint = end; |
| 220 |
+ |
| 221 |
+ return n; |
| 222 |
} |
| 223 |
|
| 224 |
-static void free_streaming_cluster(struct sbus_iommu *iommu, u32 base, unsigned long npages) |
| 225 |
+static void sbus_arena_free(struct sbus_iommu_arena *arena, unsigned long base, unsigned long npages) |
| 226 |
{ |
| 227 |
- unsigned long cnum, ent, nent; |
| 228 |
- iopte_t *iopte; |
| 229 |
+ unsigned long i; |
| 230 |
|
| 231 |
- cnum = 0; |
| 232 |
- nent = 1; |
| 233 |
- while ((1UL << cnum) < npages) |
| 234 |
- cnum++; |
| 235 |
- if(cnum >= NCLUSTERS) { |
| 236 |
- nent = 1UL << (cnum - NCLUSTERS); |
| 237 |
- cnum = NCLUSTERS - 1; |
| 238 |
- } |
| 239 |
- ent = (base & CLUSTER_MASK) >> (IO_PAGE_SHIFT + cnum); |
| 240 |
- iopte = iommu->page_table + ((base - MAP_BASE) >> IO_PAGE_SHIFT); |
| 241 |
- do { |
| 242 |
- iopte_val(*iopte) = 0UL; |
| 243 |
- iopte += 1 << cnum; |
| 244 |
- } while(--nent); |
| 245 |
- |
| 246 |
- /* If the global flush might not have caught this entry, |
| 247 |
- * adjust the flush point such that we will flush before |
| 248 |
- * ever trying to reuse it. |
| 249 |
- */ |
| 250 |
-#define between(X,Y,Z) (((Z) - (Y)) >= ((X) - (Y))) |
| 251 |
- if (between(ent, iommu->alloc_info[cnum].next, iommu->alloc_info[cnum].flush)) |
| 252 |
- iommu->alloc_info[cnum].flush = ent; |
| 253 |
-#undef between |
| 254 |
+ for (i = base; i < (base + npages); i++) |
| 255 |
+ __clear_bit(i, arena->map); |
| 256 |
} |
| 257 |
|
| 258 |
-/* We allocate consistent mappings from the end of cluster zero. */ |
| 259 |
-static iopte_t *alloc_consistent_cluster(struct sbus_iommu *iommu, unsigned long npages) |
| 260 |
+static void sbus_iommu_table_init(struct sbus_iommu *iommu, unsigned int tsbsize) |
| 261 |
{ |
| 262 |
- iopte_t *iopte; |
| 263 |
+ unsigned long tsbbase, order, sz, num_tsb_entries; |
| 264 |
|
| 265 |
- iopte = iommu->page_table + (1 * CLUSTER_NPAGES); |
| 266 |
- while (iopte > iommu->page_table) { |
| 267 |
- iopte--; |
| 268 |
- if (!(iopte_val(*iopte) & IOPTE_VALID)) { |
| 269 |
- unsigned long tmp = npages; |
| 270 |
+ num_tsb_entries = tsbsize / sizeof(iopte_t); |
| 271 |
|
| 272 |
- while (--tmp) { |
| 273 |
- iopte--; |
| 274 |
- if (iopte_val(*iopte) & IOPTE_VALID) |
| 275 |
- break; |
| 276 |
- } |
| 277 |
- if (tmp == 0) { |
| 278 |
- u32 entry = (iopte - iommu->page_table); |
| 279 |
+ /* Setup initial software IOMMU state. */ |
| 280 |
+ spin_lock_init(&iommu->lock); |
| 281 |
|
| 282 |
- if (entry < iommu->lowest_consistent_map) |
| 283 |
- iommu->lowest_consistent_map = entry; |
| 284 |
- return iopte; |
| 285 |
- } |
| 286 |
- } |
| 287 |
+ /* Allocate and initialize the free area map. */ |
| 288 |
+ sz = num_tsb_entries / 8; |
| 289 |
+ sz = (sz + 7UL) & ~7UL; |
| 290 |
+ iommu->arena.map = kzalloc(sz, GFP_KERNEL); |
| 291 |
+ if (!iommu->arena.map) { |
| 292 |
+ prom_printf("PCI_IOMMU: Error, kmalloc(arena.map) failed.\n"); |
| 293 |
+ prom_halt(); |
| 294 |
+ } |
| 295 |
+ iommu->arena.limit = num_tsb_entries; |
| 296 |
+ |
| 297 |
+ /* Now allocate and setup the IOMMU page table itself. */ |
| 298 |
+ order = get_order(tsbsize); |
| 299 |
+ tsbbase = __get_free_pages(GFP_KERNEL, order); |
| 300 |
+ if (!tsbbase) { |
| 301 |
+ prom_printf("IOMMU: Error, gfp(tsb) failed.\n"); |
| 302 |
+ prom_halt(); |
| 303 |
} |
| 304 |
- return NULL; |
| 305 |
+ iommu->page_table = (iopte_t *)tsbbase; |
| 306 |
+ memset(iommu->page_table, 0, tsbsize); |
| 307 |
} |
| 308 |
|
| 309 |
-static void free_consistent_cluster(struct sbus_iommu *iommu, u32 base, unsigned long npages) |
| 310 |
+static inline iopte_t *alloc_npages(struct sbus_iommu *iommu, unsigned long npages) |
| 311 |
{ |
| 312 |
- iopte_t *iopte = iommu->page_table + ((base - MAP_BASE) >> IO_PAGE_SHIFT); |
| 313 |
+ long entry; |
| 314 |
|
| 315 |
- if ((iopte - iommu->page_table) == iommu->lowest_consistent_map) { |
| 316 |
- iopte_t *walk = iopte + npages; |
| 317 |
- iopte_t *limit; |
| 318 |
+ entry = sbus_arena_alloc(iommu, npages); |
| 319 |
+ if (unlikely(entry < 0)) |
| 320 |
+ return NULL; |
| 321 |
|
| 322 |
- limit = iommu->page_table + CLUSTER_NPAGES; |
| 323 |
- while (walk < limit) { |
| 324 |
- if (iopte_val(*walk) != 0UL) |
| 325 |
- break; |
| 326 |
- walk++; |
| 327 |
- } |
| 328 |
- iommu->lowest_consistent_map = |
| 329 |
- (walk - iommu->page_table); |
| 330 |
- } |
| 331 |
+ return iommu->page_table + entry; |
| 332 |
+} |
| 333 |
|
| 334 |
- while (npages--) |
| 335 |
- *iopte++ = __iopte(0UL); |
| 336 |
+static inline void free_npages(struct sbus_iommu *iommu, dma_addr_t base, unsigned long npages) |
| 337 |
+{ |
| 338 |
+ sbus_arena_free(&iommu->arena, base >> IO_PAGE_SHIFT, npages); |
| 339 |
} |
| 340 |
|
| 341 |
void *sbus_alloc_consistent(struct sbus_dev *sdev, size_t size, dma_addr_t *dvma_addr) |
| 342 |
{ |
| 343 |
- unsigned long order, first_page, flags; |
| 344 |
struct sbus_iommu *iommu; |
| 345 |
iopte_t *iopte; |
| 346 |
+ unsigned long flags, order, first_page; |
| 347 |
void *ret; |
| 348 |
int npages; |
| 349 |
|
| 350 |
- if (size <= 0 || sdev == NULL || dvma_addr == NULL) |
| 351 |
- return NULL; |
| 352 |
- |
| 353 |
size = IO_PAGE_ALIGN(size); |
| 354 |
order = get_order(size); |
| 355 |
if (order >= 10) |
| 356 |
return NULL; |
| 357 |
+ |
| 358 |
first_page = __get_free_pages(GFP_KERNEL|__GFP_COMP, order); |
| 359 |
if (first_page == 0UL) |
| 360 |
return NULL; |
| 361 |
@@ -336,108 +237,121 @@ void *sbus_alloc_consistent(struct sbus_dev *sdev, size_t size, dma_addr_t *dvma |
| 362 |
iommu = sdev->bus->iommu; |
| 363 |
|
| 364 |
spin_lock_irqsave(&iommu->lock, flags); |
| 365 |
- iopte = alloc_consistent_cluster(iommu, size >> IO_PAGE_SHIFT); |
| 366 |
- if (iopte == NULL) { |
| 367 |
- spin_unlock_irqrestore(&iommu->lock, flags); |
| 368 |
+ iopte = alloc_npages(iommu, size >> IO_PAGE_SHIFT); |
| 369 |
+ spin_unlock_irqrestore(&iommu->lock, flags); |
| 370 |
+ |
| 371 |
+ if (unlikely(iopte == NULL)) { |
| 372 |
free_pages(first_page, order); |
| 373 |
return NULL; |
| 374 |
} |
| 375 |
|
| 376 |
- /* Ok, we're committed at this point. */ |
| 377 |
- *dvma_addr = MAP_BASE + ((iopte - iommu->page_table) << IO_PAGE_SHIFT); |
| 378 |
+ *dvma_addr = (MAP_BASE + |
| 379 |
+ ((iopte - iommu->page_table) << IO_PAGE_SHIFT)); |
| 380 |
ret = (void *) first_page; |
| 381 |
npages = size >> IO_PAGE_SHIFT; |
| 382 |
+ first_page = __pa(first_page); |
| 383 |
while (npages--) { |
| 384 |
- *iopte++ = __iopte(IOPTE_VALID | IOPTE_CACHE | IOPTE_WRITE | |
| 385 |
- (__pa(first_page) & IOPTE_PAGE)); |
| 386 |
+ iopte_val(*iopte) = (IOPTE_VALID | IOPTE_CACHE | |
| 387 |
+ IOPTE_WRITE | |
| 388 |
+ (first_page & IOPTE_PAGE)); |
| 389 |
+ iopte++; |
| 390 |
first_page += IO_PAGE_SIZE; |
| 391 |
} |
| 392 |
- iommu_flush(iommu, *dvma_addr, size >> IO_PAGE_SHIFT); |
| 393 |
- spin_unlock_irqrestore(&iommu->lock, flags); |
| 394 |
|
| 395 |
return ret; |
| 396 |
} |
| 397 |
|
| 398 |
void sbus_free_consistent(struct sbus_dev *sdev, size_t size, void *cpu, dma_addr_t dvma) |
| 399 |
{ |
| 400 |
- unsigned long order, npages; |
| 401 |
struct sbus_iommu *iommu; |
| 402 |
- |
| 403 |
- if (size <= 0 || sdev == NULL || cpu == NULL) |
| 404 |
- return; |
| 405 |
+ iopte_t *iopte; |
| 406 |
+ unsigned long flags, order, npages; |
| 407 |
|
| 408 |
npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; |
| 409 |
iommu = sdev->bus->iommu; |
| 410 |
+ iopte = iommu->page_table + |
| 411 |
+ ((dvma - MAP_BASE) >> IO_PAGE_SHIFT); |
| 412 |
+ |
| 413 |
+ spin_lock_irqsave(&iommu->lock, flags); |
| 414 |
+ |
| 415 |
+ free_npages(iommu, dvma - MAP_BASE, npages); |
| 416 |
|
| 417 |
- spin_lock_irq(&iommu->lock); |
| 418 |
- free_consistent_cluster(iommu, dvma, npages); |
| 419 |
- iommu_flush(iommu, dvma, npages); |
| 420 |
- spin_unlock_irq(&iommu->lock); |
| 421 |
+ spin_unlock_irqrestore(&iommu->lock, flags); |
| 422 |
|
| 423 |
order = get_order(size); |
| 424 |
if (order < 10) |
| 425 |
free_pages((unsigned long)cpu, order); |
| 426 |
} |
| 427 |
|
| 428 |
-dma_addr_t sbus_map_single(struct sbus_dev *sdev, void *ptr, size_t size, int dir) |
| 429 |
+dma_addr_t sbus_map_single(struct sbus_dev *sdev, void *ptr, size_t sz, int direction) |
| 430 |
{ |
| 431 |
- struct sbus_iommu *iommu = sdev->bus->iommu; |
| 432 |
- unsigned long npages, pbase, flags; |
| 433 |
- iopte_t *iopte; |
| 434 |
- u32 dma_base, offset; |
| 435 |
- unsigned long iopte_bits; |
| 436 |
+ struct sbus_iommu *iommu; |
| 437 |
+ iopte_t *base; |
| 438 |
+ unsigned long flags, npages, oaddr; |
| 439 |
+ unsigned long i, base_paddr; |
| 440 |
+ u32 bus_addr, ret; |
| 441 |
+ unsigned long iopte_protection; |
| 442 |
+ |
| 443 |
+ iommu = sdev->bus->iommu; |
| 444 |
|
| 445 |
- if (dir == SBUS_DMA_NONE) |
| 446 |
+ if (unlikely(direction == SBUS_DMA_NONE)) |
| 447 |
BUG(); |
| 448 |
|
| 449 |
- pbase = (unsigned long) ptr; |
| 450 |
- offset = (u32) (pbase & ~IO_PAGE_MASK); |
| 451 |
- size = (IO_PAGE_ALIGN(pbase + size) - (pbase & IO_PAGE_MASK)); |
| 452 |
- pbase = (unsigned long) __pa(pbase & IO_PAGE_MASK); |
| 453 |
+ oaddr = (unsigned long)ptr; |
| 454 |
+ npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); |
| 455 |
+ npages >>= IO_PAGE_SHIFT; |
| 456 |
|
| 457 |
spin_lock_irqsave(&iommu->lock, flags); |
| 458 |
- npages = size >> IO_PAGE_SHIFT; |
| 459 |
- iopte = alloc_streaming_cluster(iommu, npages); |
| 460 |
- if (iopte == NULL) |
| 461 |
- goto bad; |
| 462 |
- dma_base = MAP_BASE + ((iopte - iommu->page_table) << IO_PAGE_SHIFT); |
| 463 |
- npages = size >> IO_PAGE_SHIFT; |
| 464 |
- iopte_bits = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE; |
| 465 |
- if (dir != SBUS_DMA_TODEVICE) |
| 466 |
- iopte_bits |= IOPTE_WRITE; |
| 467 |
- while (npages--) { |
| 468 |
- *iopte++ = __iopte(iopte_bits | (pbase & IOPTE_PAGE)); |
| 469 |
- pbase += IO_PAGE_SIZE; |
| 470 |
- } |
| 471 |
- npages = size >> IO_PAGE_SHIFT; |
| 472 |
+ base = alloc_npages(iommu, npages); |
| 473 |
spin_unlock_irqrestore(&iommu->lock, flags); |
| 474 |
|
| 475 |
- return (dma_base | offset); |
| 476 |
+ if (unlikely(!base)) |
| 477 |
+ BUG(); |
| 478 |
|
| 479 |
-bad: |
| 480 |
- spin_unlock_irqrestore(&iommu->lock, flags); |
| 481 |
- BUG(); |
| 482 |
- return 0; |
| 483 |
+ bus_addr = (MAP_BASE + |
| 484 |
+ ((base - iommu->page_table) << IO_PAGE_SHIFT)); |
| 485 |
+ ret = bus_addr | (oaddr & ~IO_PAGE_MASK); |
| 486 |
+ base_paddr = __pa(oaddr & IO_PAGE_MASK); |
| 487 |
+ |
| 488 |
+ iopte_protection = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE; |
| 489 |
+ if (direction != SBUS_DMA_TODEVICE) |
| 490 |
+ iopte_protection |= IOPTE_WRITE; |
| 491 |
+ |
| 492 |
+ for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE) |
| 493 |
+ iopte_val(*base) = iopte_protection | base_paddr; |
| 494 |
+ |
| 495 |
+ return ret; |
| 496 |
} |
| 497 |
|
| 498 |
-void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t dma_addr, size_t size, int direction) |
| 499 |
+void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t bus_addr, size_t sz, int direction) |
| 500 |
{ |
| 501 |
struct sbus_iommu *iommu = sdev->bus->iommu; |
| 502 |
- u32 dma_base = dma_addr & IO_PAGE_MASK; |
| 503 |
- unsigned long flags; |
| 504 |
+ iopte_t *base; |
| 505 |
+ unsigned long flags, npages, i; |
| 506 |
+ |
| 507 |
+ if (unlikely(direction == SBUS_DMA_NONE)) |
| 508 |
+ BUG(); |
| 509 |
+ |
| 510 |
+ npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); |
| 511 |
+ npages >>= IO_PAGE_SHIFT; |
| 512 |
+ base = iommu->page_table + |
| 513 |
+ ((bus_addr - MAP_BASE) >> IO_PAGE_SHIFT); |
| 514 |
|
| 515 |
- size = (IO_PAGE_ALIGN(dma_addr + size) - dma_base); |
| 516 |
+ bus_addr &= IO_PAGE_MASK; |
| 517 |
|
| 518 |
spin_lock_irqsave(&iommu->lock, flags); |
| 519 |
- free_streaming_cluster(iommu, dma_base, size >> IO_PAGE_SHIFT); |
| 520 |
- sbus_strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT, direction); |
| 521 |
+ sbus_strbuf_flush(iommu, bus_addr, npages, direction); |
| 522 |
+ for (i = 0; i < npages; i++) |
| 523 |
+ iopte_val(base[i]) = 0UL; |
| 524 |
+ free_npages(iommu, bus_addr - MAP_BASE, npages); |
| 525 |
spin_unlock_irqrestore(&iommu->lock, flags); |
| 526 |
} |
| 527 |
|
| 528 |
#define SG_ENT_PHYS_ADDRESS(SG) \ |
| 529 |
(__pa(page_address((SG)->page)) + (SG)->offset) |
| 530 |
|
| 531 |
-static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, int nelems, unsigned long iopte_bits) |
| 532 |
+static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, |
| 533 |
+ int nused, int nelems, unsigned long iopte_protection) |
| 534 |
{ |
| 535 |
struct scatterlist *dma_sg = sg; |
| 536 |
struct scatterlist *sg_end = sg + nelems; |
| 537 |
@@ -462,7 +376,7 @@ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, in |
| 538 |
for (;;) { |
| 539 |
unsigned long tmp; |
| 540 |
|
| 541 |
- tmp = (unsigned long) SG_ENT_PHYS_ADDRESS(sg); |
| 542 |
+ tmp = SG_ENT_PHYS_ADDRESS(sg); |
| 543 |
len = sg->length; |
| 544 |
if (((tmp ^ pteval) >> IO_PAGE_SHIFT) != 0UL) { |
| 545 |
pteval = tmp & IO_PAGE_MASK; |
| 546 |
@@ -478,7 +392,7 @@ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, in |
| 547 |
sg++; |
| 548 |
} |
| 549 |
|
| 550 |
- pteval = ((pteval & IOPTE_PAGE) | iopte_bits); |
| 551 |
+ pteval = iopte_protection | (pteval & IOPTE_PAGE); |
| 552 |
while (len > 0) { |
| 553 |
*iopte++ = __iopte(pteval); |
| 554 |
pteval += IO_PAGE_SIZE; |
| 555 |
@@ -509,103 +423,111 @@ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, in |
| 556 |
} |
| 557 |
} |
| 558 |
|
| 559 |
-int sbus_map_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int dir) |
| 560 |
+int sbus_map_sg(struct sbus_dev *sdev, struct scatterlist *sglist, int nelems, int direction) |
| 561 |
{ |
| 562 |
- struct sbus_iommu *iommu = sdev->bus->iommu; |
| 563 |
- unsigned long flags, npages; |
| 564 |
- iopte_t *iopte; |
| 565 |
+ struct sbus_iommu *iommu; |
| 566 |
+ unsigned long flags, npages, iopte_protection; |
| 567 |
+ iopte_t *base; |
| 568 |
u32 dma_base; |
| 569 |
struct scatterlist *sgtmp; |
| 570 |
int used; |
| 571 |
- unsigned long iopte_bits; |
| 572 |
- |
| 573 |
- if (dir == SBUS_DMA_NONE) |
| 574 |
- BUG(); |
| 575 |
|
| 576 |
/* Fast path single entry scatterlists. */ |
| 577 |
- if (nents == 1) { |
| 578 |
- sg->dma_address = |
| 579 |
+ if (nelems == 1) { |
| 580 |
+ sglist->dma_address = |
| 581 |
sbus_map_single(sdev, |
| 582 |
- (page_address(sg->page) + sg->offset), |
| 583 |
- sg->length, dir); |
| 584 |
- sg->dma_length = sg->length; |
| 585 |
+ (page_address(sglist->page) + sglist->offset), |
| 586 |
+ sglist->length, direction); |
| 587 |
+ sglist->dma_length = sglist->length; |
| 588 |
return 1; |
| 589 |
} |
| 590 |
|
| 591 |
- npages = prepare_sg(sg, nents); |
| 592 |
+ iommu = sdev->bus->iommu; |
| 593 |
+ |
| 594 |
+ if (unlikely(direction == SBUS_DMA_NONE)) |
| 595 |
+ BUG(); |
| 596 |
+ |
| 597 |
+ npages = prepare_sg(sglist, nelems); |
| 598 |
|
| 599 |
spin_lock_irqsave(&iommu->lock, flags); |
| 600 |
- iopte = alloc_streaming_cluster(iommu, npages); |
| 601 |
- if (iopte == NULL) |
| 602 |
- goto bad; |
| 603 |
- dma_base = MAP_BASE + ((iopte - iommu->page_table) << IO_PAGE_SHIFT); |
| 604 |
+ base = alloc_npages(iommu, npages); |
| 605 |
+ spin_unlock_irqrestore(&iommu->lock, flags); |
| 606 |
+ |
| 607 |
+ if (unlikely(base == NULL)) |
| 608 |
+ BUG(); |
| 609 |
+ |
| 610 |
+ dma_base = MAP_BASE + |
| 611 |
+ ((base - iommu->page_table) << IO_PAGE_SHIFT); |
| 612 |
|
| 613 |
/* Normalize DVMA addresses. */ |
| 614 |
- sgtmp = sg; |
| 615 |
- used = nents; |
| 616 |
+ used = nelems; |
| 617 |
|
| 618 |
+ sgtmp = sglist; |
| 619 |
while (used && sgtmp->dma_length) { |
| 620 |
sgtmp->dma_address += dma_base; |
| 621 |
sgtmp++; |
| 622 |
used--; |
| 623 |
} |
| 624 |
- used = nents - used; |
| 625 |
+ used = nelems - used; |
| 626 |
|
| 627 |
- iopte_bits = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE; |
| 628 |
- if (dir != SBUS_DMA_TODEVICE) |
| 629 |
- iopte_bits |= IOPTE_WRITE; |
| 630 |
+ iopte_protection = IOPTE_VALID | IOPTE_STBUF | IOPTE_CACHE; |
| 631 |
+ if (direction != SBUS_DMA_TODEVICE) |
| 632 |
+ iopte_protection |= IOPTE_WRITE; |
| 633 |
+ |
| 634 |
+ fill_sg(base, sglist, used, nelems, iopte_protection); |
| 635 |
|
| 636 |
- fill_sg(iopte, sg, used, nents, iopte_bits); |
| 637 |
#ifdef VERIFY_SG |
| 638 |
- verify_sglist(sg, nents, iopte, npages); |
| 639 |
+ verify_sglist(sglist, nelems, base, npages); |
| 640 |
#endif |
| 641 |
- spin_unlock_irqrestore(&iommu->lock, flags); |
| 642 |
|
| 643 |
return used; |
| 644 |
- |
| 645 |
-bad: |
| 646 |
- spin_unlock_irqrestore(&iommu->lock, flags); |
| 647 |
- BUG(); |
| 648 |
- return 0; |
| 649 |
} |
| 650 |
|
| 651 |
-void sbus_unmap_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int direction) |
| 652 |
+void sbus_unmap_sg(struct sbus_dev *sdev, struct scatterlist *sglist, int nelems, int direction) |
| 653 |
{ |
| 654 |
- unsigned long size, flags; |
| 655 |
struct sbus_iommu *iommu; |
| 656 |
- u32 dvma_base; |
| 657 |
- int i; |
| 658 |
+ iopte_t *base; |
| 659 |
+ unsigned long flags, i, npages; |
| 660 |
+ u32 bus_addr; |
| 661 |
|
| 662 |
- /* Fast path single entry scatterlists. */ |
| 663 |
- if (nents == 1) { |
| 664 |
- sbus_unmap_single(sdev, sg->dma_address, sg->dma_length, direction); |
| 665 |
- return; |
| 666 |
- } |
| 667 |
+ if (unlikely(direction == SBUS_DMA_NONE)) |
| 668 |
+ BUG(); |
| 669 |
+ |
| 670 |
+ iommu = sdev->bus->iommu; |
| 671 |
+ |
| 672 |
+ bus_addr = sglist->dma_address & IO_PAGE_MASK; |
| 673 |
|
| 674 |
- dvma_base = sg[0].dma_address & IO_PAGE_MASK; |
| 675 |
- for (i = 0; i < nents; i++) { |
| 676 |
- if (sg[i].dma_length == 0) |
| 677 |
+ for (i = 1; i < nelems; i++) |
| 678 |
+ if (sglist[i].dma_length == 0) |
| 679 |
break; |
| 680 |
- } |
| 681 |
i--; |
| 682 |
- size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - dvma_base; |
| 683 |
+ npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - |
| 684 |
+ bus_addr) >> IO_PAGE_SHIFT; |
| 685 |
+ |
| 686 |
+ base = iommu->page_table + |
| 687 |
+ ((bus_addr - MAP_BASE) >> IO_PAGE_SHIFT); |
| 688 |
|
| 689 |
- iommu = sdev->bus->iommu; |
| 690 |
spin_lock_irqsave(&iommu->lock, flags); |
| 691 |
- free_streaming_cluster(iommu, dvma_base, size >> IO_PAGE_SHIFT); |
| 692 |
- sbus_strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT, direction); |
| 693 |
+ sbus_strbuf_flush(iommu, bus_addr, npages, direction); |
| 694 |
+ for (i = 0; i < npages; i++) |
| 695 |
+ iopte_val(base[i]) = 0UL; |
| 696 |
+ free_npages(iommu, bus_addr - MAP_BASE, npages); |
| 697 |
spin_unlock_irqrestore(&iommu->lock, flags); |
| 698 |
} |
| 699 |
|
| 700 |
-void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t base, size_t size, int direction) |
| 701 |
+void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t bus_addr, size_t sz, int direction) |
| 702 |
{ |
| 703 |
- struct sbus_iommu *iommu = sdev->bus->iommu; |
| 704 |
- unsigned long flags; |
| 705 |
+ struct sbus_iommu *iommu; |
| 706 |
+ unsigned long flags, npages; |
| 707 |
+ |
| 708 |
+ iommu = sdev->bus->iommu; |
| 709 |
|
| 710 |
- size = (IO_PAGE_ALIGN(base + size) - (base & IO_PAGE_MASK)); |
| 711 |
+ npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); |
| 712 |
+ npages >>= IO_PAGE_SHIFT; |
| 713 |
+ bus_addr &= IO_PAGE_MASK; |
| 714 |
|
| 715 |
spin_lock_irqsave(&iommu->lock, flags); |
| 716 |
- sbus_strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT, direction); |
| 717 |
+ sbus_strbuf_flush(iommu, bus_addr, npages, direction); |
| 718 |
spin_unlock_irqrestore(&iommu->lock, flags); |
| 719 |
} |
| 720 |
|
| 721 |
@@ -613,23 +535,25 @@ void sbus_dma_sync_single_for_device(struct sbus_dev *sdev, dma_addr_t base, siz |
| 722 |
{ |
| 723 |
} |
| 724 |
|
| 725 |
-void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int direction) |
| 726 |
+void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sglist, int nelems, int direction) |
| 727 |
{ |
| 728 |
- struct sbus_iommu *iommu = sdev->bus->iommu; |
| 729 |
- unsigned long flags, size; |
| 730 |
- u32 base; |
| 731 |
- int i; |
| 732 |
+ struct sbus_iommu *iommu; |
| 733 |
+ unsigned long flags, npages, i; |
| 734 |
+ u32 bus_addr; |
| 735 |
+ |
| 736 |
+ iommu = sdev->bus->iommu; |
| 737 |
|
| 738 |
- base = sg[0].dma_address & IO_PAGE_MASK; |
| 739 |
- for (i = 0; i < nents; i++) { |
| 740 |
- if (sg[i].dma_length == 0) |
| 741 |
+ bus_addr = sglist[0].dma_address & IO_PAGE_MASK; |
| 742 |
+ for (i = 0; i < nelems; i++) { |
| 743 |
+ if (!sglist[i].dma_length) |
| 744 |
break; |
| 745 |
} |
| 746 |
i--; |
| 747 |
- size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - base; |
| 748 |
+ npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) |
| 749 |
+ - bus_addr) >> IO_PAGE_SHIFT; |
| 750 |
|
| 751 |
spin_lock_irqsave(&iommu->lock, flags); |
| 752 |
- sbus_strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT, direction); |
| 753 |
+ sbus_strbuf_flush(iommu, bus_addr, npages, direction); |
| 754 |
spin_unlock_irqrestore(&iommu->lock, flags); |
| 755 |
} |
| 756 |
|
| 757 |
@@ -1104,7 +1028,7 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus) |
| 758 |
struct linux_prom64_registers *pr; |
| 759 |
struct device_node *dp; |
| 760 |
struct sbus_iommu *iommu; |
| 761 |
- unsigned long regs, tsb_base; |
| 762 |
+ unsigned long regs; |
| 763 |
u64 control; |
| 764 |
int i; |
| 765 |
|
| 766 |
@@ -1132,14 +1056,6 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus) |
| 767 |
|
| 768 |
memset(iommu, 0, sizeof(*iommu)); |
| 769 |
|
| 770 |
- /* We start with no consistent mappings. */ |
| 771 |
- iommu->lowest_consistent_map = CLUSTER_NPAGES; |
| 772 |
- |
| 773 |
- for (i = 0; i < NCLUSTERS; i++) { |
| 774 |
- iommu->alloc_info[i].flush = 0; |
| 775 |
- iommu->alloc_info[i].next = 0; |
| 776 |
- } |
| 777 |
- |
| 778 |
/* Setup spinlock. */ |
| 779 |
spin_lock_init(&iommu->lock); |
| 780 |
|
| 781 |
@@ -1159,25 +1075,13 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus) |
| 782 |
sbus->portid, regs); |
| 783 |
|
| 784 |
/* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */ |
| 785 |
+ sbus_iommu_table_init(iommu, IO_TSB_SIZE); |
| 786 |
+ |
| 787 |
control = upa_readq(iommu->iommu_regs + IOMMU_CONTROL); |
| 788 |
control = ((7UL << 16UL) | |
| 789 |
(0UL << 2UL) | |
| 790 |
(1UL << 1UL) | |
| 791 |
(1UL << 0UL)); |
| 792 |
- |
| 793 |
- /* Using the above configuration we need 1MB iommu page |
| 794 |
- * table (128K ioptes * 8 bytes per iopte). This is |
| 795 |
- * page order 7 on UltraSparc. |
| 796 |
- */ |
| 797 |
- tsb_base = __get_free_pages(GFP_ATOMIC, get_order(IO_TSB_SIZE)); |
| 798 |
- if (tsb_base == 0UL) { |
| 799 |
- prom_printf("sbus_iommu_init: Fatal error, cannot alloc TSB table.\n"); |
| 800 |
- prom_halt(); |
| 801 |
- } |
| 802 |
- |
| 803 |
- iommu->page_table = (iopte_t *) tsb_base; |
| 804 |
- memset(iommu->page_table, 0, IO_TSB_SIZE); |
| 805 |
- |
| 806 |
upa_writeq(control, iommu->iommu_regs + IOMMU_CONTROL); |
| 807 |
|
| 808 |
/* Clean out any cruft in the IOMMU using |
| 809 |
@@ -1195,7 +1099,7 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus) |
| 810 |
upa_readq(iommu->sbus_control_reg); |
| 811 |
|
| 812 |
/* Give the TSB to SYSIO. */ |
| 813 |
- upa_writeq(__pa(tsb_base), iommu->iommu_regs + IOMMU_TSBBASE); |
| 814 |
+ upa_writeq(__pa(iommu->page_table), iommu->iommu_regs + IOMMU_TSBBASE); |
| 815 |
|
| 816 |
/* Setup streaming buffer, DE=1 SB_EN=1 */ |
| 817 |
control = (1UL << 1UL) | (1UL << 0UL); |