
Contents of /genpatches-2.6/tags/3.9-9/2710_radeon-uvd-patchset.patch

Revision 2399
Fri Jun 7 22:45:56 2013 UTC by tomwij
File size: 105407 bytes
3.9-9 release
diff -urN a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
--- a/drivers/gpu/drm/radeon/Makefile 2013-05-09 22:13:14.640517917 +0200
+++ b/drivers/gpu/drm/radeon/Makefile 2013-05-09 22:14:57.167175503 +0200
@@ -76,7 +76,7 @@
evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \
evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \
atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \
- si_blit_shaders.o radeon_prime.o
+ si_blit_shaders.o radeon_prime.o radeon_uvd.o

radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
diff -urN a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h
--- a/drivers/gpu/drm/radeon/atombios.h 2013-05-09 22:13:14.640517917 +0200
+++ b/drivers/gpu/drm/radeon/atombios.h 2013-05-09 22:14:57.227175498 +0200
@@ -458,6 +458,7 @@
union
{
ATOM_COMPUTE_CLOCK_FREQ ulClock; //Input Parameter
+ ULONG ulClockParams; //ULONG access for BE
ATOM_S_MPLL_FB_DIVIDER ulFbDiv; //Output Parameter
};
UCHAR ucRefDiv; //Output Parameter
@@ -490,6 +491,7 @@
union
{
ATOM_COMPUTE_CLOCK_FREQ ulClock; //Input Parameter
+ ULONG ulClockParams; //ULONG access for BE
ATOM_S_MPLL_FB_DIVIDER ulFbDiv; //Output Parameter
};
UCHAR ucRefDiv; //Output Parameter
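
The ulClockParams member added above gives the driver whole-word (ULONG) access to the same storage as the ATOM_COMPUTE_CLOCK_FREQ bitfields, so the argument word can be byte-swapped once on big-endian hosts (see its use in radeon_atom_get_clock_dividers() in the radeon_atombios.c hunk further down). A minimal host-side sketch of the idea; the bitfield struct is a simplified stand-in for the real AtomBIOS layout and assumes little-endian bitfield order:

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for ATOM_COMPUTE_CLOCK_FREQ: a 24-bit clock in
 * 10 kHz units plus an 8-bit PLL id (little-endian bitfield order
 * assumed for this demo; the real header uses #ifdef for endianness). */
struct compute_clock_freq {
	uint32_t clock_freq : 24;
	uint32_t pll_id : 8;
};

union clock_params {
	struct compute_clock_freq ulClock;
	uint32_t ulClockParams; /* whole-word access, as in the patch */
};

int main(void)
{
	union clock_params p;

	/* pack id and clock in one store, the way the driver builds
	 * (clock_type << 24) | clock before byte-swapping the word */
	p.ulClockParams = (2u << 24) | 53300; /* 533.00 MHz */
	printf("word=0x%08x clock=%u id=%u\n",
	       p.ulClockParams, p.ulClock.clock_freq, p.ulClock.pll_id);
	return 0;
}
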
diff -urN a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
--- a/drivers/gpu/drm/radeon/evergreen.c 2013-05-09 22:13:14.640517917 +0200
+++ b/drivers/gpu/drm/radeon/evergreen.c 2013-05-09 22:14:57.217175499 +0200
@@ -84,6 +84,223 @@
}
}

+static int sumo_set_uvd_clock(struct radeon_device *rdev, u32 clock,
+ u32 cntl_reg, u32 status_reg)
+{
+ int r, i;
+ struct atom_clock_dividers dividers;
+
+ r = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+ clock, false, &dividers);
+ if (r)
+ return r;
+
+ WREG32_P(cntl_reg, dividers.post_div, ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK));
+
+ for (i = 0; i < 100; i++) {
+ if (RREG32(status_reg) & DCLK_STATUS)
+ break;
+ mdelay(10);
+ }
+ if (i == 100)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+int sumo_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
+{
+ int r = 0;
+ u32 cg_scratch = RREG32(CG_SCRATCH1);
+
+ r = sumo_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
+ if (r)
+ goto done;
+ cg_scratch &= 0xffff0000;
+ cg_scratch |= vclk / 100; /* MHz */
+
+ r = sumo_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
+ if (r)
+ goto done;
+ cg_scratch &= 0x0000ffff;
+ cg_scratch |= (dclk / 100) << 16; /* MHz */
+
+done:
+ WREG32(CG_SCRATCH1, cg_scratch);
+
+ return r;
+}
+
+static int evergreen_uvd_calc_post_div(unsigned target_freq,
+ unsigned vco_freq,
+ unsigned *div)
+{
+ /* target larger than vco frequency ? */
+ if (vco_freq < target_freq)
+ return -1; /* forget it */
+
+ /* Fclk = Fvco / PDIV */
+ *div = vco_freq / target_freq;
+
+ /* we always need a frequency less than or equal to the target */
+ if ((vco_freq / *div) > target_freq)
+ *div += 1;
+
+ /* dividers above 5 must be even */
+ if (*div > 5 && *div % 2)
+ *div += 1;
+
+ /* out of range ? */
+ if (*div >= 128)
+ return -1; /* forget it */
+
+ return vco_freq / *div;
+}
+
+static int evergreen_uvd_send_upll_ctlreq(struct radeon_device *rdev)
+{
+ unsigned i;
+
+ /* assert UPLL_CTLREQ */
+ WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
+
+ /* wait for CTLACK and CTLACK2 to get asserted */
+ for (i = 0; i < 100; ++i) {
+ uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
+ if ((RREG32(CG_UPLL_FUNC_CNTL) & mask) == mask)
+ break;
+ mdelay(10);
+ }
+ if (i == 100)
+ return -ETIMEDOUT;
+
+ /* deassert UPLL_CTLREQ */
+ WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
+
+ return 0;
+}
+
+int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
+{
+ /* start off with something large */
+ int optimal_diff_score = 0x7FFFFFF;
+ unsigned optimal_fb_div = 0, optimal_vclk_div = 0;
+ unsigned optimal_dclk_div = 0, optimal_vco_freq = 0;
+ unsigned vco_freq;
+ int r;
+
+ /* bypass vclk and dclk with bclk */
+ WREG32_P(CG_UPLL_FUNC_CNTL_2,
+ VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
+ ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
+
+ /* put PLL in bypass mode */
+ WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
+
+ if (!vclk || !dclk) {
+ /* keep the Bypass mode, put PLL to sleep */
+ WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
+ return 0;
+ }
+
+ /* loop through vco from low to high */
+ for (vco_freq = 125000; vco_freq <= 250000; vco_freq += 100) {
+ unsigned fb_div = vco_freq / rdev->clock.spll.reference_freq * 16384;
+ int calc_clk, diff_score, diff_vclk, diff_dclk;
+ unsigned vclk_div, dclk_div;
+
+ /* fb div out of range ? */
+ if (fb_div > 0x03FFFFFF)
+ break; /* it can only get worse */
+
+ /* calc vclk with current vco freq. */
+ calc_clk = evergreen_uvd_calc_post_div(vclk, vco_freq, &vclk_div);
+ if (calc_clk == -1)
+ break; /* vco is too big, it has to stop. */
+ diff_vclk = vclk - calc_clk;
+
+ /* calc dclk with current vco freq. */
+ calc_clk = evergreen_uvd_calc_post_div(dclk, vco_freq, &dclk_div);
+ if (calc_clk == -1)
+ break; /* vco is too big, it has to stop. */
+ diff_dclk = dclk - calc_clk;
+
+ /* determine if this vco setting is better than current optimal settings */
+ diff_score = abs(diff_vclk) + abs(diff_dclk);
+ if (diff_score < optimal_diff_score) {
+ optimal_fb_div = fb_div;
+ optimal_vclk_div = vclk_div;
+ optimal_dclk_div = dclk_div;
+ optimal_vco_freq = vco_freq;
+ optimal_diff_score = diff_score;
+ if (optimal_diff_score == 0)
+ break; /* it can't get better than this */
+ }
+ }
+
+ /* set VCO_MODE to 1 */
+ WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
+
+ /* toggle UPLL_SLEEP to 1 then back to 0 */
+ WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
+ WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
+
+ /* deassert UPLL_RESET */
+ WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
+
+ mdelay(1);
+
+ r = evergreen_uvd_send_upll_ctlreq(rdev);
+ if (r)
+ return r;
+
+ /* assert UPLL_RESET again */
+ WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
+
+ /* disable spread spectrum. */
+ WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
+
+ /* set feedback divider */
+ WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(optimal_fb_div), ~UPLL_FB_DIV_MASK);
+
+ /* set ref divider to 0 */
+ WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
+
+ if (optimal_vco_freq < 187500)
+ WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
+ else
+ WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
+
+ /* set PDIV_A and PDIV_B */
+ WREG32_P(CG_UPLL_FUNC_CNTL_2,
+ UPLL_PDIV_A(optimal_vclk_div) | UPLL_PDIV_B(optimal_dclk_div),
+ ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
+
+ /* give the PLL some time to settle */
+ mdelay(15);
+
+ /* deassert PLL_RESET */
+ WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
+
+ mdelay(15);
+
+ /* switch from bypass mode to normal mode */
+ WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
+
+ r = evergreen_uvd_send_upll_ctlreq(rdev);
+ if (r)
+ return r;
+
+ /* switch VCLK and DCLK selection */
+ WREG32_P(CG_UPLL_FUNC_CNTL_2,
+ VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
+ ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
+
+ mdelay(100);
+
+ return 0;
+}
+
void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev)
{
u16 ctl, v;
@@ -2058,6 +2275,9 @@
WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
WREG32(HDP_ADDR_CONFIG, gb_addr_config);
WREG32(DMA_TILING_CONFIG, gb_addr_config);
+ WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
+ WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
+ WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

if ((rdev->config.evergreen.max_backends == 1) &&
(rdev->flags & RADEON_IS_IGP)) {
@@ -3360,6 +3580,9 @@
DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
break;
}
+ case 124: /* UVD */
+ DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
+ radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
break;
case 146:
case 147:
@@ -3571,7 +3794,7 @@

static int evergreen_startup(struct radeon_device *rdev)
{
- struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+ struct radeon_ring *ring;
int r;

/* enable pcie gen2 link */
@@ -3638,6 +3861,17 @@
return r;
}

+ r = rv770_uvd_resume(rdev);
+ if (!r) {
+ r = radeon_fence_driver_start_ring(rdev,
+ R600_RING_TYPE_UVD_INDEX);
+ if (r)
+ dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+ }
+
+ if (r)
+ rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+
/* Enable IRQ */
r = r600_irq_init(rdev);
if (r) {
@@ -3647,6 +3881,7 @@
}
evergreen_irq_set(rdev);

+ ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
R600_CP_RB_RPTR, R600_CP_RB_WPTR,
0, 0xfffff, RADEON_CP_PACKET2);
@@ -3670,6 +3905,19 @@
if (r)
return r;

+ ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+ if (ring->ring_size) {
+ r = radeon_ring_init(rdev, ring, ring->ring_size,
+ R600_WB_UVD_RPTR_OFFSET,
+ UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
+ 0, 0xfffff, RADEON_CP_PACKET2);
+ if (!r)
+ r = r600_uvd_init(rdev);
+
+ if (r)
+ DRM_ERROR("radeon: error initializing UVD (%d).\n", r);
+ }
+
r = radeon_ib_pool_init(rdev);
if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -3716,8 +3964,10 @@
int evergreen_suspend(struct radeon_device *rdev)
{
r600_audio_fini(rdev);
+ radeon_uvd_suspend(rdev);
r700_cp_stop(rdev);
r600_dma_stop(rdev);
+ r600_uvd_rbc_stop(rdev);
evergreen_irq_suspend(rdev);
radeon_wb_disable(rdev);
evergreen_pcie_gart_disable(rdev);
@@ -3797,6 +4047,13 @@
rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);

+ r = radeon_uvd_init(rdev);
+ if (!r) {
+ rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
+ r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX],
+ 4096);
+ }
+
rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);

@@ -3843,6 +4100,7 @@
radeon_ib_pool_fini(rdev);
radeon_irq_kms_fini(rdev);
evergreen_pcie_gart_fini(rdev);
+ radeon_uvd_fini(rdev);
r600_vram_scratch_fini(rdev);
radeon_gem_fini(rdev);
radeon_fence_driver_fini(rdev);
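
The divider rules in evergreen_uvd_calc_post_div() above are easy to check on the host: divide the VCO down to at most the target, force dividers above 5 to be even, and reject anything at or past 128. A standalone sketch of the same logic (frequencies in the driver's 10 kHz units, values illustrative):

#include <stdio.h>

/* Same rules as evergreen_uvd_calc_post_div(): returns the achieved
 * frequency, or -1 if this VCO cannot serve the target. */
static int calc_post_div(unsigned target, unsigned vco, unsigned *div)
{
	if (vco < target)
		return -1;
	*div = vco / target;
	if (vco / *div > target) /* never exceed the target */
		*div += 1;
	if (*div > 5 && *div % 2) /* dividers above 5 must be even */
		*div += 1;
	if (*div >= 128)
		return -1;
	return vco / *div;
}

int main(void)
{
	unsigned div;
	/* 533.00 MHz vclk from a 2.132 GHz VCO divides exactly by 4 */
	int got = calc_post_div(53300, 213200, &div);
	printf("div=%u achieved=%d (10 kHz units)\n", div, got);
	return 0;
}
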
diff -urN a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
--- a/drivers/gpu/drm/radeon/evergreend.h 2013-05-09 22:13:14.643851250 +0200
+++ b/drivers/gpu/drm/radeon/evergreend.h 2013-05-09 22:14:57.210508833 +0200
@@ -53,6 +53,43 @@
#define RCU_IND_INDEX 0x100
#define RCU_IND_DATA 0x104

+/* discrete uvd clocks */
+#define CG_UPLL_FUNC_CNTL 0x718
+# define UPLL_RESET_MASK 0x00000001
+# define UPLL_SLEEP_MASK 0x00000002
+# define UPLL_BYPASS_EN_MASK 0x00000004
+# define UPLL_CTLREQ_MASK 0x00000008
+# define UPLL_REF_DIV_MASK 0x001F0000
+# define UPLL_VCO_MODE_MASK 0x00000200
+# define UPLL_CTLACK_MASK 0x40000000
+# define UPLL_CTLACK2_MASK 0x80000000
+#define CG_UPLL_FUNC_CNTL_2 0x71c
+# define UPLL_PDIV_A(x) ((x) << 0)
+# define UPLL_PDIV_A_MASK 0x0000007F
+# define UPLL_PDIV_B(x) ((x) << 8)
+# define UPLL_PDIV_B_MASK 0x00007F00
+# define VCLK_SRC_SEL(x) ((x) << 20)
+# define VCLK_SRC_SEL_MASK 0x01F00000
+# define DCLK_SRC_SEL(x) ((x) << 25)
+# define DCLK_SRC_SEL_MASK 0x3E000000
+#define CG_UPLL_FUNC_CNTL_3 0x720
+# define UPLL_FB_DIV(x) ((x) << 0)
+# define UPLL_FB_DIV_MASK 0x01FFFFFF
+#define CG_UPLL_FUNC_CNTL_4 0x854
+# define UPLL_SPARE_ISPARE9 0x00020000
+#define CG_UPLL_SPREAD_SPECTRUM 0x79c
+# define SSEN_MASK 0x00000001
+
+/* fusion uvd clocks */
+#define CG_DCLK_CNTL 0x610
+# define DCLK_DIVIDER_MASK 0x7f
+# define DCLK_DIR_CNTL_EN (1 << 8)
+#define CG_DCLK_STATUS 0x614
+# define DCLK_STATUS (1 << 0)
+#define CG_VCLK_CNTL 0x618
+#define CG_VCLK_STATUS 0x61c
+#define CG_SCRATCH1 0x820
+
#define GRBM_GFX_INDEX 0x802C
#define INSTANCE_INDEX(x) ((x) << 0)
#define SE_INDEX(x) ((x) << 16)
@@ -992,6 +1029,16 @@
# define TARGET_LINK_SPEED_MASK (0xf << 0)
# define SELECTABLE_DEEMPHASIS (1 << 6)

+
+/*
+ * UVD
+ */
+#define UVD_UDEC_ADDR_CONFIG 0xef4c
+#define UVD_UDEC_DB_ADDR_CONFIG 0xef50
+#define UVD_UDEC_DBW_ADDR_CONFIG 0xef54
+#define UVD_RBC_RB_RPTR 0xf690
+#define UVD_RBC_RB_WPTR 0xf694
+
/*
 * PM4
 */
diff -urN a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
--- a/drivers/gpu/drm/radeon/ni.c 2013-05-09 22:13:14.643851250 +0200
+++ b/drivers/gpu/drm/radeon/ni.c 2013-05-09 22:14:57.210508833 +0200
@@ -624,6 +624,9 @@
WREG32(HDP_ADDR_CONFIG, gb_addr_config);
WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
+ WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
+ WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
+ WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

if ((rdev->config.cayman.max_backends_per_se == 1) &&
(rdev->flags & RADEON_IS_IGP)) {
@@ -931,6 +934,23 @@
radeon_ring_write(ring, 10); /* poll interval */
}

+void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
+ struct radeon_ring *ring,
+ struct radeon_semaphore *semaphore,
+ bool emit_wait)
+{
+ uint64_t addr = semaphore->gpu_addr;
+
+ radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
+ radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
+
+ radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
+ radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
+
+ radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
+ radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
+}
+
static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
{
if (enable)
@@ -1682,6 +1702,16 @@
return r;
}

+ r = rv770_uvd_resume(rdev);
+ if (!r) {
+ r = radeon_fence_driver_start_ring(rdev,
+ R600_RING_TYPE_UVD_INDEX);
+ if (r)
+ dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+ }
+ if (r)
+ rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+
r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
if (r) {
dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
@@ -1748,6 +1778,18 @@
if (r)
return r;

+ ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+ if (ring->ring_size) {
+ r = radeon_ring_init(rdev, ring, ring->ring_size,
+ R600_WB_UVD_RPTR_OFFSET,
+ UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
+ 0, 0xfffff, RADEON_CP_PACKET2);
+ if (!r)
+ r = r600_uvd_init(rdev);
+ if (r)
+ DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
+ }
+
r = radeon_ib_pool_init(rdev);
if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -1794,6 +1836,8 @@
radeon_vm_manager_fini(rdev);
cayman_cp_enable(rdev, false);
cayman_dma_stop(rdev);
+ r600_uvd_rbc_stop(rdev);
+ radeon_uvd_suspend(rdev);
evergreen_irq_suspend(rdev);
radeon_wb_disable(rdev);
cayman_pcie_gart_disable(rdev);
@@ -1868,6 +1912,13 @@
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 64 * 1024);

+ r = radeon_uvd_init(rdev);
+ if (!r) {
+ ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+ ring->ring_obj = NULL;
+ r600_ring_init(rdev, ring, 4096);
+ }
+
rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);

@@ -1919,6 +1970,7 @@
radeon_vm_manager_fini(rdev);
radeon_ib_pool_fini(rdev);
radeon_irq_kms_fini(rdev);
+ radeon_uvd_fini(rdev);
cayman_pcie_gart_fini(rdev);
r600_vram_scratch_fini(rdev);
radeon_gem_fini(rdev);
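
cayman_uvd_semaphore_emit() above (and r600_uvd_semaphore_emit() in the r600.c hunk below) carries the 8-byte-aligned semaphore address as two 20-bit register fields, covering bits 3-22 and 23-42 of the GPU address. A quick host-side round trip of that encoding, with an illustrative address value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t addr = 0x12345678ull; /* example 8-byte-aligned GPU address */
	uint32_t lo = (addr >> 3) & 0x000FFFFF;  /* bits 3..22 */
	uint32_t hi = (addr >> 23) & 0x000FFFFF; /* bits 23..42 */
	uint64_t back = ((uint64_t)hi << 23) | ((uint64_t)lo << 3);

	printf("addr=0x%llx lo=0x%05x hi=0x%05x back=0x%llx\n",
	       (unsigned long long)addr, lo, hi, (unsigned long long)back);
	return 0;
}
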
diff -urN a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
--- a/drivers/gpu/drm/radeon/nid.h 2013-05-09 22:13:14.643851250 +0200
+++ b/drivers/gpu/drm/radeon/nid.h 2013-05-09 22:14:57.210508833 +0200
@@ -486,6 +486,18 @@
# define CACHE_FLUSH_AND_INV_EVENT (0x16 << 0)

/*
+ * UVD
+ */
+#define UVD_SEMA_ADDR_LOW 0xEF00
+#define UVD_SEMA_ADDR_HIGH 0xEF04
+#define UVD_SEMA_CMD 0xEF08
+#define UVD_UDEC_ADDR_CONFIG 0xEF4C
+#define UVD_UDEC_DB_ADDR_CONFIG 0xEF50
+#define UVD_UDEC_DBW_ADDR_CONFIG 0xEF54
+#define UVD_RBC_RB_RPTR 0xF690
+#define UVD_RBC_RB_WPTR 0xF694
+
+/*
 * PM4
 */
#define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \
diff -urN a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
--- a/drivers/gpu/drm/radeon/r600.c 2013-05-09 22:13:14.643851250 +0200
+++ b/drivers/gpu/drm/radeon/r600.c 2013-05-09 22:14:57.230508831 +0200
@@ -2552,6 +2552,193 @@
}

/*
+ * UVD
+ */
+int r600_uvd_rbc_start(struct radeon_device *rdev)
+{
+ struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+ uint64_t rptr_addr;
+ uint32_t rb_bufsz, tmp;
+ int r;
+
+ rptr_addr = rdev->wb.gpu_addr + R600_WB_UVD_RPTR_OFFSET;
+
+ if (upper_32_bits(rptr_addr) != upper_32_bits(ring->gpu_addr)) {
+ DRM_ERROR("UVD ring and rptr not in the same 4GB segment!\n");
+ return -EINVAL;
+ }
+
+ /* force RBC into idle state */
+ WREG32(UVD_RBC_RB_CNTL, 0x11010101);
+
+ /* Set the write pointer delay */
+ WREG32(UVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* set the wb address */
+ WREG32(UVD_RBC_RB_RPTR_ADDR, rptr_addr >> 2);
+
+ /* program the 4GB memory segment for rptr and ring buffer */
+ WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(rptr_addr) |
+ (0x7 << 16) | (0x1 << 31));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32(UVD_RBC_RB_RPTR, 0x0);
+
+ ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR);
+ WREG32(UVD_RBC_RB_WPTR, ring->wptr);
+
+ /* set the ring address */
+ WREG32(UVD_RBC_RB_BASE, ring->gpu_addr);
+
+ /* Set ring buffer size */
+ rb_bufsz = drm_order(ring->ring_size);
+ rb_bufsz = (0x1 << 8) | rb_bufsz;
+ WREG32(UVD_RBC_RB_CNTL, rb_bufsz);
+
+ ring->ready = true;
+ r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring);
+ if (r) {
+ ring->ready = false;
+ return r;
+ }
+
+ r = radeon_ring_lock(rdev, ring, 10);
+ if (r) {
+ DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r);
+ return r;
+ }
+
+ tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
+ radeon_ring_write(ring, tmp);
+ radeon_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
+ radeon_ring_write(ring, tmp);
+ radeon_ring_write(ring, 0xFFFFF);
+
+ tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
+ radeon_ring_write(ring, tmp);
+ radeon_ring_write(ring, 0xFFFFF);
+
+ /* Clear timeout status bits */
+ radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0));
+ radeon_ring_write(ring, 0x8);
+
+ radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0));
+ radeon_ring_write(ring, 3);
+
+ radeon_ring_unlock_commit(rdev, ring);
+
+ return 0;
+}
+
+void r600_uvd_rbc_stop(struct radeon_device *rdev)
+{
+ struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+
+ /* force RBC into idle state */
+ WREG32(UVD_RBC_RB_CNTL, 0x11010101);
+ ring->ready = false;
+}
+
+int r600_uvd_init(struct radeon_device *rdev)
+{
+ int i, j, r;
+
+ /* raise clocks while booting up the VCPU */
+ radeon_set_uvd_clocks(rdev, 53300, 40000);
+
+ /* disable clock gating */
+ WREG32(UVD_CGC_GATE, 0);
+
+ /* disable interrupt */
+ WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1));
+
+ /* put LMI, VCPU, RBC etc... into reset */
+ WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET |
+ LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET |
+ CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET);
+ mdelay(5);
+
+ /* take UVD block out of reset */
+ WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD);
+ mdelay(5);
+
+ /* initialize UVD memory controller */
+ WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
+ (1 << 21) | (1 << 9) | (1 << 20));
+
+ /* disable byte swapping */
+ WREG32(UVD_LMI_SWAP_CNTL, 0);
+ WREG32(UVD_MP_SWAP_CNTL, 0);
+
+ WREG32(UVD_MPC_SET_MUXA0, 0x40c2040);
+ WREG32(UVD_MPC_SET_MUXA1, 0x0);
+ WREG32(UVD_MPC_SET_MUXB0, 0x40c2040);
+ WREG32(UVD_MPC_SET_MUXB1, 0x0);
+ WREG32(UVD_MPC_SET_ALU, 0);
+ WREG32(UVD_MPC_SET_MUX, 0x88);
+
+ /* Stall UMC */
+ WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+ WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3));
+
+ /* take all subblocks out of reset, except VCPU */
+ WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET);
+ mdelay(5);
+
+ /* enable VCPU clock */
+ WREG32(UVD_VCPU_CNTL, 1 << 9);
+
+ /* enable UMC */
+ WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
+
+ /* boot up the VCPU */
+ WREG32(UVD_SOFT_RESET, 0);
+ mdelay(10);
+
+ WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
+
+ for (i = 0; i < 10; ++i) {
+ uint32_t status;
+ for (j = 0; j < 100; ++j) {
+ status = RREG32(UVD_STATUS);
+ if (status & 2)
+ break;
+ mdelay(10);
+ }
+ r = 0;
+ if (status & 2)
+ break;
+
+ DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
+ WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET);
+ mdelay(10);
+ WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET);
+ mdelay(10);
+ r = -1;
+ }
+
+ if (r) {
+ DRM_ERROR("UVD not responding, giving up!!!\n");
+ radeon_set_uvd_clocks(rdev, 0, 0);
+ return r;
+ }
+
+ /* enable interrupt */
+ WREG32_P(UVD_MASTINT_EN, 3<<1, ~(3 << 1));
+
+ r = r600_uvd_rbc_start(rdev);
+ if (!r)
+ DRM_INFO("UVD initialized successfully.\n");
+
+ /* lower clocks again */
+ radeon_set_uvd_clocks(rdev, 0, 0);
+
+ return r;
+}
+
+/*
 * GPU scratch registers helpers function.
 */
void r600_scratch_init(struct radeon_device *rdev)
@@ -2660,6 +2847,40 @@
return r;
}

+int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD);
+ r = radeon_ring_lock(rdev, ring, 3);
+ if (r) {
+ DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n",
+ ring->idx, r);
+ return r;
+ }
+ radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
+ radeon_ring_write(ring, 0xDEADBEEF);
+ radeon_ring_unlock_commit(rdev, ring);
+ for (i = 0; i < rdev->usec_timeout; i++) {
+ tmp = RREG32(UVD_CONTEXT_ID);
+ if (tmp == 0xDEADBEEF)
+ break;
+ DRM_UDELAY(1);
+ }
+
+ if (i < rdev->usec_timeout) {
+ DRM_INFO("ring test on %d succeeded in %d usecs\n",
+ ring->idx, i);
+ } else {
+ DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
+ ring->idx, tmp);
+ r = -EINVAL;
+ }
+ return r;
+}
+
/*
 * CP fences/semaphores
 */
@@ -2711,6 +2932,30 @@
}
}

+void r600_uvd_fence_emit(struct radeon_device *rdev,
+ struct radeon_fence *fence)
+{
+ struct radeon_ring *ring = &rdev->ring[fence->ring];
+ uint32_t addr = rdev->fence_drv[fence->ring].gpu_addr;
+
+ radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
+ radeon_ring_write(ring, fence->seq);
+ radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
+ radeon_ring_write(ring, addr & 0xffffffff);
+ radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
+ radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
+ radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
+ radeon_ring_write(ring, 0);
+
+ radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
+ radeon_ring_write(ring, 0);
+ radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
+ radeon_ring_write(ring, 0);
+ radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
+ radeon_ring_write(ring, 2);
+ return;
+}
+
void r600_semaphore_ring_emit(struct radeon_device *rdev,
struct radeon_ring *ring,
struct radeon_semaphore *semaphore,
@@ -2780,6 +3025,23 @@
radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
}

+void r600_uvd_semaphore_emit(struct radeon_device *rdev,
+ struct radeon_ring *ring,
+ struct radeon_semaphore *semaphore,
+ bool emit_wait)
+{
+ uint64_t addr = semaphore->gpu_addr;
+
+ radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
+ radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
+
+ radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
+ radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
+
+ radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
+ radeon_ring_write(ring, emit_wait ? 1 : 0);
+}
+
int r600_copy_blit(struct radeon_device *rdev,
uint64_t src_offset,
uint64_t dst_offset,
@@ -3183,6 +3445,16 @@
radeon_ring_write(ring, ib->length_dw);
}

+void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+ struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+ radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0));
+ radeon_ring_write(ring, ib->gpu_addr);
+ radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0));
+ radeon_ring_write(ring, ib->length_dw);
+}
+
int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
struct radeon_ib ib;
@@ -3300,6 +3572,41 @@
return r;
}

+int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+ struct radeon_fence *fence = NULL;
+ int r;
+
+ r = radeon_set_uvd_clocks(rdev, 53300, 40000);
+ if (r) {
+ DRM_ERROR("radeon: failed to raise UVD clocks (%d).\n", r);
+ return r;
+ }
+
+ r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
+ if (r) {
+ DRM_ERROR("radeon: failed to get create msg (%d).\n", r);
+ goto error;
+ }
+
+ r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence);
+ if (r) {
+ DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r);
+ goto error;
+ }
+
+ r = radeon_fence_wait(fence, false);
+ if (r) {
+ DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+ goto error;
+ }
+ DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+error:
+ radeon_fence_unref(&fence);
+ radeon_set_uvd_clocks(rdev, 0, 0);
+ return r;
+}
+
/**
 * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
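
All UVD clock requests in this patch are in 10 kHz units: r600_uvd_init() above boots the VCPU with radeon_set_uvd_clocks(rdev, 53300, 40000), i.e. a 533 MHz vclk and a 400 MHz dclk, and sumo_set_uvd_clocks() stores the corresponding MHz values in CG_SCRATCH1. A small sanity check of those conversions:

#include <stdio.h>

int main(void)
{
	unsigned vclk = 53300, dclk = 40000; /* 10 kHz units */
	unsigned scratch;

	printf("vclk = %u.%02u MHz\n", vclk / 100, vclk % 100); /* 533.00 */
	printf("dclk = %u.%02u MHz\n", dclk / 100, dclk % 100); /* 400.00 */

	/* CG_SCRATCH1 bookkeeping: dclk MHz in the high half,
	 * vclk MHz in the low half, as in sumo_set_uvd_clocks() */
	scratch = ((dclk / 100) << 16) | (vclk / 100);
	printf("CG_SCRATCH1 = 0x%08x\n", scratch); /* 0x01900215 */
	return 0;
}
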
diff -urN a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
--- a/drivers/gpu/drm/radeon/r600d.h 2013-05-09 22:13:14.643851250 +0200
+++ b/drivers/gpu/drm/radeon/r600d.h 2013-05-09 22:14:57.173842169 +0200
@@ -691,6 +691,7 @@
#define SRBM_SOFT_RESET 0xe60
# define SOFT_RESET_DMA (1 << 12)
# define SOFT_RESET_RLC (1 << 13)
+# define SOFT_RESET_UVD (1 << 18)
# define RV770_SOFT_RESET_DMA (1 << 20)

#define CP_INT_CNTL 0xc124
@@ -1143,6 +1144,66 @@
# define AFMT_AZ_AUDIO_ENABLE_CHG_ACK (1 << 30)

/*
+ * UVD
+ */
+#define UVD_SEMA_ADDR_LOW 0xef00
+#define UVD_SEMA_ADDR_HIGH 0xef04
+#define UVD_SEMA_CMD 0xef08
+
+#define UVD_GPCOM_VCPU_CMD 0xef0c
+#define UVD_GPCOM_VCPU_DATA0 0xef10
+#define UVD_GPCOM_VCPU_DATA1 0xef14
+#define UVD_ENGINE_CNTL 0xef18
+
+#define UVD_SEMA_CNTL 0xf400
+#define UVD_RB_ARB_CTRL 0xf480
+
+#define UVD_LMI_EXT40_ADDR 0xf498
+#define UVD_CGC_GATE 0xf4a8
+#define UVD_LMI_CTRL2 0xf4f4
+#define UVD_MASTINT_EN 0xf500
+#define UVD_LMI_ADDR_EXT 0xf594
+#define UVD_LMI_CTRL 0xf598
+#define UVD_LMI_SWAP_CNTL 0xf5b4
+#define UVD_MP_SWAP_CNTL 0xf5bC
+#define UVD_MPC_CNTL 0xf5dC
+#define UVD_MPC_SET_MUXA0 0xf5e4
+#define UVD_MPC_SET_MUXA1 0xf5e8
+#define UVD_MPC_SET_MUXB0 0xf5eC
+#define UVD_MPC_SET_MUXB1 0xf5f0
+#define UVD_MPC_SET_MUX 0xf5f4
+#define UVD_MPC_SET_ALU 0xf5f8
+
+#define UVD_VCPU_CNTL 0xf660
+#define UVD_SOFT_RESET 0xf680
+#define RBC_SOFT_RESET (1<<0)
+#define LBSI_SOFT_RESET (1<<1)
+#define LMI_SOFT_RESET (1<<2)
+#define VCPU_SOFT_RESET (1<<3)
+#define CSM_SOFT_RESET (1<<5)
+#define CXW_SOFT_RESET (1<<6)
+#define TAP_SOFT_RESET (1<<7)
+#define LMI_UMC_SOFT_RESET (1<<13)
+#define UVD_RBC_IB_BASE 0xf684
+#define UVD_RBC_IB_SIZE 0xf688
+#define UVD_RBC_RB_BASE 0xf68c
+#define UVD_RBC_RB_RPTR 0xf690
+#define UVD_RBC_RB_WPTR 0xf694
+#define UVD_RBC_RB_WPTR_CNTL 0xf698
+
+#define UVD_STATUS 0xf6bc
+
+#define UVD_SEMA_TIMEOUT_STATUS 0xf6c0
+#define UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL 0xf6c4
+#define UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL 0xf6c8
+#define UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL 0xf6cc
+
+#define UVD_RBC_RB_CNTL 0xf6a4
+#define UVD_RBC_RB_RPTR_ADDR 0xf6a8
+
+#define UVD_CONTEXT_ID 0xf6f4
+
+/*
 * PM4
 */
#define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \
diff -urN a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
--- a/drivers/gpu/drm/radeon/radeon.h 2013-05-09 22:13:14.643851250 +0200
+++ b/drivers/gpu/drm/radeon/radeon.h 2013-05-09 22:14:57.220508832 +0200
@@ -95,6 +95,7 @@
extern int radeon_pcie_gen2;
extern int radeon_msi;
extern int radeon_lockup_timeout;
+extern int radeon_fastfb;

/*
 * Copy from radeon_drv.h so we don't have to include both and have conflicting
@@ -109,24 +110,27 @@
#define RADEON_BIOS_NUM_SCRATCH 8

/* max number of rings */
-#define RADEON_NUM_RINGS 5
+#define RADEON_NUM_RINGS 6

/* fence seq are set to this number when signaled */
#define RADEON_FENCE_SIGNALED_SEQ 0LL

/* internal ring indices */
/* r1xx+ has gfx CP ring */
-#define RADEON_RING_TYPE_GFX_INDEX 0
+#define RADEON_RING_TYPE_GFX_INDEX 0

/* cayman has 2 compute CP rings */
-#define CAYMAN_RING_TYPE_CP1_INDEX 1
-#define CAYMAN_RING_TYPE_CP2_INDEX 2
+#define CAYMAN_RING_TYPE_CP1_INDEX 1
+#define CAYMAN_RING_TYPE_CP2_INDEX 2

/* R600+ has an async dma ring */
#define R600_RING_TYPE_DMA_INDEX 3
/* cayman add a second async dma ring */
#define CAYMAN_RING_TYPE_DMA1_INDEX 4

+/* R600+ */
+#define R600_RING_TYPE_UVD_INDEX 5
+
/* hardcode those limit for now */
#define RADEON_VA_IB_OFFSET (1 << 20)
#define RADEON_VA_RESERVED_SIZE (8 << 20)
@@ -202,6 +206,11 @@
void radeon_pm_resume(struct radeon_device *rdev);
void radeon_combios_get_power_modes(struct radeon_device *rdev);
void radeon_atombios_get_power_modes(struct radeon_device *rdev);
+int radeon_atom_get_clock_dividers(struct radeon_device *rdev,
+ u8 clock_type,
+ u32 clock,
+ bool strobe_mode,
+ struct atom_clock_dividers *dividers);
void radeon_atom_set_voltage(struct radeon_device *rdev, u16 voltage_level, u8 voltage_type);
void rs690_pm_info(struct radeon_device *rdev);
extern int rv6xx_get_temp(struct radeon_device *rdev);
@@ -357,8 +366,9 @@
struct ttm_validate_buffer tv;
struct radeon_bo *bo;
uint64_t gpu_offset;
- unsigned rdomain;
- unsigned wdomain;
+ bool written;
+ unsigned domain;
+ unsigned alt_domain;
u32 tiling_flags;
};

@@ -918,6 +928,7 @@
#define R600_WB_DMA_RPTR_OFFSET 1792
#define R600_WB_IH_WPTR_OFFSET 2048
#define CAYMAN_WB_DMA1_RPTR_OFFSET 2304
+#define R600_WB_UVD_RPTR_OFFSET 2560
#define R600_WB_EVENT_OFFSET 3072

/**
@@ -1118,6 +1129,35 @@
int radeon_pm_get_type_index(struct radeon_device *rdev,
enum radeon_pm_state_type ps_type,
int instance);
+/*
+ * UVD
+ */
+#define RADEON_MAX_UVD_HANDLES 10
+#define RADEON_UVD_STACK_SIZE (1024*1024)
+#define RADEON_UVD_HEAP_SIZE (1024*1024)
+
+struct radeon_uvd {
+ struct radeon_bo *vcpu_bo;
+ void *cpu_addr;
+ uint64_t gpu_addr;
+ atomic_t handles[RADEON_MAX_UVD_HANDLES];
+ struct drm_file *filp[RADEON_MAX_UVD_HANDLES];
+ struct delayed_work idle_work;
+};
+
+int radeon_uvd_init(struct radeon_device *rdev);
+void radeon_uvd_fini(struct radeon_device *rdev);
+int radeon_uvd_suspend(struct radeon_device *rdev);
+int radeon_uvd_resume(struct radeon_device *rdev);
+int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
+ uint32_t handle, struct radeon_fence **fence);
+int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
+ uint32_t handle, struct radeon_fence **fence);
+void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo);
+void radeon_uvd_free_handles(struct radeon_device *rdev,
+ struct drm_file *filp);
+int radeon_uvd_cs_parse(struct radeon_cs_parser *parser);
+void radeon_uvd_note_usage(struct radeon_device *rdev);

struct r600_audio {
int channels;
@@ -1281,6 +1321,7 @@
int (*get_pcie_lanes)(struct radeon_device *rdev);
void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes);
void (*set_clock_gating)(struct radeon_device *rdev, int enable);
+ int (*set_uvd_clocks)(struct radeon_device *rdev, u32 vclk, u32 dclk);
} pm;
/* pageflipping */
struct {
@@ -1608,6 +1649,7 @@
struct radeon_asic *asic;
struct radeon_gem gem;
struct radeon_pm pm;
+ struct radeon_uvd uvd;
uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH];
struct radeon_wb wb;
struct radeon_dummy_page dummy_page;
@@ -1615,12 +1657,14 @@
bool suspend;
bool need_dma32;
bool accel_working;
+ bool fastfb_working; /* IGP feature*/
struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
const struct firmware *me_fw; /* all family ME firmware */
const struct firmware *pfp_fw; /* r6/700 PFP firmware */
const struct firmware *rlc_fw; /* r6/700 RLC firmware */
const struct firmware *mc_fw; /* NI MC firmware */
const struct firmware *ce_fw; /* SI CE firmware */
+ const struct firmware *uvd_fw; /* UVD firmware */
struct r600_blit r600_blit;
struct r600_vram_scratch vram_scratch;
int msi_enabled; /* msi enabled */
@@ -1845,6 +1889,7 @@
#define radeon_get_pcie_lanes(rdev) (rdev)->asic->pm.get_pcie_lanes((rdev))
#define radeon_set_pcie_lanes(rdev, l) (rdev)->asic->pm.set_pcie_lanes((rdev), (l))
#define radeon_set_clock_gating(rdev, e) (rdev)->asic->pm.set_clock_gating((rdev), (e))
+#define radeon_set_uvd_clocks(rdev, v, d) (rdev)->asic->pm.set_uvd_clocks((rdev), (v), (d))
#define radeon_set_surface_reg(rdev, r, f, p, o, s) ((rdev)->asic->surface.set_reg((rdev), (r), (f), (p), (o), (s)))
#define radeon_clear_surface_reg(rdev, r) ((rdev)->asic->surface.clear_reg((rdev), (r)))
#define radeon_bandwidth_update(rdev) (rdev)->asic->display.bandwidth_update((rdev))
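
struct radeon_uvd above tracks up to RADEON_MAX_UVD_HANDLES decode sessions in an array of atomics, with the owning file kept in filp[]. radeon_uvd.c itself is not part of this excerpt, so the following is only a plausible userspace-style sketch of how such a fixed table can be claimed race-free with compare-and-swap (0 meaning a free slot); it is not the driver's actual code:

#include <stdatomic.h>
#include <stdio.h>

#define MAX_HANDLES 10 /* mirrors RADEON_MAX_UVD_HANDLES */

static atomic_uint handles[MAX_HANDLES];

/* Claim a free slot for a nonzero userspace handle; returns the slot
 * index or -1 if all sessions are in use. */
static int claim_handle(unsigned int handle)
{
	for (int i = 0; i < MAX_HANDLES; ++i) {
		unsigned int expected = 0;
		if (atomic_compare_exchange_strong(&handles[i],
						   &expected, handle))
			return i;
	}
	return -1;
}

int main(void)
{
	printf("handle 42 -> slot %d\n", claim_handle(42));
	printf("handle 43 -> slot %d\n", claim_handle(43));
	return 0;
}
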
diff -urN a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
--- a/drivers/gpu/drm/radeon/radeon_asic.c 2013-05-09 22:13:14.643851250 +0200
+++ b/drivers/gpu/drm/radeon/radeon_asic.c 2013-05-09 22:14:57.203842167 +0200
@@ -1130,6 +1130,15 @@
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &r600_dma_is_lockup,
+ },
+ [R600_RING_TYPE_UVD_INDEX] = {
+ .ib_execute = &r600_uvd_ib_execute,
+ .emit_fence = &r600_uvd_fence_emit,
+ .emit_semaphore = &r600_uvd_semaphore_emit,
+ .cs_parse = &radeon_uvd_cs_parse,
+ .ring_test = &r600_uvd_ring_test,
+ .ib_test = &r600_uvd_ib_test,
+ .is_lockup = &radeon_ring_test_lockup,
}
},
.irq = {
@@ -1174,6 +1183,7 @@
.get_pcie_lanes = &r600_get_pcie_lanes,
.set_pcie_lanes = &r600_set_pcie_lanes,
.set_clock_gating = &radeon_atom_set_clock_gating,
+ .set_uvd_clocks = &rv770_set_uvd_clocks,
},
.pflip = {
.pre_page_flip = &rs600_pre_page_flip,
@@ -1216,6 +1226,15 @@
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &evergreen_dma_is_lockup,
+ },
+ [R600_RING_TYPE_UVD_INDEX] = {
+ .ib_execute = &r600_uvd_ib_execute,
+ .emit_fence = &r600_uvd_fence_emit,
+ .emit_semaphore = &r600_uvd_semaphore_emit,
+ .cs_parse = &radeon_uvd_cs_parse,
+ .ring_test = &r600_uvd_ring_test,
+ .ib_test = &r600_uvd_ib_test,
+ .is_lockup = &radeon_ring_test_lockup,
}
},
.irq = {
@@ -1260,6 +1279,7 @@
.get_pcie_lanes = &r600_get_pcie_lanes,
.set_pcie_lanes = &r600_set_pcie_lanes,
.set_clock_gating = NULL,
+ .set_uvd_clocks = &evergreen_set_uvd_clocks,
},
.pflip = {
.pre_page_flip = &evergreen_pre_page_flip,
@@ -1302,6 +1322,15 @@
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &evergreen_dma_is_lockup,
+ },
+ [R600_RING_TYPE_UVD_INDEX] = {
+ .ib_execute = &r600_uvd_ib_execute,
+ .emit_fence = &r600_uvd_fence_emit,
+ .emit_semaphore = &r600_uvd_semaphore_emit,
+ .cs_parse = &radeon_uvd_cs_parse,
+ .ring_test = &r600_uvd_ring_test,
+ .ib_test = &r600_uvd_ib_test,
+ .is_lockup = &radeon_ring_test_lockup,
}
},
.irq = {
@@ -1346,6 +1375,7 @@
.get_pcie_lanes = NULL,
.set_pcie_lanes = NULL,
.set_clock_gating = NULL,
+ .set_uvd_clocks = &sumo_set_uvd_clocks,
},
.pflip = {
.pre_page_flip = &evergreen_pre_page_flip,
@@ -1388,6 +1418,15 @@
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &evergreen_dma_is_lockup,
+ },
+ [R600_RING_TYPE_UVD_INDEX] = {
+ .ib_execute = &r600_uvd_ib_execute,
+ .emit_fence = &r600_uvd_fence_emit,
+ .emit_semaphore = &r600_uvd_semaphore_emit,
+ .cs_parse = &radeon_uvd_cs_parse,
+ .ring_test = &r600_uvd_ring_test,
+ .ib_test = &r600_uvd_ib_test,
+ .is_lockup = &radeon_ring_test_lockup,
}
},
.irq = {
@@ -1432,6 +1471,7 @@
.get_pcie_lanes = NULL,
.set_pcie_lanes = NULL,
.set_clock_gating = NULL,
+ .set_uvd_clocks = &evergreen_set_uvd_clocks,
},
.pflip = {
.pre_page_flip = &evergreen_pre_page_flip,
@@ -1517,6 +1557,15 @@
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
.vm_flush = &cayman_dma_vm_flush,
+ },
+ [R600_RING_TYPE_UVD_INDEX] = {
+ .ib_execute = &r600_uvd_ib_execute,
+ .emit_fence = &r600_uvd_fence_emit,
+ .emit_semaphore = &cayman_uvd_semaphore_emit,
+ .cs_parse = &radeon_uvd_cs_parse,
+ .ring_test = &r600_uvd_ring_test,
+ .ib_test = &r600_uvd_ib_test,
+ .is_lockup = &radeon_ring_test_lockup,
}
},
.irq = {
@@ -1561,6 +1610,7 @@
.get_pcie_lanes = NULL,
.set_pcie_lanes = NULL,
.set_clock_gating = NULL,
+ .set_uvd_clocks = &evergreen_set_uvd_clocks,
},
.pflip = {
.pre_page_flip = &evergreen_pre_page_flip,
@@ -1646,6 +1696,15 @@
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
.vm_flush = &cayman_dma_vm_flush,
+ },
+ [R600_RING_TYPE_UVD_INDEX] = {
+ .ib_execute = &r600_uvd_ib_execute,
+ .emit_fence = &r600_uvd_fence_emit,
+ .emit_semaphore = &cayman_uvd_semaphore_emit,
+ .cs_parse = &radeon_uvd_cs_parse,
+ .ring_test = &r600_uvd_ring_test,
+ .ib_test = &r600_uvd_ib_test,
+ .is_lockup = &radeon_ring_test_lockup,
}
},
.irq = {
@@ -1690,6 +1749,7 @@
.get_pcie_lanes = NULL,
.set_pcie_lanes = NULL,
.set_clock_gating = NULL,
+ .set_uvd_clocks = &sumo_set_uvd_clocks,
},
.pflip = {
.pre_page_flip = &evergreen_pre_page_flip,
@@ -1775,6 +1835,15 @@
.ib_test = &r600_dma_ib_test,
.is_lockup = &si_dma_is_lockup,
.vm_flush = &si_dma_vm_flush,
+ },
+ [R600_RING_TYPE_UVD_INDEX] = {
+ .ib_execute = &r600_uvd_ib_execute,
+ .emit_fence = &r600_uvd_fence_emit,
+ .emit_semaphore = &cayman_uvd_semaphore_emit,
+ .cs_parse = &radeon_uvd_cs_parse,
+ .ring_test = &r600_uvd_ring_test,
+ .ib_test = &r600_uvd_ib_test,
+ .is_lockup = &radeon_ring_test_lockup,
}
},
.irq = {
@@ -1819,6 +1888,7 @@
.get_pcie_lanes = NULL,
.set_pcie_lanes = NULL,
.set_clock_gating = NULL,
+ .set_uvd_clocks = &si_set_uvd_clocks,
},
.pflip = {
.pre_page_flip = &evergreen_pre_page_flip,
diff -urN a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
--- a/drivers/gpu/drm/radeon/radeon_asic.h 2013-05-09 22:13:14.643851250 +0200
+++ b/drivers/gpu/drm/radeon/radeon_asic.h 2013-05-09 22:14:57.203842167 +0200
@@ -330,6 +330,7 @@
void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
+int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
int r600_copy_blit(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages, struct radeon_fence **fence);
@@ -392,6 +393,19 @@
u32 r600_get_xclk(struct radeon_device *rdev);
uint64_t r600_get_gpu_clock_counter(struct radeon_device *rdev);

+/* uvd */
+int r600_uvd_init(struct radeon_device *rdev);
+int r600_uvd_rbc_start(struct radeon_device *rdev);
+void r600_uvd_rbc_stop(struct radeon_device *rdev);
+int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
+void r600_uvd_fence_emit(struct radeon_device *rdev,
+ struct radeon_fence *fence);
+void r600_uvd_semaphore_emit(struct radeon_device *rdev,
+ struct radeon_ring *ring,
+ struct radeon_semaphore *semaphore,
+ bool emit_wait);
+void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+
/*
 * rv770,rv730,rv710,rv740
 */
@@ -409,6 +423,8 @@
unsigned num_gpu_pages,
struct radeon_fence **fence);
u32 rv770_get_xclk(struct radeon_device *rdev);
+int rv770_uvd_resume(struct radeon_device *rdev);
+int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);

/*
 * evergreen
@@ -444,6 +460,8 @@
extern void evergreen_pm_finish(struct radeon_device *rdev);
extern void sumo_pm_init_profile(struct radeon_device *rdev);
extern void btc_pm_init_profile(struct radeon_device *rdev);
+int sumo_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
+int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
extern void evergreen_pre_page_flip(struct radeon_device *rdev, int crtc);
extern u32 evergreen_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_base);
extern void evergreen_post_page_flip(struct radeon_device *rdev, int crtc);
@@ -465,6 +483,10 @@
 */
void cayman_fence_ring_emit(struct radeon_device *rdev,
struct radeon_fence *fence);
+void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
+ struct radeon_ring *ring,
+ struct radeon_semaphore *semaphore,
+ bool emit_wait);
void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev);
int cayman_init(struct radeon_device *rdev);
void cayman_fini(struct radeon_device *rdev);
@@ -524,5 +546,6 @@
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
u32 si_get_xclk(struct radeon_device *rdev);
uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev);
+int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);

#endif
diff -urN a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
--- a/drivers/gpu/drm/radeon/radeon_atombios.c 2013-05-09 22:13:14.647184583 +0200
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c 2013-05-09 22:14:57.230508831 +0200
@@ -2654,6 +2654,111 @@
rdev->pm.current_vddc = 0;
}

+union get_clock_dividers {
+ struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS v1;
+ struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V2 v2;
+ struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V3 v3;
+ struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4 v4;
+ struct _COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V5 v5;
+};
+
+int radeon_atom_get_clock_dividers(struct radeon_device *rdev,
+ u8 clock_type,
+ u32 clock,
+ bool strobe_mode,
+ struct atom_clock_dividers *dividers)
+{
+ union get_clock_dividers args;
+ int index = GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL);
+ u8 frev, crev;
+
+ memset(&args, 0, sizeof(args));
+ memset(dividers, 0, sizeof(struct atom_clock_dividers));
+
+ if (!atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev))
+ return -EINVAL;
+
+ switch (crev) {
+ case 1:
+ /* r4xx, r5xx */
+ args.v1.ucAction = clock_type;
+ args.v1.ulClock = cpu_to_le32(clock); /* 10 khz */
+
+ atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ dividers->post_div = args.v1.ucPostDiv;
+ dividers->fb_div = args.v1.ucFbDiv;
+ dividers->enable_post_div = true;
+ break;
+ case 2:
+ case 3:
+ /* r6xx, r7xx, evergreen, ni */
+ if (rdev->family <= CHIP_RV770) {
+ args.v2.ucAction = clock_type;
+ args.v2.ulClock = cpu_to_le32(clock); /* 10 khz */
+
+ atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ dividers->post_div = args.v2.ucPostDiv;
+ dividers->fb_div = le16_to_cpu(args.v2.usFbDiv);
+ dividers->ref_div = args.v2.ucAction;
+ if (rdev->family == CHIP_RV770) {
+ dividers->enable_post_div = (le32_to_cpu(args.v2.ulClock) & (1 << 24)) ?
+ true : false;
+ dividers->vco_mode = (le32_to_cpu(args.v2.ulClock) & (1 << 25)) ? 1 : 0;
+ } else
+ dividers->enable_post_div = (dividers->fb_div & 1) ? true : false;
+ } else {
+ if (clock_type == COMPUTE_ENGINE_PLL_PARAM) {
+ args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
+
+ atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ dividers->post_div = args.v3.ucPostDiv;
+ dividers->enable_post_div = (args.v3.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN) ? true : false;
+ dividers->enable_dithen = (args.v3.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE) ? false : true;
+ dividers->fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDiv);
+ dividers->frac_fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDivFrac);
+ dividers->ref_div = args.v3.ucRefDiv;
+ dividers->vco_mode = (args.v3.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE) ? 1 : 0;
+ } else {
+ args.v5.ulClockParams = cpu_to_le32((clock_type << 24) | clock);
+ if (strobe_mode)
+ args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN;
+
+ atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ dividers->post_div = args.v5.ucPostDiv;
+ dividers->enable_post_div = (args.v5.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN) ? true : false;
+ dividers->enable_dithen = (args.v5.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE) ? false : true;
+ dividers->whole_fb_div = le16_to_cpu(args.v5.ulFbDiv.usFbDiv);
+ dividers->frac_fb_div = le16_to_cpu(args.v5.ulFbDiv.usFbDivFrac);
+ dividers->ref_div = args.v5.ucRefDiv;
+ dividers->vco_mode = (args.v5.ucCntlFlag &
+ ATOM_PLL_CNTL_FLAG_MPLL_VCO_MODE) ? 1 : 0;
+ }
+ }
+ break;
+ case 4:
+ /* fusion */
+ args.v4.ulClock = cpu_to_le32(clock); /* 10 khz */
+
+ atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+ dividers->post_div = args.v4.ucPostDiv;
+ dividers->real_clock = le32_to_cpu(args.v4.ulClock);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable)
{
DYNAMIC_CLOCK_GATING_PS_ALLOCATION args;
diff -urN a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
--- a/drivers/gpu/drm/radeon/radeon_cs.c 2013-05-09 22:13:14.647184583 +0200
+++ b/drivers/gpu/drm/radeon/radeon_cs.c 2013-05-09 22:14:57.223842165 +0200
@@ -63,30 +63,50 @@
break;
}
}
- if (!duplicate) {
- p->relocs[i].gobj = drm_gem_object_lookup(ddev,
- p->filp,
- r->handle);
- if (p->relocs[i].gobj == NULL) {
- DRM_ERROR("gem object lookup failed 0x%x\n",
- r->handle);
- return -ENOENT;
- }
- p->relocs_ptr[i] = &p->relocs[i];
- p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
- p->relocs[i].lobj.bo = p->relocs[i].robj;
- p->relocs[i].lobj.wdomain = r->write_domain;
- p->relocs[i].lobj.rdomain = r->read_domains;
- p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
- p->relocs[i].handle = r->handle;
- p->relocs[i].flags = r->flags;
- radeon_bo_list_add_object(&p->relocs[i].lobj,
- &p->validated);
-
- } else
+ if (duplicate) {
p->relocs[i].handle = 0;
+ continue;
+ }
+
+ p->relocs[i].gobj = drm_gem_object_lookup(ddev, p->filp,
+ r->handle);
+ if (p->relocs[i].gobj == NULL) {
+ DRM_ERROR("gem object lookup failed 0x%x\n",
+ r->handle);
+ return -ENOENT;
+ }
+ p->relocs_ptr[i] = &p->relocs[i];
+ p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
+ p->relocs[i].lobj.bo = p->relocs[i].robj;
+ p->relocs[i].lobj.written = !!r->write_domain;
+
+ /* the first reloc of an UVD job is the
+ msg and that must be in VRAM */
+ if (p->ring == R600_RING_TYPE_UVD_INDEX && i == 0) {
+ /* TODO: is this still needed for NI+ ? */
+ p->relocs[i].lobj.domain =
+ RADEON_GEM_DOMAIN_VRAM;
+
+ p->relocs[i].lobj.alt_domain =
+ RADEON_GEM_DOMAIN_VRAM;
+
+ } else {
+ uint32_t domain = r->write_domain ?
+ r->write_domain : r->read_domains;
+
+ p->relocs[i].lobj.domain = domain;
+ if (domain == RADEON_GEM_DOMAIN_VRAM)
+ domain |= RADEON_GEM_DOMAIN_GTT;
+ p->relocs[i].lobj.alt_domain = domain;
+ }
+
+ p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
+ p->relocs[i].handle = r->handle;
+
+ radeon_bo_list_add_object(&p->relocs[i].lobj,
+ &p->validated);
}
- return radeon_bo_list_validate(&p->validated);
+ return radeon_bo_list_validate(&p->validated, p->ring);
}

static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
@@ -121,6 +141,9 @@
return -EINVAL;
}
break;
+ case RADEON_CS_RING_UVD:
+ p->ring = R600_RING_TYPE_UVD_INDEX;
+ break;
}
return 0;
}
@@ -241,15 +264,15 @@
return -EINVAL;
}

- /* we only support VM on SI+ */
- if ((p->rdev->family >= CHIP_TAHITI) &&
- ((p->cs_flags & RADEON_CS_USE_VM) == 0)) {
- DRM_ERROR("VM required on SI+!\n");
+ if (radeon_cs_get_ring(p, ring, priority))
return -EINVAL;
- }

- if (radeon_cs_get_ring(p, ring, priority))
+ /* we only support VM on some SI+ rings */
+ if ((p->rdev->asic->ring[p->ring].cs_parse == NULL) &&
+ ((p->cs_flags & RADEON_CS_USE_VM) == 0)) {
+ DRM_ERROR("Ring %d requires VM!\n", p->ring);
return -EINVAL;
+ }
}

/* deal with non-vm */
@@ -526,6 +549,10 @@
r = radeon_cs_handle_lockup(rdev, r);
return r;
}
+
+ if (parser.ring == R600_RING_TYPE_UVD_INDEX)
+ radeon_uvd_note_usage(rdev);
+
r = radeon_cs_ib_chunk(rdev, &parser);
if (r) {
goto out;
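
The reworked reloc handling above replaces the old rdomain/wdomain pair with a preferred domain plus an alternative: VRAM-preferred buffers may fall back to GTT under memory pressure, except the first reloc of a UVD job (the message buffer), which is kept in VRAM. A condensed sketch of that policy with illustrative domain constants:

#include <stdio.h>

#define DOMAIN_GTT  0x2
#define DOMAIN_VRAM 0x4

static void pick_domains(int uvd_msg, unsigned write_domain,
			 unsigned read_domains,
			 unsigned *domain, unsigned *alt)
{
	if (uvd_msg) {
		*domain = *alt = DOMAIN_VRAM; /* no fallback allowed */
		return;
	}
	*domain = write_domain ? write_domain : read_domains;
	*alt = *domain;
	if (*domain == DOMAIN_VRAM)
		*alt |= DOMAIN_GTT; /* allow eviction to GTT */
}

int main(void)
{
	unsigned d, a;

	pick_domains(0, DOMAIN_VRAM, 0, &d, &a);
	printf("normal reloc: domain=0x%x alt=0x%x\n", d, a);
	pick_domains(1, 0, DOMAIN_VRAM, &d, &a);
	printf("uvd message: domain=0x%x alt=0x%x\n", d, a);
	return 0;
}
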
diff -urN a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
--- a/drivers/gpu/drm/radeon/radeon_drv.c 2013-05-09 22:13:14.647184583 +0200
+++ b/drivers/gpu/drm/radeon/radeon_drv.c 2013-05-09 22:14:57.210508833 +0200
@@ -71,9 +71,11 @@
 * 2.28.0 - r600-eg: Add MEM_WRITE packet support
 * 2.29.0 - R500 FP16 color clear registers
 * 2.30.0 - fix for FMASK texturing
+ * 2.31.0 - Add fastfb support for rs690
+ * 2.32.0 - new info request for rings working
 */
#define KMS_DRIVER_MAJOR 2
-#define KMS_DRIVER_MINOR 30
+#define KMS_DRIVER_MINOR 32
#define KMS_DRIVER_PATCHLEVEL 0
int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
int radeon_driver_unload_kms(struct drm_device *dev);
@@ -160,6 +162,7 @@
int radeon_pcie_gen2 = -1;
int radeon_msi = -1;
int radeon_lockup_timeout = 10000;
+int radeon_fastfb = 0;

MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers");
module_param_named(no_wb, radeon_no_wb, int, 0444);
@@ -212,6 +215,9 @@
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (defaul 10000 = 10 seconds, 0 = disable)");
module_param_named(lockup_timeout, radeon_lockup_timeout, int, 0444);

+MODULE_PARM_DESC(fastfb, "Direct FB access for IGP chips (0 = disable, 1 = enable)");
+module_param_named(fastfb, radeon_fastfb, int, 0444);
+
static struct pci_device_id pciidlist[] = {
radeon_PCI_IDS
};
1643 diff -urN a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
1644 --- a/drivers/gpu/drm/radeon/radeon_fence.c 2013-05-09 22:13:14.647184583 +0200
1645 +++ b/drivers/gpu/drm/radeon/radeon_fence.c 2013-05-09 22:14:57.213842166 +0200
1646 @@ -31,9 +31,9 @@
1647 #include <linux/seq_file.h>
1648 #include <linux/atomic.h>
1649 #include <linux/wait.h>
1650 -#include <linux/list.h>
1651 #include <linux/kref.h>
1652 #include <linux/slab.h>
1653 +#include <linux/firmware.h>
1654 #include <drm/drmP.h>
1655 #include "radeon_reg.h"
1656 #include "radeon.h"
1657 @@ -767,8 +767,20 @@
1658
1659 radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
1660 if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
1661 - rdev->fence_drv[ring].scratch_reg = 0;
1662 - index = R600_WB_EVENT_OFFSET + ring * 4;
1663 + if (ring != R600_RING_TYPE_UVD_INDEX) {
1664 + rdev->fence_drv[ring].scratch_reg = 0;
1665 + index = R600_WB_EVENT_OFFSET + ring * 4;
1666 + rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
1667 + rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
1668 + index;
1669 +
1670 + } else {
1671 + /* put fence directly behind firmware */
1672 + index = ALIGN(rdev->uvd_fw->size, 8);
1673 + rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index;
1674 + rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
1675 + }
1676 +
1677 } else {
1678 r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
1679 if (r) {
1680 @@ -778,9 +790,9 @@
1681 index = RADEON_WB_SCRATCH_OFFSET +
1682 rdev->fence_drv[ring].scratch_reg -
1683 rdev->scratch.reg_base;
1684 + rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
1685 + rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
1686 }
1687 - rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
1688 - rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
1689 radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
1690 rdev->fence_drv[ring].initialized = true;
1691 dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
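
[Editor's note] Unlike the other rings, UVD gets no slot in the writeback
page; its fence value lives in VRAM directly behind the firmware image
inside the UVD buffer object. A minimal sketch of the address arithmetic,
using the names from this hunk:

    index = ALIGN(rdev->uvd_fw->size, 8);   /* 8-byte aligned, past the fw */
    cpu   = rdev->uvd.cpu_addr + index;     /* CPU view, polled on waits   */
    gpu   = rdev->uvd.gpu_addr + index;     /* address the UVD ring writes */

This pairs with the radeon_ring.c hunk below, which likewise makes the UVD
ring read its rptr from the register instead of the writeback page.
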
1692 diff -urN a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
1693 --- a/drivers/gpu/drm/radeon/radeon_kms.c 2013-05-09 22:13:14.647184583 +0200
1694 +++ b/drivers/gpu/drm/radeon/radeon_kms.c 2013-05-09 22:14:57.210508833 +0200
1695 @@ -376,6 +376,26 @@
1696 else
1697 return -EINVAL;
1698 break;
1699 + case RADEON_INFO_FASTFB_WORKING:
1700 + value = rdev->fastfb_working;
1701 + break;
1702 + case RADEON_INFO_RING_WORKING:
1703 + switch (value) {
1704 + case RADEON_CS_RING_GFX:
1705 + case RADEON_CS_RING_COMPUTE:
1706 + value = rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready;
1707 + break;
1708 + case RADEON_CS_RING_DMA:
1709 + value = rdev->ring[R600_RING_TYPE_DMA_INDEX].ready;
1710 + value |= rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready;
1711 + break;
1712 + case RADEON_CS_RING_UVD:
1713 + value = rdev->ring[R600_RING_TYPE_UVD_INDEX].ready;
1714 + break;
1715 + default:
1716 + return -EINVAL;
1717 + }
1718 + break;
1719 default:
1720 DRM_DEBUG_KMS("Invalid request %d\n", info->request);
1721 return -EINVAL;
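
[Editor's note] A hedged userspace sketch (not part of the patch) of how
the new RADEON_INFO_RING_WORKING request would be used; it assumes
libdrm's drmCommandWriteRead() and include paths, and elides error
handling. The kernel reads the ring id through the value pointer and
overwrites it with the ready flag:

    #include <stdint.h>
    #include <string.h>
    #include <xf86drm.h>
    #include <radeon_drm.h>

    static int uvd_ring_working(int fd)
    {
        uint32_t value = RADEON_CS_RING_UVD;   /* in: which ring to query */
        struct drm_radeon_info info;

        memset(&info, 0, sizeof(info));
        info.request = RADEON_INFO_RING_WORKING;
        info.value = (uintptr_t)&value;        /* kernel reads, then rewrites */

        if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)))
            return 0;                          /* older kernel: request unknown */
        return value;                          /* out: ring ready flag */
    }
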
1722 @@ -513,6 +533,7 @@
1723 rdev->hyperz_filp = NULL;
1724 if (rdev->cmask_filp == file_priv)
1725 rdev->cmask_filp = NULL;
1726 + radeon_uvd_free_handles(rdev, file_priv);
1727 }
1728
1729 /*
1730 diff -urN a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
1731 --- a/drivers/gpu/drm/radeon/radeon_mode.h 2013-05-09 22:13:14.647184583 +0200
1732 +++ b/drivers/gpu/drm/radeon/radeon_mode.h 2013-05-09 22:14:57.183842168 +0200
1733 @@ -492,6 +492,29 @@
1734 #define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \
1735 ((em) == ATOM_ENCODER_MODE_DP_MST))
1736
1737 +struct atom_clock_dividers {
1738 + u32 post_div;
1739 + union {
1740 + struct {
1741 +#ifdef __BIG_ENDIAN
1742 + u32 reserved : 6;
1743 + u32 whole_fb_div : 12;
1744 + u32 frac_fb_div : 14;
1745 +#else
1746 + u32 frac_fb_div : 14;
1747 + u32 whole_fb_div : 12;
1748 + u32 reserved : 6;
1749 +#endif
1750 + };
1751 + u32 fb_div;
1752 + };
1753 + u32 ref_div;
1754 + bool enable_post_div;
1755 + bool enable_dithen;
1756 + u32 vco_mode;
1757 + u32 real_clock;
1758 +};
1759 +
1760 extern enum radeon_tv_std
1761 radeon_combios_get_tv_info(struct radeon_device *rdev);
1762 extern enum radeon_tv_std
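
[Editor's note] The union overlays fb_div with a 12.14 fixed-point view:
12 whole bits and 14 fractional bits (6 reserved), so the feedback
divider is whole_fb_div + frac_fb_div / 2^14. That matches the SI UPLL
code later in this patch, which computes fb_div as vco_freq / ref_freq
scaled by 16384 (= 2^14). Illustrative example: whole_fb_div = 104 with
frac_fb_div = 8192 encodes a divider of 104.5.
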
1763 diff -urN a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
1764 --- a/drivers/gpu/drm/radeon/radeon_object.c 2013-05-09 22:13:14.647184583 +0200
1765 +++ b/drivers/gpu/drm/radeon/radeon_object.c 2013-05-09 22:14:57.180508835 +0200
1766 @@ -321,8 +321,10 @@
1767 int radeon_bo_init(struct radeon_device *rdev)
1768 {
1769 /* Add an MTRR for the VRAM */
1770 - rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
1771 + if (!rdev->fastfb_working) {
1772 + rdev->mc.vram_mtrr = mtrr_add(rdev->mc.aper_base, rdev->mc.aper_size,
1773 MTRR_TYPE_WRCOMB, 1);
1774 + }
1775 DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
1776 rdev->mc.mc_vram_size >> 20,
1777 (unsigned long long)rdev->mc.aper_size >> 20);
1778 @@ -339,14 +341,14 @@
1779 void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
1780 struct list_head *head)
1781 {
1782 - if (lobj->wdomain) {
1783 + if (lobj->written) {
1784 list_add(&lobj->tv.head, head);
1785 } else {
1786 list_add_tail(&lobj->tv.head, head);
1787 }
1788 }
1789
1790 -int radeon_bo_list_validate(struct list_head *head)
1791 +int radeon_bo_list_validate(struct list_head *head, int ring)
1792 {
1793 struct radeon_bo_list *lobj;
1794 struct radeon_bo *bo;
1795 @@ -360,15 +362,17 @@
1796 list_for_each_entry(lobj, head, tv.head) {
1797 bo = lobj->bo;
1798 if (!bo->pin_count) {
1799 - domain = lobj->wdomain ? lobj->wdomain : lobj->rdomain;
1800 + domain = lobj->domain;
1801
1802 retry:
1803 radeon_ttm_placement_from_domain(bo, domain);
1804 + if (ring == R600_RING_TYPE_UVD_INDEX)
1805 + radeon_uvd_force_into_uvd_segment(bo);
1806 r = ttm_bo_validate(&bo->tbo, &bo->placement,
1807 true, false);
1808 if (unlikely(r)) {
1809 - if (r != -ERESTARTSYS && domain == RADEON_GEM_DOMAIN_VRAM) {
1810 - domain |= RADEON_GEM_DOMAIN_GTT;
1811 + if (r != -ERESTARTSYS && domain != lobj->alt_domain) {
1812 + domain = lobj->alt_domain;
1813 goto retry;
1814 }
1815 return r;
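
[Editor's note] radeon_uvd_force_into_uvd_segment() (added in
radeon_uvd.c at the end of this patch) clamps the BO's placement to
lpfn = (256 MB) >> PAGE_SHIFT, i.e. the first 256 MB of VRAM. The UVD
command-stream checker relies on the same constant: 2^28 bytes = 256 MB,
so its (start >> 28) != (end >> 28) test rejects any buffer crossing a
256 MB segment boundary, which the decoder apparently cannot span.
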
1816 diff -urN a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
1817 --- a/drivers/gpu/drm/radeon/radeon_object.h 2013-05-09 22:13:14.647184583 +0200
1818 +++ b/drivers/gpu/drm/radeon/radeon_object.h 2013-05-09 22:14:57.180508835 +0200
1819 @@ -128,7 +128,7 @@
1820 extern void radeon_bo_fini(struct radeon_device *rdev);
1821 extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
1822 struct list_head *head);
1823 -extern int radeon_bo_list_validate(struct list_head *head);
1824 +extern int radeon_bo_list_validate(struct list_head *head, int ring);
1825 extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
1826 struct vm_area_struct *vma);
1827 extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
1828 diff -urN a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
1829 --- a/drivers/gpu/drm/radeon/radeon_ring.c 2013-05-09 22:13:14.647184583 +0200
1830 +++ b/drivers/gpu/drm/radeon/radeon_ring.c 2013-05-09 22:14:57.180508835 +0200
1831 @@ -368,7 +368,7 @@
1832 {
1833 u32 rptr;
1834
1835 - if (rdev->wb.enabled)
1836 + if (rdev->wb.enabled && ring != &rdev->ring[R600_RING_TYPE_UVD_INDEX])
1837 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
1838 else
1839 rptr = RREG32(ring->rptr_reg);
1840 @@ -821,18 +821,20 @@
1841 return 0;
1842 }
1843
1844 -static int radeon_ring_type_gfx_index = RADEON_RING_TYPE_GFX_INDEX;
1845 -static int cayman_ring_type_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX;
1846 -static int cayman_ring_type_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX;
1847 -static int radeon_ring_type_dma1_index = R600_RING_TYPE_DMA_INDEX;
1848 -static int radeon_ring_type_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX;
1849 +static int radeon_gfx_index = RADEON_RING_TYPE_GFX_INDEX;
1850 +static int cayman_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX;
1851 +static int cayman_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX;
1852 +static int radeon_dma1_index = R600_RING_TYPE_DMA_INDEX;
1853 +static int radeon_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX;
1854 +static int r600_uvd_index = R600_RING_TYPE_UVD_INDEX;
1855
1856 static struct drm_info_list radeon_debugfs_ring_info_list[] = {
1857 - {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_ring_type_gfx_index},
1858 - {"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp1_index},
1859 - {"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp2_index},
1860 - {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma1_index},
1861 - {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma2_index},
1862 + {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_gfx_index},
1863 + {"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_cp1_index},
1864 + {"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_cp2_index},
1865 + {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_dma1_index},
1866 + {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_dma2_index},
1867 + {"radeon_ring_uvd", radeon_debugfs_ring_info, 0, &r600_uvd_index},
1868 };
1869
1870 static int radeon_debugfs_sa_info(struct seq_file *m, void *data)
1871 diff -urN a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
1872 --- a/drivers/gpu/drm/radeon/radeon_test.c 2013-05-09 22:13:14.647184583 +0200
1873 +++ b/drivers/gpu/drm/radeon/radeon_test.c 2013-05-09 22:14:57.180508835 +0200
1874 @@ -252,6 +252,36 @@
1875 radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT);
1876 }
1877
1878 +static int radeon_test_create_and_emit_fence(struct radeon_device *rdev,
1879 + struct radeon_ring *ring,
1880 + struct radeon_fence **fence)
1881 +{
1882 + int r;
1883 +
1884 + if (ring->idx == R600_RING_TYPE_UVD_INDEX) {
1885 + r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
1886 + if (r) {
1887 + DRM_ERROR("Failed to get dummy create msg\n");
1888 + return r;
1889 + }
1890 +
1891 + r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, fence);
1892 + if (r) {
1893 + DRM_ERROR("Failed to get dummy destroy msg\n");
1894 + return r;
1895 + }
1896 + } else {
1897 + r = radeon_ring_lock(rdev, ring, 64);
1898 + if (r) {
1899 + DRM_ERROR("Failed to lock ring A %d\n", ring->idx);
1900 + return r;
1901 + }
1902 + radeon_fence_emit(rdev, fence, ring->idx);
1903 + radeon_ring_unlock_commit(rdev, ring);
1904 + }
1905 + return 0;
1906 +}
1907 +
1908 void radeon_test_ring_sync(struct radeon_device *rdev,
1909 struct radeon_ring *ringA,
1910 struct radeon_ring *ringB)
1911 @@ -272,21 +302,24 @@
1912 goto out_cleanup;
1913 }
1914 radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
1915 - r = radeon_fence_emit(rdev, &fence1, ringA->idx);
1916 - if (r) {
1917 - DRM_ERROR("Failed to emit fence 1\n");
1918 - radeon_ring_unlock_undo(rdev, ringA);
1919 + radeon_ring_unlock_commit(rdev, ringA);
1920 +
1921 + r = radeon_test_create_and_emit_fence(rdev, ringA, &fence1);
1922 + if (r)
1923 goto out_cleanup;
1924 - }
1925 - radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
1926 - r = radeon_fence_emit(rdev, &fence2, ringA->idx);
1927 +
1928 + r = radeon_ring_lock(rdev, ringA, 64);
1929 if (r) {
1930 - DRM_ERROR("Failed to emit fence 2\n");
1931 - radeon_ring_unlock_undo(rdev, ringA);
1932 + DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
1933 goto out_cleanup;
1934 }
1935 + radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
1936 radeon_ring_unlock_commit(rdev, ringA);
1937
1938 + r = radeon_test_create_and_emit_fence(rdev, ringA, &fence2);
1939 + if (r)
1940 + goto out_cleanup;
1941 +
1942 mdelay(1000);
1943
1944 if (radeon_fence_signaled(fence1)) {
1945 @@ -364,27 +397,22 @@
1946 goto out_cleanup;
1947 }
1948 radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
1949 - r = radeon_fence_emit(rdev, &fenceA, ringA->idx);
1950 - if (r) {
1951 - DRM_ERROR("Failed to emit sync fence 1\n");
1952 - radeon_ring_unlock_undo(rdev, ringA);
1953 - goto out_cleanup;
1954 - }
1955 radeon_ring_unlock_commit(rdev, ringA);
1956
1957 + r = radeon_test_create_and_emit_fence(rdev, ringA, &fenceA);
1958 + if (r)
1959 + goto out_cleanup;
1960 +
1961 r = radeon_ring_lock(rdev, ringB, 64);
1962 if (r) {
1963 DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
1964 goto out_cleanup;
1965 }
1966 radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore);
1967 - r = radeon_fence_emit(rdev, &fenceB, ringB->idx);
1968 - if (r) {
1969 - DRM_ERROR("Failed to create sync fence 2\n");
1970 - radeon_ring_unlock_undo(rdev, ringB);
1971 - goto out_cleanup;
1972 - }
1973 radeon_ring_unlock_commit(rdev, ringB);
1974 + r = radeon_test_create_and_emit_fence(rdev, ringB, &fenceB);
1975 + if (r)
1976 + goto out_cleanup;
1977
1978 mdelay(1000);
1979
1980 @@ -393,7 +421,7 @@
1981 goto out_cleanup;
1982 }
1983 if (radeon_fence_signaled(fenceB)) {
1984 - DRM_ERROR("Fence A signaled without waiting for semaphore.\n");
1985 + DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
1986 goto out_cleanup;
1987 }
1988
1989 diff -urN a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
1990 --- a/drivers/gpu/drm/radeon/rs690.c 2013-05-09 22:13:14.647184583 +0200
1991 +++ b/drivers/gpu/drm/radeon/rs690.c 2013-05-09 22:14:57.163842170 +0200
1992 @@ -148,6 +148,8 @@
1993 static void rs690_mc_init(struct radeon_device *rdev)
1994 {
1995 u64 base;
1996 + uint32_t h_addr, l_addr;
1997 + unsigned long long k8_addr;
1998
1999 rs400_gart_adjust_size(rdev);
2000 rdev->mc.vram_is_ddr = true;
2001 @@ -160,6 +162,27 @@
2002 base = RREG32_MC(R_000100_MCCFG_FB_LOCATION);
2003 base = G_000100_MC_FB_START(base) << 16;
2004 rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
2005 +
2006 + /* Use K8 direct mapping for fast fb access. */
2007 + rdev->fastfb_working = false;
2008 + h_addr = G_00005F_K8_ADDR_EXT(RREG32_MC(R_00005F_MC_MISC_UMA_CNTL));
2009 + l_addr = RREG32_MC(R_00001E_K8_FB_LOCATION);
2010 + k8_addr = ((unsigned long long)h_addr) << 32 | l_addr;
2011 +#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
2012 + if (k8_addr + rdev->mc.visible_vram_size < 0x100000000ULL)
2013 +#endif
2014 + {
2015 + /* FastFB requires UMA memory; it is simply disabled when sideport
2016 + * memory is present.
2017 + */
2018 + if (rdev->mc.igp_sideport_enabled == false && radeon_fastfb == 1) {
2019 + DRM_INFO("Direct mapping: aper base at 0x%llx, replaced by direct mapping base 0x%llx.\n",
2020 + (unsigned long long)rdev->mc.aper_base, k8_addr);
2021 + rdev->mc.aper_base = (resource_size_t)k8_addr;
2022 + rdev->fastfb_working = true;
2023 + }
2024 + }
2025 +
2026 rs690_pm_info(rdev);
2027 radeon_vram_location(rdev, &rdev->mc, base);
2028 rdev->mc.gtt_base_align = rdev->mc.gtt_size - 1;
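
[Editor's note] The direct-mapping address is assembled from two MC
registers: K8_ADDR_EXT supplies bits 32-39 and K8_FB_LOCATION bits 0-31
of a 40-bit system address. Worked example with illustrative register
values:

    h_addr = 0x01, l_addr = 0xE0000000
    k8_addr = (0x01ULL << 32) | 0xE0000000 = 0x1E0000000   /* 7.5 GB */

The CONFIG_X86_32 && !CONFIG_X86_PAE guard only accepts the mapping when
it ends below 4 GB, since a non-PAE 32-bit kernel cannot address
physical memory above that.
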
2029 diff -urN a/drivers/gpu/drm/radeon/rs690d.h b/drivers/gpu/drm/radeon/rs690d.h
2030 --- a/drivers/gpu/drm/radeon/rs690d.h 2013-05-09 22:13:14.647184583 +0200
2031 +++ b/drivers/gpu/drm/radeon/rs690d.h 2013-05-09 22:14:57.163842170 +0200
2032 @@ -29,6 +29,9 @@
2033 #define __RS690D_H__
2034
2035 /* Registers */
2036 +#define R_00001E_K8_FB_LOCATION 0x00001E
2037 +#define R_00005F_MC_MISC_UMA_CNTL 0x00005F
2038 +#define G_00005F_K8_ADDR_EXT(x) (((x) >> 0) & 0xFF)
2039 #define R_000078_MC_INDEX 0x000078
2040 #define S_000078_MC_IND_ADDR(x) (((x) & 0x1FF) << 0)
2041 #define G_000078_MC_IND_ADDR(x) (((x) >> 0) & 0x1FF)
2042 diff -urN a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
2043 --- a/drivers/gpu/drm/radeon/rv770.c 2013-05-09 22:13:14.647184583 +0200
2044 +++ b/drivers/gpu/drm/radeon/rv770.c 2013-05-09 22:14:57.217175499 +0200
2045 @@ -42,6 +42,168 @@
2046 static void rv770_gpu_init(struct radeon_device *rdev);
2047 void rv770_fini(struct radeon_device *rdev);
2048 static void rv770_pcie_gen2_enable(struct radeon_device *rdev);
2049 +int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
2050 +
2051 +static int rv770_uvd_calc_post_div(unsigned target_freq,
2052 + unsigned vco_freq,
2053 + unsigned *div)
2054 +{
2055 + /* Fclk = Fvco / PDIV */
2056 + *div = vco_freq / target_freq;
2057 +
2058 + /* we always need a frequency less than or equal to the target */
2059 + if ((vco_freq / *div) > target_freq)
2060 + *div += 1;
2061 +
2062 + /* out of range ? */
2063 + if (*div > 30)
2064 + return -1; /* forget it */
2065 +
2066 + *div -= 1;
2067 + return vco_freq / (*div + 1);
2068 +}
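
[Editor's note] Worked example of the post-divider search, with
illustrative numbers in the driver's 10 kHz clock units: target = 40000
(400 MHz), vco_freq = 110000. Then *div = 110000 / 40000 = 2; since
110000 / 2 = 55000 exceeds the target, *div is bumped to 3. The stored
value is decremented to 2 because the hardware encodes the divider as
N - 1, and the function returns 110000 / 3 = 36666, i.e. the clock
actually produced, always at or below the requested frequency.
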
2069 +
2070 +static int rv770_uvd_send_upll_ctlreq(struct radeon_device *rdev)
2071 +{
2072 + unsigned i;
2073 +
2074 + /* assert UPLL_CTLREQ */
2075 + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
2076 +
2077 + /* wait for CTLACK and CTLACK2 to get asserted */
2078 + for (i = 0; i < 100; ++i) {
2079 + uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
2080 + if ((RREG32(CG_UPLL_FUNC_CNTL) & mask) == mask)
2081 + break;
2082 + mdelay(10);
2083 + }
2084 + if (i == 100)
2085 + return -ETIMEDOUT;
2086 +
2087 + /* deassert UPLL_CTLREQ */
2088 + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
2089 +
2090 + return 0;
2091 +}
2092 +
2093 +int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
2094 +{
2095 + /* start off with something large */
2096 + int optimal_diff_score = 0x7FFFFFF;
2097 + unsigned optimal_fb_div = 0, optimal_vclk_div = 0;
2098 + unsigned optimal_dclk_div = 0, optimal_vco_freq = 0;
2099 + unsigned vco_freq, vco_min = 50000, vco_max = 160000;
2100 + unsigned ref_freq = rdev->clock.spll.reference_freq;
2101 + int r;
2102 +
2103 + /* RV740 uses evergreen uvd clk programming */
2104 + if (rdev->family == CHIP_RV740)
2105 + return evergreen_set_uvd_clocks(rdev, vclk, dclk);
2106 +
2107 + /* bypass vclk and dclk with bclk */
2108 + WREG32_P(CG_UPLL_FUNC_CNTL_2,
2109 + VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
2110 + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
2111 +
2112 + if (!vclk || !dclk) {
2113 + /* keep the Bypass mode, put PLL to sleep */
2114 + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
2115 + return 0;
2116 + }
2117 +
2118 + /* loop through vco from low to high */
2119 + vco_min = max(max(vco_min, vclk), dclk);
2120 + for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 500) {
2121 + uint64_t fb_div = (uint64_t)vco_freq * 43663;
2122 + int calc_clk, diff_score, diff_vclk, diff_dclk;
2123 + unsigned vclk_div, dclk_div;
2124 +
2125 + do_div(fb_div, ref_freq);
2126 + fb_div |= 1;
2127 +
2128 + /* fb div out of range ? */
2129 + if (fb_div > 0x03FFFFFF)
2130 + break; /* it can only get worse */
2131 +
2132 + /* calc vclk with current vco freq. */
2133 + calc_clk = rv770_uvd_calc_post_div(vclk, vco_freq, &vclk_div);
2134 + if (calc_clk == -1)
2135 + break; /* vco is too big, stop searching */
2136 + diff_vclk = vclk - calc_clk;
2137 +
2138 + /* calc dclk with current vco freq. */
2139 + calc_clk = rv770_uvd_calc_post_div(dclk, vco_freq, &dclk_div);
2140 + if (calc_clk == -1)
2141 + break; /* vco is too big, stop searching */
2142 + diff_dclk = dclk - calc_clk;
2143 +
2144 + /* determine if this vco setting is better than current optimal settings */
2145 + diff_score = abs(diff_vclk) + abs(diff_dclk);
2146 + if (diff_score < optimal_diff_score) {
2147 + optimal_fb_div = fb_div;
2148 + optimal_vclk_div = vclk_div;
2149 + optimal_dclk_div = dclk_div;
2150 + optimal_vco_freq = vco_freq;
2151 + optimal_diff_score = diff_score;
2152 + if (optimal_diff_score == 0)
2153 + break; /* it can't get better than this */
2154 + }
2155 + }
2156 +
2157 + /* set UPLL_FB_DIV to 0x50000 */
2158 + WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(0x50000), ~UPLL_FB_DIV_MASK);
2159 +
2160 + /* deassert UPLL_RESET and UPLL_SLEEP */
2161 + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~(UPLL_RESET_MASK | UPLL_SLEEP_MASK));
2162 +
2163 + /* assert BYPASS EN and FB_DIV[0] <- ??? why? */
2164 + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
2165 + WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(1), ~UPLL_FB_DIV(1));
2166 +
2167 + r = rv770_uvd_send_upll_ctlreq(rdev);
2168 + if (r)
2169 + return r;
2170 +
2171 + /* assert PLL_RESET */
2172 + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
2173 +
2174 + /* set the required FB_DIV, REF_DIV, post divider values */
2175 + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_REF_DIV(1), ~UPLL_REF_DIV_MASK);
2176 + WREG32_P(CG_UPLL_FUNC_CNTL_2,
2177 + UPLL_SW_HILEN(optimal_vclk_div >> 1) |
2178 + UPLL_SW_LOLEN((optimal_vclk_div >> 1) + (optimal_vclk_div & 1)) |
2179 + UPLL_SW_HILEN2(optimal_dclk_div >> 1) |
2180 + UPLL_SW_LOLEN2((optimal_dclk_div >> 1) + (optimal_dclk_div & 1)),
2181 + ~UPLL_SW_MASK);
2182 +
2183 + WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(optimal_fb_div),
2184 + ~UPLL_FB_DIV_MASK);
2185 +
2186 + /* give the PLL some time to settle */
2187 + mdelay(15);
2188 +
2189 + /* deassert PLL_RESET */
2190 + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
2191 +
2192 + mdelay(15);
2193 +
2194 + /* deassert BYPASS EN and FB_DIV[0] <- ??? why? */
2195 + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
2196 + WREG32_P(CG_UPLL_FUNC_CNTL_3, 0, ~UPLL_FB_DIV(1));
2197 +
2198 + r = rv770_uvd_send_upll_ctlreq(rdev);
2199 + if (r)
2200 + return r;
2201 +
2202 + /* switch VCLK and DCLK selection */
2203 + WREG32_P(CG_UPLL_FUNC_CNTL_2,
2204 + VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
2205 + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
2206 +
2207 + mdelay(100);
2208 +
2209 + return 0;
2210 +}
2211
2212 #define PCIE_BUS_CLK 10000
2213 #define TCLK (PCIE_BUS_CLK / 10)
2214 @@ -68,6 +230,105 @@
2215 return reference_clock;
2216 }
2217
2218 +int rv770_uvd_resume(struct radeon_device *rdev)
2219 +{
2220 + uint64_t addr;
2221 + uint32_t chip_id, size;
2222 + int r;
2223 +
2224 + r = radeon_uvd_resume(rdev);
2225 + if (r)
2226 + return r;
2227 +
2228 + /* program the VCPU memory controller bits 0-27 */
2229 + addr = rdev->uvd.gpu_addr >> 3;
2230 + size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
2231 + WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
2232 + WREG32(UVD_VCPU_CACHE_SIZE0, size);
2233 +
2234 + addr += size;
2235 + size = RADEON_UVD_STACK_SIZE >> 3;
2236 + WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
2237 + WREG32(UVD_VCPU_CACHE_SIZE1, size);
2238 +
2239 + addr += size;
2240 + size = RADEON_UVD_HEAP_SIZE >> 3;
2241 + WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
2242 + WREG32(UVD_VCPU_CACHE_SIZE2, size);
2243 +
2244 + /* bits 28-31 */
2245 + addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
2246 + WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
2247 +
2248 + /* bits 32-39 */
2249 + addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
2250 + WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
2251 +
2252 + /* tell firmware which hardware it is running on */
2253 + switch (rdev->family) {
2254 + default:
2255 + return -EINVAL;
2256 + case CHIP_RV710:
2257 + chip_id = 0x01000005;
2258 + break;
2259 + case CHIP_RV730:
2260 + chip_id = 0x01000006;
2261 + break;
2262 + case CHIP_RV740:
2263 + chip_id = 0x01000007;
2264 + break;
2265 + case CHIP_CYPRESS:
2266 + case CHIP_HEMLOCK:
2267 + chip_id = 0x01000008;
2268 + break;
2269 + case CHIP_JUNIPER:
2270 + chip_id = 0x01000009;
2271 + break;
2272 + case CHIP_REDWOOD:
2273 + chip_id = 0x0100000a;
2274 + break;
2275 + case CHIP_CEDAR:
2276 + chip_id = 0x0100000b;
2277 + break;
2278 + case CHIP_SUMO:
2279 + chip_id = 0x0100000c;
2280 + break;
2281 + case CHIP_SUMO2:
2282 + chip_id = 0x0100000d;
2283 + break;
2284 + case CHIP_PALM:
2285 + chip_id = 0x0100000e;
2286 + break;
2287 + case CHIP_CAYMAN:
2288 + chip_id = 0x0100000f;
2289 + break;
2290 + case CHIP_BARTS:
2291 + chip_id = 0x01000010;
2292 + break;
2293 + case CHIP_TURKS:
2294 + chip_id = 0x01000011;
2295 + break;
2296 + case CHIP_CAICOS:
2297 + chip_id = 0x01000012;
2298 + break;
2299 + case CHIP_TAHITI:
2300 + chip_id = 0x01000014;
2301 + break;
2302 + case CHIP_VERDE:
2303 + chip_id = 0x01000015;
2304 + break;
2305 + case CHIP_PITCAIRN:
2306 + chip_id = 0x01000016;
2307 + break;
2308 + case CHIP_ARUBA:
2309 + chip_id = 0x01000017;
2310 + break;
2311 + }
2312 + WREG32(UVD_VCPU_CHIP_ID, chip_id);
2313 +
2314 + return 0;
2315 +}
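
[Editor's note] All VCPU cache offsets and sizes are programmed in
8-byte units, hence the >> 3 shifts. The resulting layout inside the
UVD buffer object is:

    OFFSET0 = gpu_addr >> 3       SIZE0 = RADEON_GPU_PAGE_ALIGN(fw->size + 4) >> 3  (firmware)
    OFFSET1 = OFFSET0 + SIZE0     SIZE1 = RADEON_UVD_STACK_SIZE >> 3                (stack)
    OFFSET2 = OFFSET1 + SIZE1     SIZE2 = RADEON_UVD_HEAP_SIZE >> 3                 (heap)

UVD_LMI_ADDR_EXT and UVD_LMI_EXT40_ADDR then supply address bits 28-31
and 32-39, extending the 28-bit (256 MB) window the offsets live in.
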
2316 +
2317 u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
2318 {
2319 struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
2320 @@ -611,6 +872,11 @@
2321 WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
2322 WREG32(DMA_TILING_CONFIG, (gb_tiling_config & 0xffff));
2323 WREG32(DMA_TILING_CONFIG2, (gb_tiling_config & 0xffff));
2324 + if (rdev->family == CHIP_RV730) {
2325 + WREG32(UVD_UDEC_DB_TILING_CONFIG, (gb_tiling_config & 0xffff));
2326 + WREG32(UVD_UDEC_DBW_TILING_CONFIG, (gb_tiling_config & 0xffff));
2327 + WREG32(UVD_UDEC_TILING_CONFIG, (gb_tiling_config & 0xffff));
2328 + }
2329
2330 WREG32(CGTS_SYS_TCC_DISABLE, 0);
2331 WREG32(CGTS_TCC_DISABLE, 0);
2332 @@ -1040,6 +1306,17 @@
2333 return r;
2334 }
2335
2336 + r = rv770_uvd_resume(rdev);
2337 + if (!r) {
2338 + r = radeon_fence_driver_start_ring(rdev,
2339 + R600_RING_TYPE_UVD_INDEX);
2340 + if (r)
2341 + dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
2342 + }
2343 +
2344 + if (r)
2345 + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
2346 +
2347 /* Enable IRQ */
2348 r = r600_irq_init(rdev);
2349 if (r) {
2350 @@ -1074,6 +1351,19 @@
2351 if (r)
2352 return r;
2353
2354 + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
2355 + if (ring->ring_size) {
2356 + r = radeon_ring_init(rdev, ring, ring->ring_size,
2357 + R600_WB_UVD_RPTR_OFFSET,
2358 + UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
2359 + 0, 0xfffff, RADEON_CP_PACKET2);
2360 + if (!r)
2361 + r = r600_uvd_init(rdev);
2362 +
2363 + if (r)
2364 + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
2365 + }
2366 +
2367 r = radeon_ib_pool_init(rdev);
2368 if (r) {
2369 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
2370 @@ -1115,6 +1405,7 @@
2371 int rv770_suspend(struct radeon_device *rdev)
2372 {
2373 r600_audio_fini(rdev);
2374 + radeon_uvd_suspend(rdev);
2375 r700_cp_stop(rdev);
2376 r600_dma_stop(rdev);
2377 r600_irq_suspend(rdev);
2378 @@ -1190,6 +1481,13 @@
2379 rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
2380 r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
2381
2382 + r = radeon_uvd_init(rdev);
2383 + if (!r) {
2384 + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
2385 + r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX],
2386 + 4096);
2387 + }
2388 +
2389 rdev->ih.ring_obj = NULL;
2390 r600_ih_ring_init(rdev, 64 * 1024);
2391
2392 @@ -1224,6 +1522,7 @@
2393 radeon_ib_pool_fini(rdev);
2394 radeon_irq_kms_fini(rdev);
2395 rv770_pcie_gart_fini(rdev);
2396 + radeon_uvd_fini(rdev);
2397 r600_vram_scratch_fini(rdev);
2398 radeon_gem_fini(rdev);
2399 radeon_fence_driver_fini(rdev);
2400 diff -urN a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h
2401 --- a/drivers/gpu/drm/radeon/rv770d.h 2013-05-09 22:13:14.647184583 +0200
2402 +++ b/drivers/gpu/drm/radeon/rv770d.h 2013-05-09 22:14:57.210508833 +0200
2403 @@ -38,6 +38,30 @@
2404 #define R7XX_MAX_PIPES 8
2405 #define R7XX_MAX_PIPES_MASK 0xff
2406
2407 +/* discrete uvd clocks */
2408 +#define CG_UPLL_FUNC_CNTL 0x718
2409 +# define UPLL_RESET_MASK 0x00000001
2410 +# define UPLL_SLEEP_MASK 0x00000002
2411 +# define UPLL_BYPASS_EN_MASK 0x00000004
2412 +# define UPLL_CTLREQ_MASK 0x00000008
2413 +# define UPLL_REF_DIV(x) ((x) << 16)
2414 +# define UPLL_REF_DIV_MASK 0x001F0000
2415 +# define UPLL_CTLACK_MASK 0x40000000
2416 +# define UPLL_CTLACK2_MASK 0x80000000
2417 +#define CG_UPLL_FUNC_CNTL_2 0x71c
2418 +# define UPLL_SW_HILEN(x) ((x) << 0)
2419 +# define UPLL_SW_LOLEN(x) ((x) << 4)
2420 +# define UPLL_SW_HILEN2(x) ((x) << 8)
2421 +# define UPLL_SW_LOLEN2(x) ((x) << 12)
2422 +# define UPLL_SW_MASK 0x0000FFFF
2423 +# define VCLK_SRC_SEL(x) ((x) << 20)
2424 +# define VCLK_SRC_SEL_MASK 0x01F00000
2425 +# define DCLK_SRC_SEL(x) ((x) << 25)
2426 +# define DCLK_SRC_SEL_MASK 0x3E000000
2427 +#define CG_UPLL_FUNC_CNTL_3 0x720
2428 +# define UPLL_FB_DIV(x) ((x) << 0)
2429 +# define UPLL_FB_DIV_MASK 0x01FFFFFF
2430 +
2431 /* Registers */
2432 #define CB_COLOR0_BASE 0x28040
2433 #define CB_COLOR1_BASE 0x28044
2434 @@ -112,6 +136,11 @@
2435 #define DMA_TILING_CONFIG 0x3ec8
2436 #define DMA_TILING_CONFIG2 0xd0b8
2437
2438 +/* RV730 only */
2439 +#define UVD_UDEC_TILING_CONFIG 0xef40
2440 +#define UVD_UDEC_DB_TILING_CONFIG 0xef44
2441 +#define UVD_UDEC_DBW_TILING_CONFIG 0xef48
2442 +
2443 #define GC_USER_SHADER_PIPE_CONFIG 0x8954
2444 #define INACTIVE_QD_PIPES(x) ((x) << 8)
2445 #define INACTIVE_QD_PIPES_MASK 0x0000FF00
2446 @@ -671,4 +700,18 @@
2447 # define TARGET_LINK_SPEED_MASK (0xf << 0)
2448 # define SELECTABLE_DEEMPHASIS (1 << 6)
2449
2450 +/* UVD */
2451 +#define UVD_LMI_EXT40_ADDR 0xf498
2452 +#define UVD_VCPU_CHIP_ID 0xf4d4
2453 +#define UVD_VCPU_CACHE_OFFSET0 0xf4d8
2454 +#define UVD_VCPU_CACHE_SIZE0 0xf4dc
2455 +#define UVD_VCPU_CACHE_OFFSET1 0xf4e0
2456 +#define UVD_VCPU_CACHE_SIZE1 0xf4e4
2457 +#define UVD_VCPU_CACHE_OFFSET2 0xf4e8
2458 +#define UVD_VCPU_CACHE_SIZE2 0xf4ec
2459 +#define UVD_LMI_ADDR_EXT 0xf594
2460 +
2461 +#define UVD_RBC_RB_RPTR 0xf690
2462 +#define UVD_RBC_RB_WPTR 0xf694
2463 +
2464 #endif
2465 diff -urN a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
2466 --- a/drivers/gpu/drm/radeon/si.c 2013-05-09 22:13:14.650517916 +0200
2467 +++ b/drivers/gpu/drm/radeon/si.c 2013-05-09 22:14:57.220508832 +0200
2468 @@ -1768,6 +1768,9 @@
2469 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2470 WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
2471 WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
2472 + WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2473 + WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2474 + WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2475
2476 si_tiling_mode_table_init(rdev);
2477
2478 @@ -4372,6 +4375,16 @@
2479 return r;
2480 }
2481
2482 + r = rv770_uvd_resume(rdev);
2483 + if (!r) {
2484 + r = radeon_fence_driver_start_ring(rdev,
2485 + R600_RING_TYPE_UVD_INDEX);
2486 + if (r)
2487 + dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
2488 + }
2489 + if (r)
2490 + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
2491 +
2492 /* Enable IRQ */
2493 r = si_irq_init(rdev);
2494 if (r) {
2495 @@ -4429,6 +4442,18 @@
2496 if (r)
2497 return r;
2498
2499 + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
2500 + if (ring->ring_size) {
2501 + r = radeon_ring_init(rdev, ring, ring->ring_size,
2502 + R600_WB_UVD_RPTR_OFFSET,
2503 + UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
2504 + 0, 0xfffff, RADEON_CP_PACKET2);
2505 + if (!r)
2506 + r = r600_uvd_init(rdev);
2507 + if (r)
2508 + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
2509 + }
2510 +
2511 r = radeon_ib_pool_init(rdev);
2512 if (r) {
2513 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
2514 @@ -4472,6 +4497,8 @@
2515 radeon_vm_manager_fini(rdev);
2516 si_cp_enable(rdev, false);
2517 cayman_dma_stop(rdev);
2518 + r600_uvd_rbc_stop(rdev);
2519 + radeon_uvd_suspend(rdev);
2520 si_irq_suspend(rdev);
2521 radeon_wb_disable(rdev);
2522 si_pcie_gart_disable(rdev);
2523 @@ -4557,6 +4584,13 @@
2524 ring->ring_obj = NULL;
2525 r600_ring_init(rdev, ring, 64 * 1024);
2526
2527 + r = radeon_uvd_init(rdev);
2528 + if (!r) {
2529 + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
2530 + ring->ring_obj = NULL;
2531 + r600_ring_init(rdev, ring, 4096);
2532 + }
2533 +
2534 rdev->ih.ring_obj = NULL;
2535 r600_ih_ring_init(rdev, 64 * 1024);
2536
2537 @@ -4605,6 +4639,7 @@
2538 radeon_vm_manager_fini(rdev);
2539 radeon_ib_pool_fini(rdev);
2540 radeon_irq_kms_fini(rdev);
2541 + radeon_uvd_fini(rdev);
2542 si_pcie_gart_fini(rdev);
2543 r600_vram_scratch_fini(rdev);
2544 radeon_gem_fini(rdev);
2545 @@ -4634,3 +4669,176 @@
2546 mutex_unlock(&rdev->gpu_clock_mutex);
2547 return clock;
2548 }
2549 +
2550 +static int si_uvd_calc_post_div(unsigned target_freq,
2551 + unsigned vco_freq,
2552 + unsigned *div)
2553 +{
2554 + /* target larger than vco frequency ? */
2555 + if (vco_freq < target_freq)
2556 + return -1; /* forget it */
2557 +
2558 + /* Fclk = Fvco / PDIV */
2559 + *div = vco_freq / target_freq;
2560 +
2561 + /* we always need a frequency less than or equal to the target */
2562 + if ((vco_freq / *div) > target_freq)
2563 + *div += 1;
2564 +
2565 + /* dividers above 5 must be even */
2566 + if (*div > 5 && *div % 2)
2567 + *div += 1;
2568 +
2569 + /* out of range ? */
2570 + if (*div >= 128)
2571 + return -1; /* forget it */
2572 +
2573 + return vco_freq / *div;
2574 +}
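
[Editor's note] Worked example showing the even-divider rule, with
illustrative numbers: target = 40000, vco_freq = 250000. *div starts at
250000 / 40000 = 6; 250000 / 6 = 41666 is above the target, so it
becomes 7; 7 is above 5 and odd, so it is rounded up to 8, and the
function returns 250000 / 8 = 31250. The rounding can cost accuracy,
which is exactly why the caller scores many candidate VCO frequencies
and keeps the best vclk/dclk pair.
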
2575 +
2576 +static int si_uvd_send_upll_ctlreq(struct radeon_device *rdev)
2577 +{
2578 + unsigned i;
2579 +
2580 + /* assert UPLL_CTLREQ */
2581 + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
2582 +
2583 + /* wait for CTLACK and CTLACK2 to get asserted */
2584 + for (i = 0; i < 100; ++i) {
2585 + uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
2586 + if ((RREG32(CG_UPLL_FUNC_CNTL) & mask) == mask)
2587 + break;
2588 + mdelay(10);
2589 + }
2590 + if (i == 100)
2591 + return -ETIMEDOUT;
2592 +
2593 + /* deassert UPLL_CTLREQ */
2594 + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
2595 +
2596 + return 0;
2597 +}
2598 +
2599 +int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
2600 +{
2601 + /* start off with something large */
2602 + int optimal_diff_score = 0x7FFFFFF;
2603 + unsigned optimal_fb_div = 0, optimal_vclk_div = 0;
2604 + unsigned optimal_dclk_div = 0, optimal_vco_freq = 0;
2605 + unsigned vco_freq;
2606 + int r;
2607 +
2608 + /* bypass vclk and dclk with bclk */
2609 + WREG32_P(CG_UPLL_FUNC_CNTL_2,
2610 + VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
2611 + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
2612 +
2613 + /* put PLL in bypass mode */
2614 + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
2615 +
2616 + if (!vclk || !dclk) {
2617 + /* keep the Bypass mode, put PLL to sleep */
2618 + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
2619 + return 0;
2620 + }
2621 +
2622 + /* loop through vco from low to high */
2623 + for (vco_freq = 125000; vco_freq <= 250000; vco_freq += 100) {
2624 + unsigned fb_div = vco_freq / rdev->clock.spll.reference_freq * 16384;
2625 + int calc_clk, diff_score, diff_vclk, diff_dclk;
2626 + unsigned vclk_div, dclk_div;
2627 +
2628 + /* fb div out of range ? */
2629 + if (fb_div > 0x03FFFFFF)
2630 + break; /* it can only get worse */
2631 +
2632 + /* calc vclk with current vco freq. */
2633 + calc_clk = si_uvd_calc_post_div(vclk, vco_freq, &vclk_div);
2634 + if (calc_clk == -1)
2635 + break; /* vco is too big, stop searching */
2636 + diff_vclk = vclk - calc_clk;
2637 +
2638 + /* calc dclk with current vco freq. */
2639 + calc_clk = si_uvd_calc_post_div(dclk, vco_freq, &dclk_div);
2640 + if (calc_clk == -1)
2641 + break; /* vco is too big, stop searching */
2642 + diff_dclk = dclk - calc_clk;
2643 +
2644 + /* determine if this vco setting is better than current optimal settings */
2645 + diff_score = abs(diff_vclk) + abs(diff_dclk);
2646 + if (diff_score < optimal_diff_score) {
2647 + optimal_fb_div = fb_div;
2648 + optimal_vclk_div = vclk_div;
2649 + optimal_dclk_div = dclk_div;
2650 + optimal_vco_freq = vco_freq;
2651 + optimal_diff_score = diff_score;
2652 + if (optimal_diff_score == 0)
2653 + break; /* it can't get better than this */
2654 + }
2655 + }
2656 +
2657 + /* set RESET_ANTI_MUX to 0 */
2658 + WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
2659 +
2660 + /* set VCO_MODE to 1 */
2661 + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
2662 +
2663 + /* toggle UPLL_SLEEP to 1 then back to 0 */
2664 + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
2665 + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
2666 +
2667 + /* deassert UPLL_RESET */
2668 + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
2669 +
2670 + mdelay(1);
2671 +
2672 + r = si_uvd_send_upll_ctlreq(rdev);
2673 + if (r)
2674 + return r;
2675 +
2676 + /* assert UPLL_RESET again */
2677 + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
2678 +
2679 + /* disable spread spectrum. */
2680 + WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
2681 +
2682 + /* set feedback divider */
2683 + WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(optimal_fb_div), ~UPLL_FB_DIV_MASK);
2684 +
2685 + /* set ref divider to 0 */
2686 + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
2687 +
2688 + if (optimal_vco_freq < 187500)
2689 + WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
2690 + else
2691 + WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
2692 +
2693 + /* set PDIV_A and PDIV_B */
2694 + WREG32_P(CG_UPLL_FUNC_CNTL_2,
2695 + UPLL_PDIV_A(optimal_vclk_div) | UPLL_PDIV_B(optimal_dclk_div),
2696 + ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
2697 +
2698 + /* give the PLL some time to settle */
2699 + mdelay(15);
2700 +
2701 + /* deassert PLL_RESET */
2702 + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
2703 +
2704 + mdelay(15);
2705 +
2706 + /* switch from bypass mode to normal mode */
2707 + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
2708 +
2709 + r = si_uvd_send_upll_ctlreq(rdev);
2710 + if (r)
2711 + return r;
2712 +
2713 + /* switch VCLK and DCLK selection */
2714 + WREG32_P(CG_UPLL_FUNC_CNTL_2,
2715 + VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
2716 + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
2717 +
2718 + mdelay(100);
2719 +
2720 + return 0;
2721 +}
2722 diff -urN a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
2723 --- a/drivers/gpu/drm/radeon/sid.h 2013-05-09 22:13:14.650517916 +0200
2724 +++ b/drivers/gpu/drm/radeon/sid.h 2013-05-09 22:14:57.210508833 +0200
2725 @@ -29,6 +29,35 @@
2726 #define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
2727 #define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002
2728
2729 +/* discrete uvd clocks */
2730 +#define CG_UPLL_FUNC_CNTL 0x634
2731 +# define UPLL_RESET_MASK 0x00000001
2732 +# define UPLL_SLEEP_MASK 0x00000002
2733 +# define UPLL_BYPASS_EN_MASK 0x00000004
2734 +# define UPLL_CTLREQ_MASK 0x00000008
2735 +# define UPLL_VCO_MODE_MASK 0x00000600
2736 +# define UPLL_REF_DIV_MASK 0x001F0000
2737 +# define UPLL_CTLACK_MASK 0x40000000
2738 +# define UPLL_CTLACK2_MASK 0x80000000
2739 +#define CG_UPLL_FUNC_CNTL_2 0x638
2740 +# define UPLL_PDIV_A(x) ((x) << 0)
2741 +# define UPLL_PDIV_A_MASK 0x0000007F
2742 +# define UPLL_PDIV_B(x) ((x) << 8)
2743 +# define UPLL_PDIV_B_MASK 0x00007F00
2744 +# define VCLK_SRC_SEL(x) ((x) << 20)
2745 +# define VCLK_SRC_SEL_MASK 0x01F00000
2746 +# define DCLK_SRC_SEL(x) ((x) << 25)
2747 +# define DCLK_SRC_SEL_MASK 0x3E000000
2748 +#define CG_UPLL_FUNC_CNTL_3 0x63C
2749 +# define UPLL_FB_DIV(x) ((x) << 0)
2750 +# define UPLL_FB_DIV_MASK 0x01FFFFFF
2751 +#define CG_UPLL_FUNC_CNTL_4 0x644
2752 +# define UPLL_SPARE_ISPARE9 0x00020000
2753 +#define CG_UPLL_FUNC_CNTL_5 0x648
2754 +# define RESET_ANTI_MUX_MASK 0x00000200
2755 +#define CG_UPLL_SPREAD_SPECTRUM 0x650
2756 +# define SSEN_MASK 0x00000001
2757 +
2758 #define CG_MULT_THERMAL_STATUS 0x714
2759 #define ASIC_MAX_TEMP(x) ((x) << 0)
2760 #define ASIC_MAX_TEMP_MASK 0x000001ff
2761 @@ -798,6 +827,15 @@
2762 # define THREAD_TRACE_FINISH (55 << 0)
2763
2764 /*
2765 + * UVD
2766 + */
2767 +#define UVD_UDEC_ADDR_CONFIG 0xEF4C
2768 +#define UVD_UDEC_DB_ADDR_CONFIG 0xEF50
2769 +#define UVD_UDEC_DBW_ADDR_CONFIG 0xEF54
2770 +#define UVD_RBC_RB_RPTR 0xF690
2771 +#define UVD_RBC_RB_WPTR 0xF694
2772 +
2773 +/*
2774 * PM4
2775 */
2776 #define PACKET0(reg, n) ((RADEON_PACKET_TYPE0 << 30) | \
2777 diff -urN a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
2778 --- a/include/uapi/drm/radeon_drm.h 2013-05-09 22:13:14.680517913 +0200
2779 +++ b/include/uapi/drm/radeon_drm.h 2013-05-09 22:14:57.210508833 +0200
2780 @@ -918,6 +918,7 @@
2781 #define RADEON_CS_RING_GFX 0
2782 #define RADEON_CS_RING_COMPUTE 1
2783 #define RADEON_CS_RING_DMA 2
2784 +#define RADEON_CS_RING_UVD 3
2785 /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
2786 /* 0 = normal, + = higher priority, - = lower priority */
2787
2788 @@ -972,6 +973,11 @@
2789 #define RADEON_INFO_MAX_SE 0x12
2790 /* max SH per SE */
2791 #define RADEON_INFO_MAX_SH_PER_SE 0x13
2792 +/* fast fb access is enabled */
2793 +#define RADEON_INFO_FASTFB_WORKING 0x14
2794 +/* query if a RADEON_CS_RING_* submission is supported */
2795 +#define RADEON_INFO_RING_WORKING 0x15
2796 +
2797
2798 struct drm_radeon_info {
2799 uint32_t request;
2800 diff -urN /dev/null b/drivers/gpu/drm/radeon/radeon_uvd.c
2801 --- /dev/null 2013-05-09 19:51:08.911273005 +0200
2802 +++ b/drivers/gpu/drm/radeon/radeon_uvd.c 2013-05-10 01:45:56.542720982 +0200
2803 @@ -0,0 +1,694 @@
2804 +/*
2805 + * Copyright 2011 Advanced Micro Devices, Inc.
2806 + * All Rights Reserved.
2807 + *
2808 + * Permission is hereby granted, free of charge, to any person obtaining a
2809 + * copy of this software and associated documentation files (the
2810 + * "Software"), to deal in the Software without restriction, including
2811 + * without limitation the rights to use, copy, modify, merge, publish,
2812 + * distribute, sub license, and/or sell copies of the Software, and to
2813 + * permit persons to whom the Software is furnished to do so, subject to
2814 + * the following conditions:
2815 + *
2816 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
2817 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2818 + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
2819 + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
2820 + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
2821 + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
2822 + * USE OR OTHER DEALINGS IN THE SOFTWARE.
2823 + *
2824 + * The above copyright notice and this permission notice (including the
2825 + * next paragraph) shall be included in all copies or substantial portions
2826 + * of the Software.
2827 + *
2828 + */
2829 +/*
2830 + * Authors:
2831 + * Christian König <deathsimple@vodafone.de>
2832 + */
2833 +
2834 +#include <linux/firmware.h>
2835 +#include <linux/module.h>
2836 +#include <drm/drmP.h>
2837 +#include <drm/drm.h>
2838 +
2839 +#include "radeon.h"
2840 +#include "r600d.h"
2841 +
2842 +/* 1 second timeout */
2843 +#define UVD_IDLE_TIMEOUT_MS 1000
2844 +
2845 +/* Firmware Names */
2846 +#define FIRMWARE_RV710 "radeon/RV710_uvd.bin"
2847 +#define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin"
2848 +#define FIRMWARE_SUMO "radeon/SUMO_uvd.bin"
2849 +#define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin"
2850 +
2851 +MODULE_FIRMWARE(FIRMWARE_RV710);
2852 +MODULE_FIRMWARE(FIRMWARE_CYPRESS);
2853 +MODULE_FIRMWARE(FIRMWARE_SUMO);
2854 +MODULE_FIRMWARE(FIRMWARE_TAHITI);
2855 +
2856 +static void radeon_uvd_idle_work_handler(struct work_struct *work);
2857 +
2858 +int radeon_uvd_init(struct radeon_device *rdev)
2859 +{
2860 + struct platform_device *pdev;
2861 + unsigned long bo_size;
2862 + const char *fw_name;
2863 + int i, r;
2864 +
2865 + INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);
2866 +
2867 + pdev = platform_device_register_simple("radeon_uvd", 0, NULL, 0);
2868 + r = IS_ERR(pdev);
2869 + if (r) {
2870 + dev_err(rdev->dev, "radeon_uvd: Failed to register firmware\n");
2871 + return -EINVAL;
2872 + }
2873 +
2874 + switch (rdev->family) {
2875 + case CHIP_RV710:
2876 + case CHIP_RV730:
2877 + case CHIP_RV740:
2878 + fw_name = FIRMWARE_RV710;
2879 + break;
2880 +
2881 + case CHIP_CYPRESS:
2882 + case CHIP_HEMLOCK:
2883 + case CHIP_JUNIPER:
2884 + case CHIP_REDWOOD:
2885 + case CHIP_CEDAR:
2886 + fw_name = FIRMWARE_CYPRESS;
2887 + break;
2888 +
2889 + case CHIP_SUMO:
2890 + case CHIP_SUMO2:
2891 + case CHIP_PALM:
2892 + case CHIP_CAYMAN:
2893 + case CHIP_BARTS:
2894 + case CHIP_TURKS:
2895 + case CHIP_CAICOS:
2896 + fw_name = FIRMWARE_SUMO;
2897 + break;
2898 +
2899 + case CHIP_TAHITI:
2900 + case CHIP_VERDE:
2901 + case CHIP_PITCAIRN:
2902 + case CHIP_ARUBA:
2903 + fw_name = FIRMWARE_TAHITI;
2904 + break;
2905 +
2906 + default:
2907 + return -EINVAL;
2908 + }
2909 +
2910 + r = request_firmware(&rdev->uvd_fw, fw_name, &pdev->dev);
2911 + if (r) {
2912 + dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
2913 + fw_name);
2914 + platform_device_unregister(pdev);
2915 + return r;
2916 + }
2917 +
2918 + platform_device_unregister(pdev);
2919 +
2920 + bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) +
2921 + RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE;
2922 + r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
2923 + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->uvd.vcpu_bo);
2924 + if (r) {
2925 + dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
2926 + return r;
2927 + }
2928 +
2929 + r = radeon_uvd_resume(rdev);
2930 + if (r)
2931 + return r;
2932 +
2933 + memset(rdev->uvd.cpu_addr, 0, bo_size);
2934 + memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);
2935 +
2936 + r = radeon_uvd_suspend(rdev);
2937 + if (r)
2938 + return r;
2939 +
2940 + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
2941 + atomic_set(&rdev->uvd.handles[i], 0);
2942 + rdev->uvd.filp[i] = NULL;
2943 + }
2944 +
2945 + return 0;
2946 +}
2947 +
2948 +void radeon_uvd_fini(struct radeon_device *rdev)
2949 +{
2950 + radeon_uvd_suspend(rdev);
2951 + radeon_bo_unref(&rdev->uvd.vcpu_bo);
2952 +}
2953 +
2954 +int radeon_uvd_suspend(struct radeon_device *rdev)
2955 +{
2956 + int r;
2957 +
2958 + if (rdev->uvd.vcpu_bo == NULL)
2959 + return 0;
2960 +
2961 + r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
2962 + if (!r) {
2963 + radeon_bo_kunmap(rdev->uvd.vcpu_bo);
2964 + radeon_bo_unpin(rdev->uvd.vcpu_bo);
2965 + radeon_bo_unreserve(rdev->uvd.vcpu_bo);
2966 + }
2967 + return r;
2968 +}
2969 +
2970 +int radeon_uvd_resume(struct radeon_device *rdev)
2971 +{
2972 + int r;
2973 +
2974 + if (rdev->uvd.vcpu_bo == NULL)
2975 + return -EINVAL;
2976 +
2977 + r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
2978 + if (r) {
2979 + radeon_bo_unref(&rdev->uvd.vcpu_bo);
2980 + dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
2981 + return r;
2982 + }
2983 +
2984 + r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
2985 + &rdev->uvd.gpu_addr);
2986 + if (r) {
2987 + radeon_bo_unreserve(rdev->uvd.vcpu_bo);
2988 + radeon_bo_unref(&rdev->uvd.vcpu_bo);
2989 + dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
2990 + return r;
2991 + }
2992 +
2993 + r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
2994 + if (r) {
2995 + dev_err(rdev->dev, "(%d) UVD map failed\n", r);
2996 + return r;
2997 + }
2998 +
2999 + radeon_bo_unreserve(rdev->uvd.vcpu_bo);
3000 +
3001 + return 0;
3002 +}
3003 +
3004 +void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo)
3005 +{
3006 + rbo->placement.fpfn = 0 >> PAGE_SHIFT;
3007 + rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
3008 +}
3009 +
3010 +void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
3011 +{
3012 + int i, r;
3013 + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
3014 + if (rdev->uvd.filp[i] == filp) {
3015 + uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
3016 + struct radeon_fence *fence;
3017 +
3018 + r = radeon_uvd_get_destroy_msg(rdev,
3019 + R600_RING_TYPE_UVD_INDEX, handle, &fence);
3020 + if (r) {
3021 + DRM_ERROR("Error destroying UVD (%d)!\n", r);
3022 + continue;
3023 + }
3024 +
3025 + radeon_fence_wait(fence, false);
3026 + radeon_fence_unref(&fence);
3027 +
3028 + rdev->uvd.filp[i] = NULL;
3029 + atomic_set(&rdev->uvd.handles[i], 0);
3030 + }
3031 + }
3032 +}
3033 +
3034 +static int radeon_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[])
3035 +{
3036 + unsigned stream_type = msg[4];
3037 + unsigned width = msg[6];
3038 + unsigned height = msg[7];
3039 + unsigned dpb_size = msg[9];
3040 + unsigned pitch = msg[28];
3041 +
3042 + unsigned width_in_mb = width / 16;
3043 + unsigned height_in_mb = ALIGN(height / 16, 2);
3044 +
3045 + unsigned image_size, tmp, min_dpb_size;
3046 +
3047 + image_size = width * height;
3048 + image_size += image_size / 2;
3049 + image_size = ALIGN(image_size, 1024);
3050 +
3051 + switch (stream_type) {
3052 + case 0: /* H264 */
3053 +
3054 + /* reference picture buffer */
3055 + min_dpb_size = image_size * 17;
3056 +
3057 + /* macroblock context buffer */
3058 + min_dpb_size += width_in_mb * height_in_mb * 17 * 192;
3059 +
3060 + /* IT surface buffer */
3061 + min_dpb_size += width_in_mb * height_in_mb * 32;
3062 + break;
3063 +
3064 + case 1: /* VC1 */
3065 +
3066 + /* reference picture buffer */
3067 + min_dpb_size = image_size * 3;
3068 +
3069 + /* CONTEXT_BUFFER */
3070 + min_dpb_size += width_in_mb * height_in_mb * 128;
3071 +
3072 + /* IT surface buffer */
3073 + min_dpb_size += width_in_mb * 64;
3074 +
3075 + /* DB surface buffer */
3076 + min_dpb_size += width_in_mb * 128;
3077 +
3078 + /* BP */
3079 + tmp = max(width_in_mb, height_in_mb);
3080 + min_dpb_size += ALIGN(tmp * 7 * 16, 64);
3081 + break;
3082 +
3083 + case 3: /* MPEG2 */
3084 +
3085 + /* reference picture buffer */
3086 + min_dpb_size = image_size * 3;
3087 + break;
3088 +
3089 + case 4: /* MPEG4 */
3090 +
3091 + /* reference picture buffer */
3092 + min_dpb_size = image_size * 3;
3093 +
3094 + /* CM */
3095 + min_dpb_size += width_in_mb * height_in_mb * 64;
3096 +
3097 + /* IT surface buffer */
3098 + min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64);
3099 + break;
3100 +
3101 + default:
3102 + DRM_ERROR("UVD codec not handled %d!\n", stream_type);
3103 + return -EINVAL;
3104 + }
3105 +
3106 + if (width > pitch) {
3107 + DRM_ERROR("Invalid UVD decoding target pitch!\n");
3108 + return -EINVAL;
3109 + }
3110 +
3111 + if (dpb_size < min_dpb_size) {
3112 + DRM_ERROR("Invalid dpb_size in UVD message (%d / %d)!\n",
3113 + dpb_size, min_dpb_size);
3114 + return -EINVAL;
3115 + }
3116 +
3117 + buf_sizes[0x1] = dpb_size;
3118 + buf_sizes[0x2] = image_size;
3119 + return 0;
3120 +}
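
[Editor's note] Worked example of the H264 sizing for an illustrative
1280x720 stream: width_in_mb = 80, height_in_mb = ALIGN(45, 2) = 46,
image_size = ALIGN(1280 * 720 * 3 / 2, 1024) = 1382400. The minimum DPB
is then

    1382400 * 17          = 23500800   (reference pictures)
  +  80 * 46 * 17 * 192   = 12011520   (macroblock context)
  +  80 * 46 * 32         =   117760   (IT surface)
                          = 35630080 bytes, roughly 34 MB,

and a decode message declaring a smaller dpb_size is rejected.
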
3121 +
3122 +static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
3123 + unsigned offset, unsigned buf_sizes[])
3124 +{
3125 + int32_t *msg, msg_type, handle;
3126 + void *ptr;
3127 +
3128 + int i, r;
3129 +
3130 + if (offset & 0x3F) {
3131 + DRM_ERROR("UVD messages must be 64 byte aligned!\n");
3132 + return -EINVAL;
3133 + }
3134 +
3135 + r = radeon_bo_kmap(bo, &ptr);
3136 + if (r)
3137 + return r;
3138 +
3139 + msg = ptr + offset;
3140 +
3141 + msg_type = msg[1];
3142 + handle = msg[2];
3143 +
3144 + if (handle == 0) {
3145 + DRM_ERROR("Invalid UVD handle!\n");
+ radeon_bo_kunmap(bo); /* don't leak the kmap on this error path */
3146 + return -EINVAL;
3147 + }
3148 +
3149 + if (msg_type == 1) {
3150 + /* it's a decode msg, calc buffer sizes */
3151 + r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
3152 + radeon_bo_kunmap(bo);
3153 + if (r)
3154 + return r;
3155 +
3156 + } else if (msg_type == 2) {
3157 + /* it's a destroy msg, free the handle */
3158 + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i)
3159 + atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
3160 + radeon_bo_kunmap(bo);
3161 + return 0;
3162 + } else {
3163 + /* it's a create msg, no special handling needed */
3164 + radeon_bo_kunmap(bo);
3165 + }
3166 +
3167 + /* create or decode, validate the handle */
3168 + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
3169 + if (atomic_read(&p->rdev->uvd.handles[i]) == handle)
3170 + return 0;
3171 + }
3172 +
3173 + /* handle not found, try to alloc a new one */
3174 + for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
3175 + if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
3176 + p->rdev->uvd.filp[i] = p->filp;
3177 + return 0;
3178 + }
3179 + }
3180 +
3181 + DRM_ERROR("No more free UVD handles!\n");
3182 + return -EINVAL;
3183 +}
3184 +
3185 +static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
3186 + int data0, int data1,
3187 + unsigned buf_sizes[])
3188 +{
3189 + struct radeon_cs_chunk *relocs_chunk;
3190 + struct radeon_cs_reloc *reloc;
3191 + unsigned idx, cmd, offset;
3192 + uint64_t start, end;
3193 + int r;
3194 +
3195 + relocs_chunk = &p->chunks[p->chunk_relocs_idx];
3196 + offset = radeon_get_ib_value(p, data0);
3197 + idx = radeon_get_ib_value(p, data1);
3198 + if (idx >= relocs_chunk->length_dw) {
3199 + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
3200 + idx, relocs_chunk->length_dw);
3201 + return -EINVAL;
3202 + }
3203 +
3204 + reloc = p->relocs_ptr[(idx / 4)];
3205 + start = reloc->lobj.gpu_offset;
3206 + end = start + radeon_bo_size(reloc->robj);
3207 + start += offset;
3208 +
3209 + p->ib.ptr[data0] = start & 0xFFFFFFFF;
3210 + p->ib.ptr[data1] = start >> 32;
3211 +
3212 + cmd = radeon_get_ib_value(p, p->idx) >> 1;
3213 +
3214 + if (cmd < 0x4) {
3215 + if ((end - start) < buf_sizes[cmd]) {
3216 + DRM_ERROR("buffer to small (%d / %d)!\n",
3217 + (unsigned)(end - start), buf_sizes[cmd]);
3218 + return -EINVAL;
3219 + }
3220 +
3221 + } else if (cmd != 0x100) {
3222 + DRM_ERROR("invalid UVD command %X!\n", cmd);
3223 + return -EINVAL;
3224 + }
3225 +
3226 + if ((start >> 28) != (end >> 28)) {
3227 + DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
3228 + start, end);
3229 + return -EINVAL;
3230 + }
3231 +
3232 + /* TODO: is this still necessary on NI+ ? */
3233 + if ((cmd == 0 || cmd == 0x3) &&
3234 + (start >> 28) != (p->rdev->uvd.gpu_addr >> 28)) {
3235 + DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
3236 + start, end);
3237 + return -EINVAL;
3238 + }
3239 +
3240 + if (cmd == 0) {
3241 + r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
3242 + if (r)
3243 + return r;
3244 + }
3245 +
3246 + return 0;
3247 +}
3248 +
3249 +static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
3250 + struct radeon_cs_packet *pkt,
3251 + int *data0, int *data1,
3252 + unsigned buf_sizes[])
3253 +{
3254 + int i, r;
3255 +
3256 + p->idx++;
3257 + for (i = 0; i <= pkt->count; ++i) {
3258 + switch (pkt->reg + i*4) {
3259 + case UVD_GPCOM_VCPU_DATA0:
3260 + *data0 = p->idx;
3261 + break;
3262 + case UVD_GPCOM_VCPU_DATA1:
3263 + *data1 = p->idx;
3264 + break;
3265 + case UVD_GPCOM_VCPU_CMD:
3266 + r = radeon_uvd_cs_reloc(p, *data0, *data1, buf_sizes);
3267 + if (r)
3268 + return r;
3269 + break;
3270 + case UVD_ENGINE_CNTL:
3271 + break;
3272 + default:
3273 + DRM_ERROR("Invalid reg 0x%X!\n",
3274 + pkt->reg + i*4);
3275 + return -EINVAL;
3276 + }
3277 + p->idx++;
3278 + }
3279 + return 0;
3280 +}
3281 +
3282 +int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
3283 +{
3284 + struct radeon_cs_packet pkt;
3285 + int r, data0 = 0, data1 = 0;
3286 +
3287 + /* minimum buffer sizes */
3288 + unsigned buf_sizes[] = {
3289 + [0x00000000] = 2048,
3290 + [0x00000001] = 32 * 1024 * 1024,
3291 + [0x00000002] = 2048 * 1152 * 3,
3292 + [0x00000003] = 2048,
3293 + };
3294 +
3295 + if (p->chunks[p->chunk_ib_idx].length_dw % 16) {
3296 + DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
3297 + p->chunks[p->chunk_ib_idx].length_dw);
3298 + return -EINVAL;
3299 + }
3300 +
3301 + if (p->chunk_relocs_idx == -1) {
3302 + DRM_ERROR("No relocation chunk !\n");
3303 + return -EINVAL;
3304 + }
3305 +
3306 +
3307 + do {
3308 + r = radeon_cs_packet_parse(p, &pkt, p->idx);
3309 + if (r)
3310 + return r;
3311 + switch (pkt.type) {
3312 + case RADEON_PACKET_TYPE0:
3313 + r = radeon_uvd_cs_reg(p, &pkt, &data0,
3314 + &data1, buf_sizes);
3315 + if (r)
3316 + return r;
3317 + break;
3318 + case RADEON_PACKET_TYPE2:
3319 + p->idx += pkt.count + 2;
3320 + break;
3321 + default:
3322 + DRM_ERROR("Unknown packet type %d !\n", pkt.type);
3323 + return -EINVAL;
3324 + }
3325 + } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
3326 + return 0;
3327 +}
3328 +
3329 +static int radeon_uvd_send_msg(struct radeon_device *rdev,
3330 + int ring, struct radeon_bo *bo,
3331 + struct radeon_fence **fence)
3332 +{
3333 + struct ttm_validate_buffer tv;
3334 + struct list_head head;
3335 + struct radeon_ib ib;
3336 + uint64_t addr;
3337 + int i, r;
3338 +
3339 + memset(&tv, 0, sizeof(tv));
3340 + tv.bo = &bo->tbo;
3341 +
3342 + INIT_LIST_HEAD(&head);
3343 + list_add(&tv.head, &head);
3344 +
3345 + r = ttm_eu_reserve_buffers(&head);
3346 + if (r)
3347 + return r;
3348 +
3349 + radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM);
3350 + radeon_uvd_force_into_uvd_segment(bo);
3351 +
3352 + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
3353 + if (r) {
3354 + ttm_eu_backoff_reservation(&head);
3355 + return r;
3356 + }
3357 +
3358 + r = radeon_ib_get(rdev, ring, &ib, NULL, 16);
3359 + if (r) {
3360 + ttm_eu_backoff_reservation(&head);
3361 + return r;
3362 + }
3363 +
3364 + addr = radeon_bo_gpu_offset(bo);
3365 + ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
3366 + ib.ptr[1] = addr;
3367 + ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
3368 + ib.ptr[3] = addr >> 32;
3369 + ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
3370 + ib.ptr[5] = 0;
3371 + for (i = 6; i < 16; ++i)
3372 + ib.ptr[i] = PACKET2(0);
3373 + ib.length_dw = 16;
3374 +
3375 + r = radeon_ib_schedule(rdev, &ib, NULL);
3376 + if (r) {
3377 + ttm_eu_backoff_reservation(&head);
3378 + return r;
3379 + }
3380 + ttm_eu_fence_buffer_objects(&head, ib.fence);
3381 +
3382 + if (fence)
3383 + *fence = radeon_fence_ref(ib.fence);
3384 +
3385 + radeon_ib_free(rdev, &ib);
3386 + radeon_bo_unref(&bo);
3387 + return 0;
3388 +}
3389 +
3390 +/* multiple fence commands without any stream commands in between can
3391 + crash the vcpu so just try to emit a dummy create/destroy msg to
3392 + avoid this */
3393 +int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
3394 + uint32_t handle, struct radeon_fence **fence)
3395 +{
3396 + struct radeon_bo *bo;
3397 + uint32_t *msg;
3398 + int r, i;
3399 +
3400 + r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true,
3401 + RADEON_GEM_DOMAIN_VRAM, NULL, &bo);
3402 + if (r)
3403 + return r;
3404 +
3405 + r = radeon_bo_reserve(bo, false);
3406 + if (r) {
3407 + radeon_bo_unref(&bo);
3408 + return r;
3409 + }
3410 +
3411 + r = radeon_bo_kmap(bo, (void **)&msg);
3412 + if (r) {
3413 + radeon_bo_unreserve(bo);
3414 + radeon_bo_unref(&bo);
3415 + return r;
3416 + }
3417 +
3418 + /* stitch together an UVD create msg */
3419 + msg[0] = 0x00000de4;
3420 + msg[1] = 0x00000000;
3421 + msg[2] = handle;
3422 + msg[3] = 0x00000000;
3423 + msg[4] = 0x00000000;
3424 + msg[5] = 0x00000000;
3425 + msg[6] = 0x00000000;
3426 + msg[7] = 0x00000780;
3427 + msg[8] = 0x00000440;
3428 + msg[9] = 0x00000000;
3429 + msg[10] = 0x01b37000;
3430 + for (i = 11; i < 1024; ++i)
3431 + msg[i] = 0x0;
3432 +
3433 + radeon_bo_kunmap(bo);
3434 + radeon_bo_unreserve(bo);
3435 +
3436 + return radeon_uvd_send_msg(rdev, ring, bo, fence);
3437 +}
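
[Editor's note] The message layout is not documented in the patch; by
position, 0x00000de4 in msg[0] looks like a common header shared with
the destroy message below, msg[2] carries the session handle, and
0x00000780 / 0x00000440 plausibly declare a 1920x1088 dummy target
(0x780 = 1920, 0x440 = 1088), i.e. just enough state for the firmware
to accept and later tear down the session. Treat these readings as
editorial inference, not specification.
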
3438 +
3439 +int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
3440 + uint32_t handle, struct radeon_fence **fence)
3441 +{
3442 + struct radeon_bo *bo;
3443 + uint32_t *msg;
3444 + int r, i;
3445 +
3446 + r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true,
3447 + RADEON_GEM_DOMAIN_VRAM, NULL, &bo);
3448 + if (r)
3449 + return r;
3450 +
3451 + r = radeon_bo_reserve(bo, false);
3452 + if (r) {
3453 + radeon_bo_unref(&bo);
3454 + return r;
3455 + }
3456 +
3457 + r = radeon_bo_kmap(bo, (void **)&msg);
3458 + if (r) {
3459 + radeon_bo_unreserve(bo);
3460 + radeon_bo_unref(&bo);
3461 + return r;
3462 + }
3463 +
3464 + /* stitch together an UVD destroy msg */
3465 + msg[0] = 0x00000de4;
3466 + msg[1] = 0x00000002;
3467 + msg[2] = handle;
3468 + msg[3] = 0x00000000;
3469 + for (i = 4; i < 1024; ++i)
3470 + msg[i] = 0x0;
3471 +
3472 + radeon_bo_kunmap(bo);
3473 + radeon_bo_unreserve(bo);
3474 +
3475 + return radeon_uvd_send_msg(rdev, ring, bo, fence);
3476 +}
3477 +
3478 +static void radeon_uvd_idle_work_handler(struct work_struct *work)
3479 +{
3480 + struct radeon_device *rdev =
3481 + container_of(work, struct radeon_device, uvd.idle_work.work);
3482 +
3483 + if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0)
3484 + radeon_set_uvd_clocks(rdev, 0, 0);
3485 + else
3486 + schedule_delayed_work(&rdev->uvd.idle_work,
3487 + msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
3488 +}
3489 +
3490 +void radeon_uvd_note_usage(struct radeon_device *rdev)
3491 +{
3492 + bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work);
3493 + set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
3494 + msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
3495 + if (set_clocks)
3496 + radeon_set_uvd_clocks(rdev, 53300, 40000);
3497 +}
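
[Editor's note] The clock arguments are in the driver's usual 10 kHz
units, so the first submission after idle raises the UVD clocks to
VCLK = 533 MHz and DCLK = 400 MHz (53300 / 100 and 40000 / 100), and
the delayed work parks them back at 0 once no UVD fences have been
emitted for UVD_IDLE_TIMEOUT_MS (one second).
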
