/* radeon_state.c -- State support for Radeon -*- linux-c -*- */
/*
 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes <gareth@valinux.com>
 *    Kevin E. Martin <martin@valinux.com>
 */

#include "drmP.h"
#include "drm.h"
#include "drm_sarea.h"
#include "radeon_drm.h"
#include "radeon_drv.h"

/* ================================================================
 * Helper functions for client state checking and fixup
 */

static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
						    dev_priv,
						    drm_file_t * filp_priv,
						    u32 *offset)
{
	u32 off = *offset;
	struct drm_radeon_driver_file_fields *radeon_priv;

	/* Hrm ... the story of the offset ... So this function converts
	 * the various ideas of what userland clients might have for an
	 * offset in the card address space into an offset into the card
	 * address space :) So with a sane client, it should just keep
	 * the value intact and just do some boundary checking. However,
	 * not all clients are sane. Some older clients pass us 0 based
	 * offsets relative to the start of the framebuffer and some may
	 * assume the AGP aperture is appended to the framebuffer, so we
	 * try to detect those cases and fix them up.
	 *
	 * Note: It might be a good idea here to make sure the offset lands
	 * in some "allowed" area to protect things like the PCIE GART...
	 */
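
	/* For instance (hypothetical numbers): with fb_location 0xe0000000
	 * and an old client passing the fb-relative offset 0x10000, the
	 * range checks below fail, radeon_fb_delta (0xe0000000 here) is
	 * added, and the recheck then accepts 0xe0010000.
	 */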

	/* First, the best case, the offset already lands in either the
	 * framebuffer or the GART mapped space
	 */
	if ((off >= dev_priv->fb_location &&
	     off < (dev_priv->fb_location + dev_priv->fb_size)) ||
	    (off >= dev_priv->gart_vm_start &&
	     off < (dev_priv->gart_vm_start + dev_priv->gart_size)))
		return 0;

	/* Ok, that didn't happen... now check if we have a zero based
	 * offset that fits in the framebuffer + gart space, apply the
	 * magic offset we get from SETPARAM or calculated from fb_location
	 */
	if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
		radeon_priv = filp_priv->driver_priv;
		off += radeon_priv->radeon_fb_delta;
	}

	/* Finally, assume we aimed at a GART offset if beyond the fb */
	if (off > (dev_priv->fb_location + dev_priv->fb_size))
		off = off - (dev_priv->fb_location + dev_priv->fb_size) +
		    dev_priv->gart_vm_start;

	/* Now recheck and fail if out of bounds */
	if ((off >= dev_priv->fb_location &&
	     off < (dev_priv->fb_location + dev_priv->fb_size)) ||
	    (off >= dev_priv->gart_vm_start &&
	     off < (dev_priv->gart_vm_start + dev_priv->gart_size))) {
		DRM_DEBUG("offset fixed up to 0x%x\n", off);
		*offset = off;
		return 0;
	}
	return DRM_ERR(EINVAL);
}

static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t * filp_priv,
						     int id, u32 *data)
{
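	/* 'data' points at the register payload of the state packet; the
	 * index of a register within it is (REG - first_reg_of_block) / 4,
	 * which is how the array offsets below are derived.
	 */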
	switch (id) {

	case RADEON_EMIT_PP_MISC:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[0])) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R200 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
			break;
		}

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
		}
		break;

	case R200_EMIT_VAP_CTL:{
			RING_LOCALS;
			BEGIN_RING(2);
			OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
			ADVANCE_RING();
		}
		break;

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
	case R200_EMIT_VAP_PVS_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return DRM_ERR(EINVAL);
	}

	return 0;
}

static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t *filp_priv,
						     drm_radeon_kcmd_buffer_t *
						     cmdbuf,
						     unsigned int *cmdsz)
{
	u32 *cmd = (u32 *) cmdbuf->buf;

	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
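	/* The count field in bits 29:16 of a type-3 header holds the
	 * packet's total dword count minus two, hence the + 2 above.
	 */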

	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
		DRM_ERROR("Not a type 3 packet\n");
		return DRM_ERR(EINVAL);
	}

	if (4 * *cmdsz > cmdbuf->bufsz) {
		DRM_ERROR("Packet size larger than size of data provided\n");
		return DRM_ERR(EINVAL);
	}

	/* Check client state and fix it up if necessary */
	if (cmd[0] & 0x8000) {	/* MSB of opcode: next DWORD GUI_CNTL */
		u32 offset;

		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[2] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid first packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
		}

		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[3] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid second packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
		}
	}

	return 0;
}

/* ================================================================
 * CP hardware state programming functions
 */

static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
					     drm_clip_rect_t * box)
{
	RING_LOCALS;

	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

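	/* Note: RE_TOP_LEFT takes the inclusive top-left corner, while the
	 * second register is programmed with the inclusive bottom-right
	 * corner, hence the -1 on x2/y2 below.
	 */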
	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING((box->y1 << 16) | box->x1);
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
	ADVANCE_RING();
}

/* Emit 1.1 state
 */
static int radeon_emit_state(drm_radeon_private_t * dev_priv,
			     drm_file_t * filp_priv,
			     drm_radeon_context_regs_t * ctx,
			     drm_radeon_texture_regs_t * tex,
			     unsigned int dirty)
{
	RING_LOCALS;
	DRM_DEBUG("dirty=0x%08x\n", dirty);

	if (dirty & RADEON_UPLOAD_CONTEXT) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &ctx->rb3d_depthoffset)) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}

		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &ctx->rb3d_coloroffset)) {
362 DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(14);
		OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
		OUT_RING(ctx->pp_misc);
		OUT_RING(ctx->pp_fog_color);
		OUT_RING(ctx->re_solid_color);
		OUT_RING(ctx->rb3d_blendcntl);
		OUT_RING(ctx->rb3d_depthoffset);
		OUT_RING(ctx->rb3d_depthpitch);
		OUT_RING(ctx->rb3d_zstencilcntl);
		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
		OUT_RING(ctx->pp_cntl);
		OUT_RING(ctx->rb3d_cntl);
		OUT_RING(ctx->rb3d_coloroffset);
		OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
		OUT_RING(ctx->rb3d_colorpitch);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VERTFMT) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
		OUT_RING(ctx->se_coord_fmt);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_LINE) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
		OUT_RING(ctx->re_line_pattern);
		OUT_RING(ctx->re_line_state);
		OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
		OUT_RING(ctx->se_line_width);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_BUMPMAP) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
		OUT_RING(ctx->pp_lum_matrix);
		OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
		OUT_RING(ctx->pp_rot_matrix_0);
		OUT_RING(ctx->pp_rot_matrix_1);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MASKS) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
		OUT_RING(ctx->rb3d_stencilrefmask);
		OUT_RING(ctx->rb3d_ropcntl);
		OUT_RING(ctx->rb3d_planemask);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VIEWPORT) {
		BEGIN_RING(7);
		OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
		OUT_RING(ctx->se_vport_xscale);
		OUT_RING(ctx->se_vport_xoffset);
		OUT_RING(ctx->se_vport_yscale);
		OUT_RING(ctx->se_vport_yoffset);
		OUT_RING(ctx->se_vport_zscale);
		OUT_RING(ctx->se_vport_zoffset);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_SETUP) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
		OUT_RING(ctx->se_cntl);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
		OUT_RING(ctx->se_cntl_status);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MISC) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
		OUT_RING(ctx->re_misc);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX0) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[0].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 0\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
		OUT_RING(tex[0].pp_txfilter);
		OUT_RING(tex[0].pp_txformat);
		OUT_RING(tex[0].pp_txoffset);
		OUT_RING(tex[0].pp_txcblend);
		OUT_RING(tex[0].pp_txablend);
		OUT_RING(tex[0].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
		OUT_RING(tex[0].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX1) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[1].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 1\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
		OUT_RING(tex[1].pp_txfilter);
		OUT_RING(tex[1].pp_txformat);
		OUT_RING(tex[1].pp_txoffset);
		OUT_RING(tex[1].pp_txcblend);
		OUT_RING(tex[1].pp_txablend);
		OUT_RING(tex[1].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
		OUT_RING(tex[1].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX2) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[2].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 2\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
		OUT_RING(tex[2].pp_txfilter);
		OUT_RING(tex[2].pp_txformat);
		OUT_RING(tex[2].pp_txoffset);
		OUT_RING(tex[2].pp_txcblend);
		OUT_RING(tex[2].pp_txablend);
		OUT_RING(tex[2].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
		OUT_RING(tex[2].pp_border_color);
		ADVANCE_RING();
	}

	return 0;
}

/* Emit 1.2 state
 */
static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
			      drm_file_t * filp_priv,
			      drm_radeon_state_t * state)
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING(3);
		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
		OUT_RING(state->context2.se_zbias_factor);
		OUT_RING(state->context2.se_zbias_constant);
		ADVANCE_RING();
	}

	return radeon_emit_state(dev_priv, filp_priv, &state->context,
				 state->tex, state->dirty);
}

/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 */
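
/* Each entry gives the first register of a state block, the number of
 * dwords it covers, and a name for diagnostics; the array is indexed by
 * the RADEON_EMIT_* / R200_EMIT_* packet ids checked above.
 */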
static struct {
	int start;
	int len;
	const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
	 "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
	 "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"},	/* 62 */
	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},	/* 85 */
	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
};

/* ================================================================
 * Performance monitoring functions
 */

static void radeon_clear_box(drm_radeon_private_t * dev_priv,
			     int x, int y, int w, int h, int r, int g, int b)
{
	u32 color;
	RING_LOCALS;

	x += dev_priv->sarea_priv->boxes[0].x1;
	y += dev_priv->sarea_priv->boxes[0].y1;

	switch (dev_priv->color_fmt) {
	case RADEON_COLOR_FORMAT_RGB565:
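		/* Pack the 8-bit channels into 5:6:5 - keep the top 5/6/5
		 * bits of r/g/b and shift them into place.
		 */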
655 color = (((r & 0xf8) << 8) |
656 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
657 break;
658 case RADEON_COLOR_FORMAT_ARGB8888:
659 default:
660 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
661 break;
662 }
663
664 BEGIN_RING(4);
665 RADEON_WAIT_UNTIL_3D_IDLE();
666 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
667 OUT_RING(0xffffffff);
668 ADVANCE_RING();
669
670 BEGIN_RING(6);
671
672 OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
673 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
674 RADEON_GMC_BRUSH_SOLID_COLOR |
675 (dev_priv->color_fmt << 8) |
676 RADEON_GMC_SRC_DATATYPE_COLOR |
677 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
678
679 if (dev_priv->page_flipping && dev_priv->current_page == 1) {
680 OUT_RING(dev_priv->front_pitch_offset);
681 } else {
682 OUT_RING(dev_priv->back_pitch_offset);
683 }
684
685 OUT_RING(color);
686
687 OUT_RING((x << 16) | y);
688 OUT_RING((w << 16) | h);
689
690 ADVANCE_RING();
691 }
692
693 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
694 {
695 /* Collapse various things into a wait flag -- trying to
	 * guess if userspace slept -- better just to have them tell us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	if (dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple box for page flipping
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
		radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);

	/* Red box if we have to wait for idle at any point
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
		radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);

	/* Blue box: lost context?
	 */

	/* Yellow box for texture swaps
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
		radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);

	/* Green box if hardware never idles (as far as we can tell)
	 */
	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
		radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);

	/* Draw bars indicating number of buffers allocated
	 * (not a great measure, easily confused)
	 */
	if (dev_priv->stats.requested_bufs) {
		if (dev_priv->stats.requested_bufs > 100)
			dev_priv->stats.requested_bufs = 100;

		radeon_clear_box(dev_priv, 4, 16,
				 dev_priv->stats.requested_bufs, 4,
				 196, 128, 128);
	}

	memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));

}

/* ================================================================
 * CP command dispatch functions
 */

static void radeon_cp_dispatch_clear(drm_device_t * dev,
				     drm_radeon_clear_t * clear,
				     drm_radeon_clear_rect_t * depth_boxes)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG("flags = 0x%x\n", flags);

	dev_priv->stats.clears++;

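	/* With the page flipped, the user's notion of front and back is
	 * reversed, so swap the FRONT and BACK clear flags accordingly.
	 */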
767 if (dev_priv->page_flipping && dev_priv->current_page == 1) {
768 unsigned int tmp = flags;
769
770 flags &= ~(RADEON_FRONT | RADEON_BACK);
771 if (tmp & RADEON_FRONT)
772 flags |= RADEON_BACK;
773 if (tmp & RADEON_BACK)
774 flags |= RADEON_FRONT;
775 }
776
777 if (flags & (RADEON_FRONT | RADEON_BACK)) {
778
779 BEGIN_RING(4);
780
781 /* Ensure the 3D stream is idle before doing a
782 * 2D fill to clear the front or back buffer.
783 */
784 RADEON_WAIT_UNTIL_3D_IDLE();
785
786 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
787 OUT_RING(clear->color_mask);
788
789 ADVANCE_RING();
790
791 /* Make sure we restore the 3D state next time.
792 */
793 dev_priv->sarea_priv->ctx_owner = 0;
794
795 for (i = 0; i < nbox; i++) {
796 int x = pbox[i].x1;
797 int y = pbox[i].y1;
798 int w = pbox[i].x2 - x;
799 int h = pbox[i].y2 - y;
800
801 DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
802 x, y, w, h, flags);
803
804 if (flags & RADEON_FRONT) {
805 BEGIN_RING(6);
806
807 OUT_RING(CP_PACKET3
808 (RADEON_CNTL_PAINT_MULTI, 4));
809 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
810 RADEON_GMC_BRUSH_SOLID_COLOR |
811 (dev_priv->
812 color_fmt << 8) |
813 RADEON_GMC_SRC_DATATYPE_COLOR |
814 RADEON_ROP3_P |
815 RADEON_GMC_CLR_CMP_CNTL_DIS);
816
817 OUT_RING(dev_priv->front_pitch_offset);
818 OUT_RING(clear->clear_color);
819
820 OUT_RING((x << 16) | y);
821 OUT_RING((w << 16) | h);
822
823 ADVANCE_RING();
824 }
825
826 if (flags & RADEON_BACK) {
827 BEGIN_RING(6);
828
829 OUT_RING(CP_PACKET3
830 (RADEON_CNTL_PAINT_MULTI, 4));
831 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
832 RADEON_GMC_BRUSH_SOLID_COLOR |
833 (dev_priv->
834 color_fmt << 8) |
835 RADEON_GMC_SRC_DATATYPE_COLOR |
836 RADEON_ROP3_P |
837 RADEON_GMC_CLR_CMP_CNTL_DIS);
838
839 OUT_RING(dev_priv->back_pitch_offset);
840 OUT_RING(clear->clear_color);
841
842 OUT_RING((x << 16) | y);
843 OUT_RING((w << 16) | h);
844
845 ADVANCE_RING();
846 }
847 }
848 }
849
850 /* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
852 if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
853 && (flags & RADEON_CLEAR_FASTZ)) {
854
855 int i;
856 int depthpixperline =
857 dev_priv->depth_fmt ==
858 RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
859 2) : (dev_priv->
860 depth_pitch / 4);
861
862 u32 clearmask;
863
864 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
865 ((clear->depth_mask & 0xff) << 24);

		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & CHIP_HAS_HIERZ)
		    && (flags & RADEON_USE_HIERZ)) {
			/* FIXME : reverse engineer that for Rx00 cards */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
			/* pattern seems to work for r100, though get slight
			   rendering errors with glxgears. If hierz is not enabled for r100,
			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
			   other ones are ignored, and the same clear mask can be used. That's
			   very different behaviour than R200 which needs different clear mask
			   and different number of tiles to clear if hierz is enabled or not !?!
			 */
			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
		} else {
			/* clear mask : chooses the clearing pattern.
			   rv250: could be used to clear only parts of macrotiles
			   (but that would get really complicated...)?
			   bit 0 and 1 (either or both of them ?!?!) are used to
			   not clear tile 0 (or maybe one of the bits indicates if the tile is
			   compressed or not), bit 2 and 3 to not clear tile 1, and so on.
			   Pattern is as follows:
			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
			   bits -------------------------------------------------
			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
			   covers 256 pixels ?!?
			 */
			clearmask = 0x0;
		}

		BEGIN_RING(8);
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
			     tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly ? */
		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
			     RADEON_RB3D_ZC_FLUSH_ALL);
		ADVANCE_RING();

		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & CHIP_HAS_HIERZ)
			    && !(dev_priv->microcode_version == UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					OUT_RING(tileoffset * 8);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			} else if (dev_priv->microcode_version == UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 5;
				nrtilesx =
				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
					OUT_RING(tileoffset * 16);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 1);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			} else {	/* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset =
				    ((pbox[i].y1 >> 4) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					OUT_RING(tileoffset * 128);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & CHIP_HAS_HIERZ)
		    && (dev_priv->microcode_version == UCODE_R200)
		    && (flags & RADEON_USE_HIERZ))
			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
		{
			BEGIN_RING(4);
			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
			OUT_RING(0x0);	/* First tile */
			OUT_RING(0x3cc0);
			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
			ADVANCE_RING();
		}
	}

	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers. Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	else if ((dev_priv->microcode_version == UCODE_R200) &&
		 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;

		/* Disable TCL */

		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK | */
					  (0x9 <<
					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;

		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if (flags & RADEON_STENCIL) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(26);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
			     tempRB3D_STENCILREFMASK);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(14);
			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			ADVANCE_RING();
		}
	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if (flags & RADEON_DEPTH) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if (flags & RADEON_STENCIL) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(13);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
		OUT_RING(0x00000000);
		OUT_RING(rb3d_cntl);

		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(15);

			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
			OUT_RING(RADEON_VTX_Z_PRESENT |
				 RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  RADEON_MAOS_ENABLE |
				  RADEON_VTX_FMT_RADEON_MODE |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter. The client-side 3D driver must
	 * wait on this value before performing the clear ioctl. We
	 * need this because the card's so damned fast...
	 */
	dev_priv->sarea_priv->last_clear++;

	BEGIN_RING(4);

	RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}

static void radeon_cp_dispatch_swap(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes)
		radeon_cp_performance_boxes(dev_priv);

	/* Wait for the 3D stream to idle before dispatching the bitblt.
	 * This will prevent data corruption between the two streams.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	for (i = 0; i < nbox; i++) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);

		BEGIN_RING(7);

		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (dev_priv->color_fmt << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);

		/* Make this work even if front & back are flipped:
		 */
		if (dev_priv->current_page == 0) {
			OUT_RING(dev_priv->back_pitch_offset);
			OUT_RING(dev_priv->front_pitch_offset);
		} else {
			OUT_RING(dev_priv->front_pitch_offset);
			OUT_RING(dev_priv->back_pitch_offset);
		}

		OUT_RING((x << 16) | y);
		OUT_RING((x << 16) | y);
		OUT_RING((w << 16) | h);

		ADVANCE_RING();
	}

	/* Increment the frame counter. The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;

	BEGIN_RING(4);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
	RADEON_WAIT_UNTIL_2D_IDLE();

	ADVANCE_RING();
}

static void radeon_cp_dispatch_flip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
	int offset = (dev_priv->current_page == 1)
	    ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
		  __FUNCTION__,
		  dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes(dev_priv);
	}

	/* Update the frame offsets for both CRTCs
	 */
	BEGIN_RING(6);

	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING_REG(RADEON_CRTC_OFFSET,
		     ((sarea->frame.y * dev_priv->front_pitch +
		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
		     + offset);
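	/* Note: (color_fmt - 2) above happens to equal the bytes per pixel
	 * for the formats used here, and the & ~7 keeps the byte offset
	 * 8-byte aligned.
	 */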
	OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
		     + offset);

	ADVANCE_RING();

	/* Increment the frame counter. The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
	    1 - dev_priv->current_page;

	BEGIN_RING(2);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);

	ADVANCE_RING();
}

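/* Reject vertex counts that cannot form whole primitives: e.g. a line
 * list needs an even, nonzero count and a triangle list a nonzero
 * multiple of three. Returns nonzero for a bad count.
 */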
static int bad_prim_vertex_nr(int primitive, int nr)
{
	switch (primitive & RADEON_PRIM_TYPE_MASK) {
	case RADEON_PRIM_TYPE_NONE:
	case RADEON_PRIM_TYPE_POINT:
		return nr < 1;
	case RADEON_PRIM_TYPE_LINE:
		return (nr & 1) || nr == 0;
	case RADEON_PRIM_TYPE_LINE_STRIP:
		return nr < 2;
	case RADEON_PRIM_TYPE_TRI_LIST:
	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
	case RADEON_PRIM_TYPE_RECT_LIST:
		return nr % 3 || nr == 0;
	case RADEON_PRIM_TYPE_TRI_FAN:
	case RADEON_PRIM_TYPE_TRI_STRIP:
		return nr < 3;
	default:
		return 1;
	}
}

typedef struct {
	unsigned int start;
	unsigned int finish;
	unsigned int prim;
	unsigned int numverts;
	unsigned int offset;
	unsigned int vc_format;
} drm_radeon_tcl_prim_t;

static void radeon_cp_dispatch_vertex(drm_device_t * dev,
				      drm_buf_t * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	do {
		/* Emit the next cliprect */
		if (i < nbox) {
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(offset);
		OUT_RING(numverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (numverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		i++;
	} while (i < nbox);
}

static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING(2);
	RADEON_DISPATCH_AGE(buf_priv->age);
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}

static void radeon_cp_dispatch_indirect(drm_device_t * dev,
					drm_buf_t * buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if (dwords & 1) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}
}

static void radeon_cp_dispatch_indices(drm_device_t * dev,
				       drm_buf_t * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, count)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
		return;
	}

	if (start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
			elt_buf->offset + prim->start);

	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT));

	do {
		if (i < nbox)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		i++;
	} while (i < nbox);

}

#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE

static int radeon_cp_dispatch_texture(DRMFILE filp,
				      drm_device_t * dev,
				      drm_radeon_texture_t * tex,
				      drm_radeon_tex_image_t * image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return DRM_ERR(EINVAL);
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache. This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two. Thus, we'll
	 * use a shift instead.
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return DRM_ERR(EINVAL);
	}
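	/* The blitter's source pitch field is in units of 64 bytes, hence
	 * the shift; a zero pitch is only tolerable for a one-line image.
	 */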
1617 spitch = blit_width >> 6;
1618 if (spitch == 0 && image->height > 1)
1619 return DRM_ERR(EINVAL);
1620
1621 texpitch = tex->pitch;
1622 if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1623 microtile = 1;
1624 if (tex_width < 64) {
1625 texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1626 /* we got tiled coordinates, untile them */
1627 image->x *= 2;
1628 }
1629 } else
1630 microtile = 0;
1631
1632 DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1633
1634 do {
1635 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1636 tex->offset >> 10, tex->pitch, tex->format,
1637 image->x, image->y, image->width, image->height);
1638
1639 /* Make a copy of some parameters in case we have to
1640 * update them for a multi-pass texture blit.
1641 */
1642 height = image->height;
1643 data = (const u8 __user *)image->data;
1644
1645 size = height * blit_width;
1646
1647 if (size > RADEON_MAX_TEXTURE_SIZE) {
1648 height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1649 size = height * blit_width;
1650 } else if (size < 4 && size > 0) {
1651 size = 4;
1652 } else if (size == 0) {
1653 return 0;
1654 }
1655
1656 buf = radeon_freelist_get(dev);
1657 if (0 && !buf) {
1658 radeon_do_cp_idle(dev_priv);
1659 buf = radeon_freelist_get(dev);
1660 }
1661 if (!buf) {
1662 DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
1663 if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1664 return DRM_ERR(EFAULT);
1665 return DRM_ERR(EAGAIN);
1666 }
1667
1668 /* Dispatch the indirect buffer.
1669 */
1670 buffer =
1671 (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1672 dwords = size / 4;
1673
1674 #define RADEON_COPY_MT(_buf, _data, _width) \
1675 do { \
1676 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1677 DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1678 return DRM_ERR(EFAULT); \
1679 } \
1680 } while(0)
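
		/* RADEON_COPY_MT bounces _width bytes from the user pointer
		 * _data into the indirect buffer, bailing out of the ioctl
		 * on a fault.
		 */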
1681
1682 if (microtile) {
1683 /* texture micro tiling in use, minimum texture width is thus 16 bytes.
1684 however, we cannot use blitter directly for texture width < 64 bytes,
1685 since minimum tex pitch is 64 bytes and we need this to match
1686 the texture width, otherwise the blitter will tile it wrong.
1687 Thus, tiling manually in this case. Additionally, need to special
1688 case tex height = 1, since our actual image will have height 2
1689 and we need to ensure we don't read beyond the texture size
1690 from user space. */
1691 if (tex->height == 1) {
1692 if (tex_width >= 64 || tex_width <= 16) {
1693 RADEON_COPY_MT(buffer, data,
1694 (int)(tex_width * sizeof(u32)));
1695 } else if (tex_width == 32) {
1696 RADEON_COPY_MT(buffer, data, 16);
1697 RADEON_COPY_MT(buffer + 8,
1698 data + 16, 16);
1699 }
1700 } else if (tex_width >= 64 || tex_width == 16) {
1701 RADEON_COPY_MT(buffer, data,
1702 (int)(dwords * sizeof(u32)));
1703 } else if (tex_width < 16) {
1704 for (i = 0; i < tex->height; i++) {
1705 RADEON_COPY_MT(buffer, data, tex_width);
1706 buffer += 4;
1707 data += tex_width;
1708 }
1709 } else if (tex_width == 32) {
1710 /* TODO: make sure this works when not fitting in one buffer
1711 (i.e. 32bytes x 2048...) */
1712 for (i = 0; i < tex->height; i += 2) {
1713 RADEON_COPY_MT(buffer, data, 16);
1714 data += 16;
1715 RADEON_COPY_MT(buffer + 8, data, 16);
1716 data += 16;
1717 RADEON_COPY_MT(buffer + 4, data, 16);
1718 data += 16;
1719 RADEON_COPY_MT(buffer + 12, data, 16);
1720 data += 16;
1721 buffer += 16;
1722 }
1723 }
1724 } else {
1725 if (tex_width >= 32) {
1726 /* Texture image width is larger than the minimum, so we
1727 * can upload it directly.
1728 */
1729 RADEON_COPY_MT(buffer, data,
1730 (int)(dwords * sizeof(u32)));
1731 } else {
1732 /* Texture image width is less than the minimum, so we
1733 * need to pad out each image scanline to the minimum
1734 * width.
1735 */
1736 for (i = 0; i < tex->height; i++) {
1737 RADEON_COPY_MT(buffer, data, tex_width);
1738 buffer += 8;
1739 data += tex_width;
1740 }
1741 }
1742 }
1743
1744 #undef RADEON_COPY_MT
1745 buf->filp = filp;
1746 buf->used = size;
1747 offset = dev_priv->gart_buffers_offset + buf->offset;
1748 BEGIN_RING(9);
1749 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1750 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1751 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1752 RADEON_GMC_BRUSH_NONE |
1753 (format << 8) |
1754 RADEON_GMC_SRC_DATATYPE_COLOR |
1755 RADEON_ROP3_S |
1756 RADEON_DP_SRC_SOURCE_MEMORY |
1757 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1758 OUT_RING((spitch << 22) | (offset >> 10));
1759 OUT_RING((texpitch << 22) | (tex->offset >> 10));
1760 OUT_RING(0);
1761 OUT_RING((image->x << 16) | image->y);
1762 OUT_RING((image->width << 16) | height);
1763 RADEON_WAIT_UNTIL_2D_IDLE();
1764 ADVANCE_RING();
1765
1766 radeon_cp_discard_buffer(dev, buf);
1767
1768 /* Update the input parameters for next time */
1769 image->y += height;
1770 image->height -= height;
1771 image->data = (const u8 __user *)image->data + size;
1772 } while (image->height > 0);
1773
1774 /* Flush the pixel cache after the blit completes. This ensures
1775 * the texture data is written out to memory before rendering
1776 * continues.
1777 */
1778 BEGIN_RING(4);
1779 RADEON_FLUSH_CACHE();
1780 RADEON_WAIT_UNTIL_2D_IDLE();
1781 ADVANCE_RING();
1782 return 0;
1783 }
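
/* Sketch of the matching userspace call sequence, assuming the libdrm
 * drmCommandWriteRead() wrapper and the drm_radeon_texture_t /
 * drm_radeon_tex_image_t layouts from radeon_drm.h. On EAGAIN the
 * kernel has already copied the updated image parameters back to user
 * space (see above), so the client simply retries:
 *
 *   drm_radeon_tex_image_t img = {
 *       .x = 0, .y = 0,
 *       .width = w, .height = h,
 *       .data = pixels,              // user pointer to texel data
 *   };
 *   drm_radeon_texture_t tex = {
 *       .offset = dst_offset,        // card-space destination
 *       .pitch  = dst_pitch,         // pitch in the blitter's units
 *       .format = RADEON_TXFORMAT_ARGB8888,
 *       .width  = w, .height = h,
 *       .image  = &img,
 *   };
 *   int ret;
 *   do {
 *       ret = drmCommandWriteRead(fd, DRM_RADEON_TEXTURE,
 *                                 &tex, sizeof(tex));
 *   } while (ret == -EAGAIN);        // no free DMA buffer yet; retry
 */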
1784
1785 static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
1786 {
1787 drm_radeon_private_t *dev_priv = dev->dev_private;
1788 int i;
1789 RING_LOCALS;
1790 DRM_DEBUG("\n");
1791
1792 BEGIN_RING(35);
1793
1794 OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1795 OUT_RING(0x00000000);
1796
1797 OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1798 for (i = 0; i < 32; i++) {
1799 OUT_RING(stipple[i]);
1800 }
1801
1802 ADVANCE_RING();
1803 }
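
/* The stipple pattern is a 32x32 monochrome bitmap, one 32-bit word per
 * row, streamed through RADEON_RE_STIPPLE_DATA after resetting the
 * auto-incrementing RADEON_RE_STIPPLE_ADDR. A sketch of how a client
 * might build a checkerboard with 8x8 cells for the STIPPLE ioctl
 * (illustrative only):
 *
 *   u32 mask[32];
 *   int row;
 *
 *   for (row = 0; row < 32; row++)       // flip phase every 8 rows
 *       mask[row] = (row & 8) ? 0xff00ff00 : 0x00ff00ff;
 *   // submit with stipple.mask = mask via DRM_RADEON_STIPPLE
 */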
1804
1805 static void radeon_apply_surface_regs(int surf_index,
1806 drm_radeon_private_t *dev_priv)
1807 {
1808 if (!dev_priv->mmio)
1809 return;
1810
1811 radeon_do_cp_idle(dev_priv);
1812
1813 RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1814 dev_priv->surfaces[surf_index].flags);
1815 RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1816 dev_priv->surfaces[surf_index].lower);
1817 RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1818 dev_priv->surfaces[surf_index].upper);
1819 }
1820
1821 /* Allocates a virtual surface.
1822 * Doesn't always allocate a real surface; it will stretch an existing
1823 * surface when possible.
1824 *
1825 * Note that refcount can be at most 2, since during a free refcount=3
1826 * might mean we have to allocate a new surface, which might not always
1827 * be available.
1828 * For example: we allocate three contiguous surfaces ABC. If B is
1829 * freed, we suddenly need two surfaces to store A and C, which might
1830 * not always be available.
1831 */
1832 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1833 drm_radeon_private_t *dev_priv, DRMFILE filp)
1834 {
1835 struct radeon_virt_surface *s;
1836 int i;
1837 int virt_surface_index;
1838 uint32_t new_upper, new_lower;
1839
1840 new_lower = new->address;
1841 new_upper = new_lower + new->size - 1;
1842
1843 /* sanity check */
1844 if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1845 ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1846 RADEON_SURF_ADDRESS_FIXED_MASK)
1847 || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1848 return -1;
1849
1850 /* make sure there is no overlap with existing surfaces */
1851 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1852 if ((dev_priv->surfaces[i].refcount != 0) &&
1853 (((new_lower >= dev_priv->surfaces[i].lower) &&
1854 (new_lower < dev_priv->surfaces[i].upper)) ||
1855 ((new_lower < dev_priv->surfaces[i].lower) &&
1856 (new_upper > dev_priv->surfaces[i].lower)))) {
1857 return -1;
1858 }
1859 }
1860
1861 /* find a virtual surface */
1862 for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1863 if (dev_priv->virt_surfaces[i].filp == NULL)
1864 break;
1865 if (i == 2 * RADEON_MAX_SURFACES) {
1866 return -1;
1867 }
1868 virt_surface_index = i;
1869
1870 /* try to reuse an existing surface */
1871 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1872 /* extend before */
1873 if ((dev_priv->surfaces[i].refcount == 1) &&
1874 (new->flags == dev_priv->surfaces[i].flags) &&
1875 (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1876 s = &(dev_priv->virt_surfaces[virt_surface_index]);
1877 s->surface_index = i;
1878 s->lower = new_lower;
1879 s->upper = new_upper;
1880 s->flags = new->flags;
1881 s->filp = filp;
1882 dev_priv->surfaces[i].refcount++;
1883 dev_priv->surfaces[i].lower = s->lower;
1884 radeon_apply_surface_regs(s->surface_index, dev_priv);
1885 return virt_surface_index;
1886 }
1887
1888 /* extend after */
1889 if ((dev_priv->surfaces[i].refcount == 1) &&
1890 (new->flags == dev_priv->surfaces[i].flags) &&
1891 (new_lower == dev_priv->surfaces[i].upper + 1)) {
1892 s = &(dev_priv->virt_surfaces[virt_surface_index]);
1893 s->surface_index = i;
1894 s->lower = new_lower;
1895 s->upper = new_upper;
1896 s->flags = new->flags;
1897 s->filp = filp;
1898 dev_priv->surfaces[i].refcount++;
1899 dev_priv->surfaces[i].upper = s->upper;
1900 radeon_apply_surface_regs(s->surface_index, dev_priv);
1901 return virt_surface_index;
1902 }
1903 }
1904
1905 /* okay, we need a new one */
1906 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1907 if (dev_priv->surfaces[i].refcount == 0) {
1908 s = &(dev_priv->virt_surfaces[virt_surface_index]);
1909 s->surface_index = i;
1910 s->lower = new_lower;
1911 s->upper = new_upper;
1912 s->flags = new->flags;
1913 s->filp = filp;
1914 dev_priv->surfaces[i].refcount = 1;
1915 dev_priv->surfaces[i].lower = s->lower;
1916 dev_priv->surfaces[i].upper = s->upper;
1917 dev_priv->surfaces[i].flags = s->flags;
1918 radeon_apply_surface_regs(s->surface_index, dev_priv);
1919 return virt_surface_index;
1920 }
1921 }
1922
1923 /* we didn't find anything */
1924 return -1;
1925 }
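
/* Worked example of the sanity check and the "extend" fast path above,
 * assuming RADEON_SURF_ADDRESS_FIXED_MASK covers the low address bits
 * (the exact width comes from radeon_drm.h): a request with address
 * 0x100000 and size 0x10000 gives new_lower = 0x100000 and new_upper =
 * 0x10ffff; the masked bits of new_lower are all zero and those of
 * new_upper all one, so the request covers whole aligned blocks and
 * passes. If a surface with identical flags already ends at 0x0fffff
 * (upper + 1 == new_lower), no new hardware surface is consumed: the
 * existing one is stretched, and only its refcount and bound change.
 */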
1926
1927 static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
1928 int lower)
1929 {
1930 struct radeon_virt_surface *s;
1931 int i;
1932 /* find the virtual surface */
1933 for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1934 s = &(dev_priv->virt_surfaces[i]);
1935 if (s->filp) {
1936 if ((lower == s->lower) && (filp == s->filp)) {
1937 if (dev_priv->surfaces[s->surface_index].
1938 lower == s->lower)
1939 dev_priv->surfaces[s->surface_index].
1940 lower = s->upper;
1941
1942 if (dev_priv->surfaces[s->surface_index].
1943 upper == s->upper)
1944 dev_priv->surfaces[s->surface_index].
1945 upper = s->lower;
1946
1947 dev_priv->surfaces[s->surface_index].refcount--;
1948 if (dev_priv->surfaces[s->surface_index].
1949 refcount == 0)
1950 dev_priv->surfaces[s->surface_index].
1951 flags = 0;
1952 s->filp = NULL;
1953 radeon_apply_surface_regs(s->surface_index,
1954 dev_priv);
1955 return 0;
1956 }
1957 }
1958 }
1959 return 1;
1960 }
1961
1962 static void radeon_surfaces_release(DRMFILE filp,
1963 drm_radeon_private_t * dev_priv)
1964 {
1965 int i;
1966 for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1967 if (dev_priv->virt_surfaces[i].filp == filp)
1968 free_surface(filp, dev_priv,
1969 dev_priv->virt_surfaces[i].lower);
1970 }
1971 }
1972
1973 /* ================================================================
1974 * IOCTL functions
1975 */
1976 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1977 {
1978 DRM_DEVICE;
1979 drm_radeon_private_t *dev_priv = dev->dev_private;
1980 drm_radeon_surface_alloc_t alloc;
1981
1982 DRM_COPY_FROM_USER_IOCTL(alloc,
1983 (drm_radeon_surface_alloc_t __user *) data,
1984 sizeof(alloc));
1985
1986 if (alloc_surface(&alloc, dev_priv, filp) == -1)
1987 return DRM_ERR(EINVAL);
1988 else
1989 return 0;
1990 }
1991
1992 static int radeon_surface_free(DRM_IOCTL_ARGS)
1993 {
1994 DRM_DEVICE;
1995 drm_radeon_private_t *dev_priv = dev->dev_private;
1996 drm_radeon_surface_free_t memfree;
1997
1998 DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *) data,
1999 sizeof(memfree));
2000
2001 if (free_surface(filp, dev_priv, memfree.address))
2002 return DRM_ERR(EINVAL);
2003 else
2004 return 0;
2005 }
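
/* Sketch of the userspace side of the two surface ioctls, assuming the
 * libdrm drmCommandWrite() wrapper; the flags value is illustrative:
 *
 *   drm_radeon_surface_alloc_t alloc = {
 *       .address = surf_offset,      // card-space start, block aligned
 *       .size    = surf_size,        // must cover whole aligned blocks
 *       .flags   = tiling_flags,     // e.g. macro tiling bits, nonzero
 *   };
 *   if (drmCommandWrite(fd, DRM_RADEON_SURF_ALLOC,
 *                       &alloc, sizeof(alloc)) != 0)
 *       fallback_to_untiled();       // EINVAL: overlap or no free slot
 *
 *   drm_radeon_surface_free_t memfree = { .address = surf_offset };
 *   drmCommandWrite(fd, DRM_RADEON_SURF_FREE, &memfree, sizeof(memfree));
 */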
2006
2007 static int radeon_cp_clear(DRM_IOCTL_ARGS)
2008 {
2009 DRM_DEVICE;
2010 drm_radeon_private_t *dev_priv = dev->dev_private;
2011 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2012 drm_radeon_clear_t clear;
2013 drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2014 DRM_DEBUG("\n");
2015
2016 LOCK_TEST_WITH_RETURN(dev, filp);
2017
2018 DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
2019 sizeof(clear));
2020
2021 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2022
2023 if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2024 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2025
2026 if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
2027 sarea_priv->nbox * sizeof(depth_boxes[0])))
2028 return DRM_ERR(EFAULT);
2029
2030 radeon_cp_dispatch_clear(dev, &clear, depth_boxes);
2031
2032 COMMIT_RING();
2033 return 0;
2034 }
2035
2036 /* Not sure why this isn't set all the time:
2037 */
2038 static int radeon_do_init_pageflip(drm_device_t * dev)
2039 {
2040 drm_radeon_private_t *dev_priv = dev->dev_private;
2041 RING_LOCALS;
2042
2043 DRM_DEBUG("\n");
2044
2045 BEGIN_RING(6);
2046 RADEON_WAIT_UNTIL_3D_IDLE();
2047 OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2048 OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2049 RADEON_CRTC_OFFSET_FLIP_CNTL);
2050 OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2051 OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2052 RADEON_CRTC_OFFSET_FLIP_CNTL);
2053 ADVANCE_RING();
2054
2055 dev_priv->page_flipping = 1;
2056 dev_priv->current_page = 0;
2057 dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;
2058
2059 return 0;
2060 }
2061
2062 /* Called whenever a client dies, from drm_release.
2063 * NOTE: Lock isn't necessarily held when this is called!
2064 */
2065 static int radeon_do_cleanup_pageflip(drm_device_t * dev)
2066 {
2067 drm_radeon_private_t *dev_priv = dev->dev_private;
2068 DRM_DEBUG("\n");
2069
2070 if (dev_priv->current_page != 0)
2071 radeon_cp_dispatch_flip(dev);
2072
2073 dev_priv->page_flipping = 0;
2074 return 0;
2075 }
2076
2077 /* Swapping and flipping are different operations, so they need different ioctls.
2078 * They can and should be intermixed to support multiple 3D windows.
2079 */
2080 static int radeon_cp_flip(DRM_IOCTL_ARGS)
2081 {
2082 DRM_DEVICE;
2083 drm_radeon_private_t *dev_priv = dev->dev_private;
2084 DRM_DEBUG("\n");
2085
2086 LOCK_TEST_WITH_RETURN(dev, filp);
2087
2088 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2089
2090 if (!dev_priv->page_flipping)
2091 radeon_do_init_pageflip(dev);
2092
2093 radeon_cp_dispatch_flip(dev);
2094
2095 COMMIT_RING();
2096 return 0;
2097 }
2098
2099 static int radeon_cp_swap(DRM_IOCTL_ARGS)
2100 {
2101 DRM_DEVICE;
2102 drm_radeon_private_t *dev_priv = dev->dev_private;
2103 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2104 DRM_DEBUG("\n");
2105
2106 LOCK_TEST_WITH_RETURN(dev, filp);
2107
2108 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2109
2110 if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2111 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2112
2113 radeon_cp_dispatch_swap(dev);
2114 dev_priv->sarea_priv->ctx_owner = 0;
2115
2116 COMMIT_RING();
2117 return 0;
2118 }
2119
2120 static int radeon_cp_vertex(DRM_IOCTL_ARGS)
2121 {
2122 DRM_DEVICE;
2123 drm_radeon_private_t *dev_priv = dev->dev_private;
2124 drm_file_t *filp_priv;
2125 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2126 drm_device_dma_t *dma = dev->dma;
2127 drm_buf_t *buf;
2128 drm_radeon_vertex_t vertex;
2129 drm_radeon_tcl_prim_t prim;
2130
2131 LOCK_TEST_WITH_RETURN(dev, filp);
2132
2133 DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2134
2135 DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
2136 sizeof(vertex));
2137
2138 DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2139 DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);
2140
2141 if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2142 DRM_ERROR("buffer index %d (of %d max)\n",
2143 vertex.idx, dma->buf_count - 1);
2144 return DRM_ERR(EINVAL);
2145 }
2146 if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2147 DRM_ERROR("buffer prim %d\n", vertex.prim);
2148 return DRM_ERR(EINVAL);
2149 }
2150
2151 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2152 VB_AGE_TEST_WITH_RETURN(dev_priv);
2153
2154 buf = dma->buflist[vertex.idx];
2155
2156 if (buf->filp != filp) {
2157 DRM_ERROR("process %d using buffer owned by %p\n",
2158 DRM_CURRENTPID, buf->filp);
2159 return DRM_ERR(EINVAL);
2160 }
2161 if (buf->pending) {
2162 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2163 return DRM_ERR(EINVAL);
2164 }
2165
2166 /* Build up a prim_t record:
2167 */
2168 if (vertex.count) {
2169 buf->used = vertex.count; /* not used? */
2170
2171 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2172 if (radeon_emit_state(dev_priv, filp_priv,
2173 &sarea_priv->context_state,
2174 sarea_priv->tex_state,
2175 sarea_priv->dirty)) {
2176 DRM_ERROR("radeon_emit_state failed\n");
2177 return DRM_ERR(EINVAL);
2178 }
2179
2180 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2181 RADEON_UPLOAD_TEX1IMAGES |
2182 RADEON_UPLOAD_TEX2IMAGES |
2183 RADEON_REQUIRE_QUIESCENCE);
2184 }
2185
2186 prim.start = 0;
2187 prim.finish = vertex.count; /* unused */
2188 prim.prim = vertex.prim;
2189 prim.numverts = vertex.count;
2190 prim.vc_format = dev_priv->sarea_priv->vc_format;
2191
2192 radeon_cp_dispatch_vertex(dev, buf, &prim);
2193 }
2194
2195 if (vertex.discard) {
2196 radeon_cp_discard_buffer(dev, buf);
2197 }
2198
2199 COMMIT_RING();
2200 return 0;
2201 }
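
/* Sketch of a client submission through this ioctl, assuming
 * drmCommandWrite() and a DMA buffer (index idx, owned by this file
 * descriptor) already filled with vertex data:
 *
 *   drm_radeon_vertex_t v = {
 *       .prim    = RADEON_PRIM_TYPE_TRI_LIST,
 *       .idx     = idx,              // buffer index from DRM_DMA
 *       .count   = nr_verts,         // vertices to dispatch
 *       .discard = 1,                // hand the buffer back when aged
 *   };
 *   drmCommandWrite(fd, DRM_RADEON_VERTEX, &v, sizeof(v));
 */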
2202
2203 static int radeon_cp_indices(DRM_IOCTL_ARGS)
2204 {
2205 DRM_DEVICE;
2206 drm_radeon_private_t *dev_priv = dev->dev_private;
2207 drm_file_t *filp_priv;
2208 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2209 drm_device_dma_t *dma = dev->dma;
2210 drm_buf_t *buf;
2211 drm_radeon_indices_t elts;
2212 drm_radeon_tcl_prim_t prim;
2213 int count;
2214
2215 LOCK_TEST_WITH_RETURN(dev, filp);
2216
2217 DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2218
2219 DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
2220 sizeof(elts));
2221
2222 DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2223 DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);
2224
2225 if (elts.idx < 0 || elts.idx >= dma->buf_count) {
2226 DRM_ERROR("buffer index %d (of %d max)\n",
2227 elts.idx, dma->buf_count - 1);
2228 return DRM_ERR(EINVAL);
2229 }
2230 if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2231 DRM_ERROR("buffer prim %d\n", elts.prim);
2232 return DRM_ERR(EINVAL);
2233 }
2234
2235 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2236 VB_AGE_TEST_WITH_RETURN(dev_priv);
2237
2238 buf = dma->buflist[elts.idx];
2239
2240 if (buf->filp != filp) {
2241 DRM_ERROR("process %d using buffer owned by %p\n",
2242 DRM_CURRENTPID, buf->filp);
2243 return DRM_ERR(EINVAL);
2244 }
2245 if (buf->pending) {
2246 DRM_ERROR("sending pending buffer %d\n", elts.idx);
2247 return DRM_ERR(EINVAL);
2248 }
2249
2250 count = (elts.end - elts.start) / sizeof(u16);
2251 elts.start -= RADEON_INDEX_PRIM_OFFSET;
2252
2253 if (elts.start & 0x7) {
2254 DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
2255 return DRM_ERR(EINVAL);
2256 }
2257 if (elts.start < buf->used) {
2258 DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
2259 return DRM_ERR(EINVAL);
2260 }
2261
2262 buf->used = elts.end;
2263
2264 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2265 if (radeon_emit_state(dev_priv, filp_priv,
2266 &sarea_priv->context_state,
2267 sarea_priv->tex_state,
2268 sarea_priv->dirty)) {
2269 DRM_ERROR("radeon_emit_state failed\n");
2270 return DRM_ERR(EINVAL);
2271 }
2272
2273 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2274 RADEON_UPLOAD_TEX1IMAGES |
2275 RADEON_UPLOAD_TEX2IMAGES |
2276 RADEON_REQUIRE_QUIESCENCE);
2277 }
2278
2279 /* Build up a prim_t record:
2280 */
2281 prim.start = elts.start;
2282 prim.finish = elts.end;
2283 prim.prim = elts.prim;
2284 prim.offset = 0; /* offset from start of dma buffers */
2285 prim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2286 prim.vc_format = dev_priv->sarea_priv->vc_format;
2287
2288 radeon_cp_dispatch_indices(dev, buf, &prim);
2289 if (elts.discard) {
2290 radeon_cp_discard_buffer(dev, buf);
2291 }
2292
2293 COMMIT_RING();
2294 return 0;
2295 }
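
/* Sketch of the client side, assuming drmCommandWrite() and a DMA
 * buffer that holds the packed 16-bit index list at byte offsets
 * [start, end); start must stay 8-byte aligned after the
 * RADEON_INDEX_PRIM_OFFSET rebase and must not precede buf->used:
 *
 *   drm_radeon_indices_t elts = {
 *       .prim    = RADEON_PRIM_TYPE_TRI_LIST,
 *       .idx     = idx,              // buffer owned by this fd
 *       .start   = index_start,      // byte offset of first index
 *       .end     = index_end,        // byte offset past the last index
 *       .discard = 1,
 *   };
 *   drmCommandWrite(fd, DRM_RADEON_INDICES, &elts, sizeof(elts));
 */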
2296
2297 static int radeon_cp_texture(DRM_IOCTL_ARGS)
2298 {
2299 DRM_DEVICE;
2300 drm_radeon_private_t *dev_priv = dev->dev_private;
2301 drm_radeon_texture_t tex;
2302 drm_radeon_tex_image_t image;
2303 int ret;
2304
2305 LOCK_TEST_WITH_RETURN(dev, filp);
2306
2307 DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
2308 sizeof(tex));
2309
2310 if (tex.image == NULL) {
2311 DRM_ERROR("null texture image!\n");
2312 return DRM_ERR(EINVAL);
2313 }
2314
2315 if (DRM_COPY_FROM_USER(&image,
2316 (drm_radeon_tex_image_t __user *) tex.image,
2317 sizeof(image)))
2318 return DRM_ERR(EFAULT);
2319
2320 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2321 VB_AGE_TEST_WITH_RETURN(dev_priv);
2322
2323 ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);
2324
2325 COMMIT_RING();
2326 return ret;
2327 }
2328
2329 static int radeon_cp_stipple(DRM_IOCTL_ARGS)
2330 {
2331 DRM_DEVICE;
2332 drm_radeon_private_t *dev_priv = dev->dev_private;
2333 drm_radeon_stipple_t stipple;
2334 u32 mask[32];
2335
2336 LOCK_TEST_WITH_RETURN(dev, filp);
2337
2338 DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
2339 sizeof(stipple));
2340
2341 if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
2342 return DRM_ERR(EFAULT);
2343
2344 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2345
2346 radeon_cp_dispatch_stipple(dev, mask);
2347
2348 COMMIT_RING();
2349 return 0;
2350 }
2351
2352 static int radeon_cp_indirect(DRM_IOCTL_ARGS)
2353 {
2354 DRM_DEVICE;
2355 drm_radeon_private_t *dev_priv = dev->dev_private;
2356 drm_device_dma_t *dma = dev->dma;
2357 drm_buf_t *buf;
2358 drm_radeon_indirect_t indirect;
2359 RING_LOCALS;
2360
2361 LOCK_TEST_WITH_RETURN(dev, filp);
2362
2363 DRM_COPY_FROM_USER_IOCTL(indirect,
2364 (drm_radeon_indirect_t __user *) data,
2365 sizeof(indirect));
2366
2367 DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
2368 indirect.idx, indirect.start, indirect.end, indirect.discard);
2369
2370 if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
2371 DRM_ERROR("buffer index %d (of %d max)\n",
2372 indirect.idx, dma->buf_count - 1);
2373 return DRM_ERR(EINVAL);
2374 }
2375
2376 buf = dma->buflist[indirect.idx];
2377
2378 if (buf->filp != filp) {
2379 DRM_ERROR("process %d using buffer owned by %p\n",
2380 DRM_CURRENTPID, buf->filp);
2381 return DRM_ERR(EINVAL);
2382 }
2383 if (buf->pending) {
2384 DRM_ERROR("sending pending buffer %d\n", indirect.idx);
2385 return DRM_ERR(EINVAL);
2386 }
2387
2388 if (indirect.start < buf->used) {
2389 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2390 indirect.start, buf->used);
2391 return DRM_ERR(EINVAL);
2392 }
2393
2394 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2395 VB_AGE_TEST_WITH_RETURN(dev_priv);
2396
2397 buf->used = indirect.end;
2398
2399 /* Wait for the 3D stream to idle before the indirect buffer
2400 * containing 2D acceleration commands is processed.
2401 */
2402 BEGIN_RING(2);
2403
2404 RADEON_WAIT_UNTIL_3D_IDLE();
2405
2406 ADVANCE_RING();
2407
2408 /* Dispatch the indirect buffer full of commands from the
2409 * X server. This is insecure and is thus only available to
2410 * privileged clients.
2411 */
2412 radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
2413 if (indirect.discard) {
2414 radeon_cp_discard_buffer(dev, buf);
2415 }
2416
2417 COMMIT_RING();
2418 return 0;
2419 }
2420
2421 static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
2422 {
2423 DRM_DEVICE;
2424 drm_radeon_private_t *dev_priv = dev->dev_private;
2425 drm_file_t *filp_priv;
2426 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2427 drm_device_dma_t *dma = dev->dma;
2428 drm_buf_t *buf;
2429 drm_radeon_vertex2_t vertex;
2430 int i;
2431 unsigned char laststate;
2432
2433 LOCK_TEST_WITH_RETURN(dev, filp);
2434
2435 DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2436
2437 DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
2438 sizeof(vertex));
2439
2440 DRM_DEBUG("pid=%d index=%d discard=%d\n",
2441 DRM_CURRENTPID, vertex.idx, vertex.discard);
2442
2443 if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2444 DRM_ERROR("buffer index %d (of %d max)\n",
2445 vertex.idx, dma->buf_count - 1);
2446 return DRM_ERR(EINVAL);
2447 }
2448
2449 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2450 VB_AGE_TEST_WITH_RETURN(dev_priv);
2451
2452 buf = dma->buflist[vertex.idx];
2453
2454 if (buf->filp != filp) {
2455 DRM_ERROR("process %d using buffer owned by %p\n",
2456 DRM_CURRENTPID, buf->filp);
2457 return DRM_ERR(EINVAL);
2458 }
2459
2460 if (buf->pending) {
2461 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2462 return DRM_ERR(EINVAL);
2463 }
2464
2465 if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2466 return DRM_ERR(EINVAL);
2467
2468 for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
2469 drm_radeon_prim_t prim;
2470 drm_radeon_tcl_prim_t tclprim;
2471
2472 if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
2473 return DRM_ERR(EFAULT);
2474
2475 if (prim.stateidx != laststate) {
2476 drm_radeon_state_t state;
2477
2478 if (DRM_COPY_FROM_USER(&state,
2479 &vertex.state[prim.stateidx],
2480 sizeof(state)))
2481 return DRM_ERR(EFAULT);
2482
2483 if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
2484 DRM_ERROR("radeon_emit_state2 failed\n");
2485 return DRM_ERR(EINVAL);
2486 }
2487
2488 laststate = prim.stateidx;
2489 }
2490
2491 tclprim.start = prim.start;
2492 tclprim.finish = prim.finish;
2493 tclprim.prim = prim.prim;
2494 tclprim.vc_format = prim.vc_format;
2495
2496 if (prim.prim & RADEON_PRIM_WALK_IND) {
2497 tclprim.offset = prim.numverts * 64;
2498 tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2499
2500 radeon_cp_dispatch_indices(dev, buf, &tclprim);
2501 } else {
2502 tclprim.numverts = prim.numverts;
2503 tclprim.offset = 0; /* not used */
2504
2505 radeon_cp_dispatch_vertex(dev, buf, &tclprim);
2506 }
2507
2508 if (sarea_priv->nbox == 1)
2509 sarea_priv->nbox = 0;
2510 }
2511
2512 if (vertex.discard) {
2513 radeon_cp_discard_buffer(dev, buf);
2514 }
2515
2516 COMMIT_RING();
2517 return 0;
2518 }
2519
2520 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2521 drm_file_t * filp_priv,
2522 drm_radeon_cmd_header_t header,
2523 drm_radeon_kcmd_buffer_t *cmdbuf)
2524 {
2525 int id = (int)header.packet.packet_id;
2526 int sz, reg;
2527 int *data = (int *)cmdbuf->buf;
2528 RING_LOCALS;
2529
2530 if (id >= RADEON_MAX_STATE_PACKETS)
2531 return DRM_ERR(EINVAL);
2532
2533 sz = packet[id].len;
2534 reg = packet[id].start;
2535
2536 if (sz * sizeof(int) > cmdbuf->bufsz) {
2537 DRM_ERROR("Packet size provided larger than data provided\n");
2538 return DRM_ERR(EINVAL);
2539 }
2540
2541 if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
2542 DRM_ERROR("Packet verification failed\n");
2543 return DRM_ERR(EINVAL);
2544 }
2545
2546 BEGIN_RING(sz + 1);
2547 OUT_RING(CP_PACKET0(reg, (sz - 1)));
2548 OUT_RING_TABLE(data, sz);
2549 ADVANCE_RING();
2550
2551 cmdbuf->buf += sz * sizeof(int);
2552 cmdbuf->bufsz -= sz * sizeof(int);
2553 return 0;
2554 }
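
/* CP_PACKET0(reg, n) above builds a type-0 CP packet header: a write of
 * n + 1 consecutive dwords starting at register reg. A sketch of the
 * encoding as defined for this driver family (bit positions per
 * radeon_drv.h; treat them as an assumption here):
 *
 *   header = (0 << 30)          // packet type 0
 *          | ((n) << 16)        // data dwords that follow, minus one
 *          | ((reg) >> 2);      // register offset in dword units
 *
 * which is why BEGIN_RING(sz + 1) reserves one header dword plus sz
 * data dwords for the table emitted by OUT_RING_TABLE().
 */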
2555
2556 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2557 drm_radeon_cmd_header_t header,
2558 drm_radeon_kcmd_buffer_t *cmdbuf)
2559 {
2560 int sz = header.scalars.count;
2561 int start = header.scalars.offset;
2562 int stride = header.scalars.stride;
2563 RING_LOCALS;
2564
2565 BEGIN_RING(3 + sz);
2566 OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2567 OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2568 OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2569 OUT_RING_TABLE(cmdbuf->buf, sz);
2570 ADVANCE_RING();
2571 cmdbuf->buf += sz * sizeof(int);
2572 cmdbuf->bufsz -= sz * sizeof(int);
2573 return 0;
2574 }
2575
2576 /* God this is ugly
2577 */
2578 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2579 drm_radeon_cmd_header_t header,
2580 drm_radeon_kcmd_buffer_t *cmdbuf)
2581 {
2582 int sz = header.scalars.count;
2583 int start = ((unsigned int)header.scalars.offset) + 0x100;
2584 int stride = header.scalars.stride;
2585 RING_LOCALS;
2586
2587 BEGIN_RING(3 + sz);
2588 OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2589 OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2590 OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2591 OUT_RING_TABLE(cmdbuf->buf, sz);
2592 ADVANCE_RING();
2593 cmdbuf->buf += sz * sizeof(int);
2594 cmdbuf->bufsz -= sz * sizeof(int);
2595 return 0;
2596 }
2597
2598 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2599 drm_radeon_cmd_header_t header,
2600 drm_radeon_kcmd_buffer_t *cmdbuf)
2601 {
2602 int sz = header.vectors.count;
2603 int start = header.vectors.offset;
2604 int stride = header.vectors.stride;
2605 RING_LOCALS;
2606
2607 BEGIN_RING(5 + sz);
2608 OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2609 OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2610 OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2611 OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2612 OUT_RING_TABLE(cmdbuf->buf, sz);
2613 ADVANCE_RING();
2614
2615 cmdbuf->buf += sz * sizeof(int);
2616 cmdbuf->bufsz -= sz * sizeof(int);
2617 return 0;
2618 }
2619
2620 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2621 drm_radeon_cmd_header_t header,
2622 drm_radeon_kcmd_buffer_t *cmdbuf)
2623 {
2624 int sz = header.veclinear.count * 4;
2625 int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2626 RING_LOCALS;
2627
2628 if (!sz)
2629 return 0;
2630 if (sz * 4 > cmdbuf->bufsz)
2631 return DRM_ERR(EINVAL);
2632
2633 BEGIN_RING(5 + sz);
2634 OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2635 OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2636 OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2637 OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2638 OUT_RING_TABLE(cmdbuf->buf, sz);
2639 ADVANCE_RING();
2640
2641 cmdbuf->buf += sz * sizeof(int);
2642 cmdbuf->bufsz -= sz * sizeof(int);
2643 return 0;
2644 }
2645
2646 static int radeon_emit_packet3(drm_device_t * dev,
2647 drm_file_t * filp_priv,
2648 drm_radeon_kcmd_buffer_t *cmdbuf)
2649 {
2650 drm_radeon_private_t *dev_priv = dev->dev_private;
2651 unsigned int cmdsz;
2652 int ret;
2653 RING_LOCALS;
2654
2655 DRM_DEBUG("\n");
2656
2657 if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
2658 cmdbuf, &cmdsz))) {
2659 DRM_ERROR("Packet verification failed\n");
2660 return ret;
2661 }
2662
2663 BEGIN_RING(cmdsz);
2664 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2665 ADVANCE_RING();
2666
2667 cmdbuf->buf += cmdsz * 4;
2668 cmdbuf->bufsz -= cmdsz * 4;
2669 return 0;
2670 }
2671
2672 static int radeon_emit_packet3_cliprect(drm_device_t *dev,
2673 drm_file_t *filp_priv,
2674 drm_radeon_kcmd_buffer_t *cmdbuf,
2675 int orig_nbox)
2676 {
2677 drm_radeon_private_t *dev_priv = dev->dev_private;
2678 drm_clip_rect_t box;
2679 unsigned int cmdsz;
2680 int ret;
2681 drm_clip_rect_t __user *boxes = cmdbuf->boxes;
2682 int i = 0;
2683 RING_LOCALS;
2684
2685 DRM_DEBUG("\n");
2686
2687 if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
2688 cmdbuf, &cmdsz))) {
2689 DRM_ERROR("Packet verification failed\n");
2690 return ret;
2691 }
2692
2693 if (!orig_nbox)
2694 goto out;
2695
2696 do {
2697 if (i < cmdbuf->nbox) {
2698 if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2699 return DRM_ERR(EFAULT);
2700 /* FIXME The second and subsequent times round
2701 * this loop, send a WAIT_UNTIL_3D_IDLE before
2702 * calling emit_clip_rect(). This fixes a
2703 * lockup on fast machines when sending
2704 * several cliprects with a cmdbuf, as when
2705 * waving a 2D window over a 3D
2706 * window. Something in the commands from user
2707 * space seems to hang the card when they're
2708 * sent several times in a row. That would be
2709 * the correct place to fix it but this works
2710 * around it until I can figure that out - Tim
2711 * Smith */
2712 if (i) {
2713 BEGIN_RING(2);
2714 RADEON_WAIT_UNTIL_3D_IDLE();
2715 ADVANCE_RING();
2716 }
2717 radeon_emit_clip_rect(dev_priv, &box);
2718 }
2719
2720 BEGIN_RING(cmdsz);
2721 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2722 ADVANCE_RING();
2723
2724 } while (++i < cmdbuf->nbox);
2725 if (cmdbuf->nbox == 1)
2726 cmdbuf->nbox = 0;
2727
2728 out:
2729 cmdbuf->buf += cmdsz * 4;
2730 cmdbuf->bufsz -= cmdsz * 4;
2731 return 0;
2732 }
2733
2734 static int radeon_emit_wait(drm_device_t * dev, int flags)
2735 {
2736 drm_radeon_private_t *dev_priv = dev->dev_private;
2737 RING_LOCALS;
2738
2739 DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2740 switch (flags) {
2741 case RADEON_WAIT_2D:
2742 BEGIN_RING(2);
2743 RADEON_WAIT_UNTIL_2D_IDLE();
2744 ADVANCE_RING();
2745 break;
2746 case RADEON_WAIT_3D:
2747 BEGIN_RING(2);
2748 RADEON_WAIT_UNTIL_3D_IDLE();
2749 ADVANCE_RING();
2750 break;
2751 case RADEON_WAIT_2D | RADEON_WAIT_3D:
2752 BEGIN_RING(2);
2753 RADEON_WAIT_UNTIL_IDLE();
2754 ADVANCE_RING();
2755 break;
2756 default:
2757 return DRM_ERR(EINVAL);
2758 }
2759
2760 return 0;
2761 }
2762
2763 static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
2764 {
2765 DRM_DEVICE;
2766 drm_radeon_private_t *dev_priv = dev->dev_private;
2767 drm_file_t *filp_priv;
2768 drm_device_dma_t *dma = dev->dma;
2769 drm_buf_t *buf = NULL;
2770 int idx;
2771 drm_radeon_kcmd_buffer_t cmdbuf;
2772 drm_radeon_cmd_header_t header;
2773 int orig_nbox, orig_bufsz;
2774 char *kbuf = NULL;
2775
2776 LOCK_TEST_WITH_RETURN(dev, filp);
2777
2778 DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2779
2780 DRM_COPY_FROM_USER_IOCTL(cmdbuf,
2781 (drm_radeon_cmd_buffer_t __user *) data,
2782 sizeof(cmdbuf));
2783
2784 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2785 VB_AGE_TEST_WITH_RETURN(dev_priv);
2786
2787 if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
2788 return DRM_ERR(EINVAL);
2789 }
2790
2791 /* Allocate an in-kernel area and copy in the cmdbuf. Do this to avoid
2792 * races between checking values and using those values in other code,
2793 * and simply to avoid a lot of function calls to copy in data.
2794 */
2795 orig_bufsz = cmdbuf.bufsz;
2796 if (orig_bufsz != 0) {
2797 kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
2798 if (kbuf == NULL)
2799 return DRM_ERR(ENOMEM);
2800 if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf,
2801 cmdbuf.bufsz)) {
2802 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2803 return DRM_ERR(EFAULT);
2804 }
2805 cmdbuf.buf = kbuf;
2806 }
2807
2808 orig_nbox = cmdbuf.nbox;
2809
2810 if (dev_priv->microcode_version == UCODE_R300) {
2811 int temp;
2812 temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);
2813
2814 if (orig_bufsz != 0)
2815 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2816
2817 return temp;
2818 }
2819
2820 /* microcode_version != r300 */
2821 while (cmdbuf.bufsz >= sizeof(header)) {
2822
2823 header.i = *(int *)cmdbuf.buf;
2824 cmdbuf.buf += sizeof(header);
2825 cmdbuf.bufsz -= sizeof(header);
2826
2827 switch (header.header.cmd_type) {
2828 case RADEON_CMD_PACKET:
2829 DRM_DEBUG("RADEON_CMD_PACKET\n");
2830 if (radeon_emit_packets
2831 (dev_priv, filp_priv, header, &cmdbuf)) {
2832 DRM_ERROR("radeon_emit_packets failed\n");
2833 goto err;
2834 }
2835 break;
2836
2837 case RADEON_CMD_SCALARS:
2838 DRM_DEBUG("RADEON_CMD_SCALARS\n");
2839 if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
2840 DRM_ERROR("radeon_emit_scalars failed\n");
2841 goto err;
2842 }
2843 break;
2844
2845 case RADEON_CMD_VECTORS:
2846 DRM_DEBUG("RADEON_CMD_VECTORS\n");
2847 if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
2848 DRM_ERROR("radeon_emit_vectors failed\n");
2849 goto err;
2850 }
2851 break;
2852
2853 case RADEON_CMD_DMA_DISCARD:
2854 DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2855 idx = header.dma.buf_idx;
2856 if (idx < 0 || idx >= dma->buf_count) {
2857 DRM_ERROR("buffer index %d (of %d max)\n",
2858 idx, dma->buf_count - 1);
2859 goto err;
2860 }
2861
2862 buf = dma->buflist[idx];
2863 if (buf->filp != filp || buf->pending) {
2864 DRM_ERROR("bad buffer %p %p %d\n",
2865 buf->filp, filp, buf->pending);
2866 goto err;
2867 }
2868
2869 radeon_cp_discard_buffer(dev, buf);
2870 break;
2871
2872 case RADEON_CMD_PACKET3:
2873 DRM_DEBUG("RADEON_CMD_PACKET3\n");
2874 if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
2875 DRM_ERROR("radeon_emit_packet3 failed\n");
2876 goto err;
2877 }
2878 break;
2879
2880 case RADEON_CMD_PACKET3_CLIP:
2881 DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2882 if (radeon_emit_packet3_cliprect
2883 (dev, filp_priv, &cmdbuf, orig_nbox)) {
2884 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2885 goto err;
2886 }
2887 break;
2888
2889 case RADEON_CMD_SCALARS2:
2890 DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2891 if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
2892 DRM_ERROR("radeon_emit_scalars2 failed\n");
2893 goto err;
2894 }
2895 break;
2896
2897 case RADEON_CMD_WAIT:
2898 DRM_DEBUG("RADEON_CMD_WAIT\n");
2899 if (radeon_emit_wait(dev, header.wait.flags)) {
2900 DRM_ERROR("radeon_emit_wait failed\n");
2901 goto err;
2902 }
2903 break;
2904 case RADEON_CMD_VECLINEAR:
2905 DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
2906 if (radeon_emit_veclinear(dev_priv, header, &cmdbuf)) {
2907 DRM_ERROR("radeon_emit_veclinear failed\n");
2908 goto err;
2909 }
2910 break;
2911
2912 default:
2913 DRM_ERROR("bad cmd_type %d at %p\n",
2914 header.header.cmd_type,
2915 cmdbuf.buf - sizeof(header));
2916 goto err;
2917 }
2918 }
2919
2920 if (orig_bufsz != 0)
2921 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2922
2923 DRM_DEBUG("DONE\n");
2924 COMMIT_RING();
2925 return 0;
2926
2927 err:
2928 if (orig_bufsz != 0)
2929 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2930 return DRM_ERR(EINVAL);
2931 }
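
/* Sketch of a minimal client-side command stream for this ioctl: a
 * sequence of 4-byte drm_radeon_cmd_header_t words, each followed by
 * its payload (none for WAIT and DMA_DISCARD). Assuming the union
 * layout from radeon_drm.h, a stream that waits for 2D idle and then
 * discards DMA buffer idx would look like:
 *
 *   drm_radeon_cmd_header_t h;
 *   char stream[2 * sizeof(h)];
 *
 *   h.i = 0;
 *   h.wait.cmd_type = RADEON_CMD_WAIT;
 *   h.wait.flags = RADEON_WAIT_2D;
 *   memcpy(stream, &h, sizeof(h));              // first command
 *
 *   h.i = 0;
 *   h.dma.cmd_type = RADEON_CMD_DMA_DISCARD;
 *   h.dma.buf_idx = idx;
 *   memcpy(stream + sizeof(h), &h, sizeof(h));  // second command
 *
 *   drm_radeon_cmd_buffer_t cmd = {
 *       .buf = stream, .bufsz = sizeof(stream),
 *       .nbox = 0, .boxes = NULL,
 *   };
 *   drmCommandWrite(fd, DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
 */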
2932
2933 static int radeon_cp_getparam(DRM_IOCTL_ARGS)
2934 {
2935 DRM_DEVICE;
2936 drm_radeon_private_t *dev_priv = dev->dev_private;
2937 drm_radeon_getparam_t param;
2938 int value;
2939
2940 DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
2941 sizeof(param));
2942
2943 DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
2944
2945 switch (param.param) {
2946 case RADEON_PARAM_GART_BUFFER_OFFSET:
2947 value = dev_priv->gart_buffers_offset;
2948 break;
2949 case RADEON_PARAM_LAST_FRAME:
2950 dev_priv->stats.last_frame_reads++;
2951 value = GET_SCRATCH(0);
2952 break;
2953 case RADEON_PARAM_LAST_DISPATCH:
2954 value = GET_SCRATCH(1);
2955 break;
2956 case RADEON_PARAM_LAST_CLEAR:
2957 dev_priv->stats.last_clear_reads++;
2958 value = GET_SCRATCH(2);
2959 break;
2960 case RADEON_PARAM_IRQ_NR:
2961 value = dev->irq;
2962 break;
2963 case RADEON_PARAM_GART_BASE:
2964 value = dev_priv->gart_vm_start;
2965 break;
2966 case RADEON_PARAM_REGISTER_HANDLE:
2967 value = dev_priv->mmio->offset;
2968 break;
2969 case RADEON_PARAM_STATUS_HANDLE:
2970 value = dev_priv->ring_rptr_offset;
2971 break;
2972 #if BITS_PER_LONG == 32
2973 /*
2974 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
2975 * pointer which can't fit into an int-sized variable. According to
2976 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
2977 * not supporting it shouldn't be a problem. If the same functionality
2978 * is needed on 64-bit platforms, a new ioctl() would have to be added,
2979 * so backwards-compatibility for the embedded platforms can be
2980 * maintained. --davidm 4-Feb-2004.
2981 */
2982 case RADEON_PARAM_SAREA_HANDLE:
2983 /* The lock is the first dword in the sarea. */
2984 value = (long)dev->lock.hw_lock;
2985 break;
2986 #endif
2987 case RADEON_PARAM_GART_TEX_HANDLE:
2988 value = dev_priv->gart_textures_offset;
2989 break;
2990 case RADEON_PARAM_SCRATCH_OFFSET:
2991 if (!dev_priv->writeback_works)
2992 return DRM_ERR(EINVAL);
2993 value = RADEON_SCRATCH_REG_OFFSET;
2994 break;
2995 case RADEON_PARAM_CARD_TYPE:
2996 if (dev_priv->flags & CHIP_IS_PCIE)
2997 value = RADEON_CARD_PCIE;
2998 else if (dev_priv->flags & CHIP_IS_AGP)
2999 value = RADEON_CARD_AGP;
3000 else
3001 value = RADEON_CARD_PCI;
3002 break;
3003 default:
3004 DRM_DEBUG("Invalid parameter %d\n", param.param);
3005 return DRM_ERR(EINVAL);
3006 }
3007
3008 if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
3009 DRM_ERROR("copy_to_user\n");
3010 return DRM_ERR(EFAULT);
3011 }
3012
3013 return 0;
3014 }
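
/* Sketch of a GETPARAM query from userspace, assuming the libdrm
 * drmCommandWriteRead() wrapper; the kernel writes the result through
 * the user pointer in param.value:
 *
 *   int value = 0;
 *   drm_radeon_getparam_t param = {
 *       .param = RADEON_PARAM_GART_BUFFER_OFFSET,
 *       .value = &value,             // filled in by the kernel
 *   };
 *   if (drmCommandWriteRead(fd, DRM_RADEON_GETPARAM,
 *                           &param, sizeof(param)) == 0)
 *       gart_buffer_offset = value;
 */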
3015
3016 static int radeon_cp_setparam(DRM_IOCTL_ARGS)
3017 {
3018 DRM_DEVICE;
3019 drm_radeon_private_t *dev_priv = dev->dev_private;
3020 drm_file_t *filp_priv;
3021 drm_radeon_setparam_t sp;
3022 struct drm_radeon_driver_file_fields *radeon_priv;
3023
3024 DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
3025
3026 DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
3027 sizeof(sp));
3028
3029 switch (sp.param) {
3030 case RADEON_SETPARAM_FB_LOCATION:
3031 radeon_priv = filp_priv->driver_priv;
3032 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
3033 break;
3034 case RADEON_SETPARAM_SWITCH_TILING:
3035 if (sp.value == 0) {
3036 DRM_DEBUG("color tiling disabled\n");
3037 dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3038 dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3039 dev_priv->sarea_priv->tiling_enabled = 0;
3040 } else if (sp.value == 1) {
3041 DRM_DEBUG("color tiling enabled\n");
3042 dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3043 dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3044 dev_priv->sarea_priv->tiling_enabled = 1;
3045 }
3046 break;
3047 case RADEON_SETPARAM_PCIGART_LOCATION:
3048 dev_priv->pcigart_offset = sp.value;
3049 break;
3050 case RADEON_SETPARAM_NEW_MEMMAP:
3051 dev_priv->new_memmap = sp.value;
3052 break;
3053 default:
3054 DRM_DEBUG("Invalid parameter %d\n", sp.param);
3055 return DRM_ERR(EINVAL);
3056 }
3057
3058 return 0;
3059 }
3060
3061 /* When a client dies:
3062 * - Check for and clean up flipped page state
3063 * - Free any alloced GART memory.
3064 * - Free any alloced radeon surfaces.
3065 *
3066 * DRM infrastructure takes care of reclaiming dma buffers.
3067 */
3068 void radeon_driver_preclose(drm_device_t * dev, DRMFILE filp)
3069 {
3070 if (dev->dev_private) {
3071 drm_radeon_private_t *dev_priv = dev->dev_private;
3072 if (dev_priv->page_flipping) {
3073 radeon_do_cleanup_pageflip(dev);
3074 }
3075 radeon_mem_release(filp, dev_priv->gart_heap);
3076 radeon_mem_release(filp, dev_priv->fb_heap);
3077 radeon_surfaces_release(filp, dev_priv);
3078 }
3079 }
3080
3081 void radeon_driver_lastclose(drm_device_t * dev)
3082 {
3083 radeon_do_release(dev);
3084 }
3085
3086 int radeon_driver_open(drm_device_t * dev, drm_file_t * filp_priv)
3087 {
3088 drm_radeon_private_t *dev_priv = dev->dev_private;
3089 struct drm_radeon_driver_file_fields *radeon_priv;
3090
3091 DRM_DEBUG("\n");
3092 radeon_priv =
3093 (struct drm_radeon_driver_file_fields *)
3094 drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3095
3096 if (!radeon_priv)
3097 return -ENOMEM;
3098
3099 filp_priv->driver_priv = radeon_priv;
3100
3101 if (dev_priv)
3102 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3103 else
3104 radeon_priv->radeon_fb_delta = 0;
3105 return 0;
3106 }
3107
3108 void radeon_driver_postclose(drm_device_t * dev, drm_file_t * filp_priv)
3109 {
3110 struct drm_radeon_driver_file_fields *radeon_priv =
3111 filp_priv->driver_priv;
3112
3113 drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3114 }
3115
3116 drm_ioctl_desc_t radeon_ioctls[] = {
3117 [DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3118 [DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3119 [DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3120 [DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3121 [DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, DRM_AUTH},
3122 [DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, DRM_AUTH},
3123 [DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, DRM_AUTH},
3124 [DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, DRM_AUTH},
3125 [DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, DRM_AUTH},
3126 [DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, DRM_AUTH},
3127 [DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, DRM_AUTH},
3128 [DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, DRM_AUTH},
3129 [DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, DRM_AUTH},
3130 [DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, DRM_AUTH},
3131 [DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3132 [DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, DRM_AUTH},
3133 [DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, DRM_AUTH},
3134 [DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, DRM_AUTH},
3135 [DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, DRM_AUTH},
3136 [DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, DRM_AUTH},
3137 [DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, DRM_AUTH},
3138 [DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3139 [DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, DRM_AUTH},
3140 [DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, DRM_AUTH},
3141 [DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, DRM_AUTH},
3142 [DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, DRM_AUTH},
3143 [DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH}
3144 };
3145
3146 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);