由于dxva2解码得到的数据不宜copy回内存给CPU处理,所以最好的办法是在GPU上直接进行处理。D3D的像素着色器能够对像素直接进行操作,实现点运算极其简单方便,简单的卷积运算效果也非常好。但D3D9的限制也很多,对于过于复杂的图像处理则显得有些不能胜任。
1.点运算
点运算用HLSL非常容易实现,几乎是公式怎么写,代码就怎么写。以RGB转灰度图显示为例:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
|
// Point-operation pixel shader: converts the sampled RGB colour to a grey
// level and applies one of three grey-level transforms selected by iFlag.

texture Tex0 ;           // source image

int   iFlag  = 0 ;       // transform selector: 0 linear, 1 logarithmic, 2 power
float aValue = 0.0 ;     // scale factor used by all three transforms
float bValue = 0.0 ;     // iFlag 0: offset (divided by 255 before it is added)
                         // iFlag 2: exponent; unused when iFlag is 1

sampler2D YTex = sampler_state
{
    Texture   = <Tex0> ;
    MipFilter = LINEAR ;
    MinFilter = LINEAR ;
    MagFilter = LINEAR ;
    AddressU  = CLAMP ;
    AddressV  = CLAMP ;
};

struct PS_INPUT
{
    float2 uvCoords0 : TEXCOORD0 ;
};

// Returns an opaque grey pixel (r = g = b = s, a = 1) where s is the
// transformed grey level of the texel at input.uvCoords0.
float4 Main( PS_INPUT input ) : COLOR0
{
    // Fetch the texel once; the three channels all come from the same sample.
    float3 rgb = tex2D( YTex, input.uvCoords0 ).rgb ;

    // RGB -> grey as a weighted sum. (The original author noted they were not
    // sure this is exactly how the grey image should be displayed and simply
    // assumed so.)
    float gray = dot( rgb, float3( 0.299, 0.587, 0.114 ) ) ;

    // Stays 0 (black output) when iFlag is none of 0, 1, 2.
    float s = 0 ;
    if ( iFlag == 0 )
    {
        // Linear transform: s = a * gray + b / 255
        s = aValue * gray + bValue / 255 ;
    }
    else if ( iFlag == 1 )
    {
        // Logarithmic transform: s = a * log(1 + gray)
        s = aValue * log( 1 + gray ) ;
    }
    else if ( iFlag == 2 )
    {
        // Power transform: s = a * |gray| ^ b
        s = aValue * pow( abs( gray ), bValue ) ;
    }

    return float4( s, s, s, 1.0 ) ;
}
点运算如此简单是因为GPU是并行运算的,我个人认为可以看成是每一个像素点(BGRA)对应一个线程,这大概就是OpenCL中所谓的数据并行。这是一个非常简单的程序,指令数少,程序结构也很简单,shader 的版本用2.0就可以轻松编译通过。
2.卷积运算举例
指令数较多的情况2.0版本的shader就搞不定了,上3.0版本可以做一些简单的卷积运算。以均值滤波(对邻域内的像素取平均)为例:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
|
// Effect file: neighbourhood-average (box) filter over a ksize x ksize window,
// applied only to pixels whose texture coordinate lies inside the rectangle
// (fLeft, fTop) - (fRight, fBottom). All other pixels are passed through.

texture Tex0 ;           // source image

matrix WorldMatrix ;
matrix ViewMatrix ;
matrix ProjMatrix ;

sampler2D YTex = sampler_state
{
    Texture   = <Tex0> ;
    MipFilter = LINEAR ;
    MinFilter = LINEAR ;
    MagFilter = LINEAR ;
    AddressU  = CLAMP ;
    AddressV  = CLAMP ;
};

struct VS_INPUT
{
    float4 pos   : POSITION ;
    float4 color : COLOR0 ;
    float2 tex   : TEXCOORD0 ;
};

struct VS_OUTPUT
{
    float4 pos   : POSITION ;
    float4 color : COLOR0 ;
    float2 tex   : TEXCOORD0 ;
};

// Used as the texture size: one texel step is 1 / g_v4ScreenSize.
float2 g_v4ScreenSize ;

// Window side length. Only the odd values 3, 5, 7, 9 and 11 are filtered;
// every other value leaves the image untouched.
int ksize = 1 ;

// Rectangle, in texture coordinates, inside which the filter is applied.
// With the defaults of -1 the region test in the pixel shader cannot pass for
// coordinates in [0, 1], so nothing is filtered until these are set.
float fLeft   = -1.0f ;
float fTop    = -1.0f ;
float fRight  = -1.0f ;
float fBottom = -1.0f ;

//--------------------------------- BurTechnique --------------------------------------

// Transforms the vertex by World * View * Projection and forwards the colour
// and texture coordinate unchanged.
VS_OUTPUT MainVS_Screen( VS_INPUT In )
{
    VS_OUTPUT Out = ( VS_OUTPUT )0 ;

    float4x4 matWorldView = mul( WorldMatrix, ViewMatrix ) ;
    float4x4 matProject   = mul( matWorldView, ProjMatrix ) ;

    Out.pos   = mul( In.pos, matProject ) ;
    Out.tex   = In.tex ;
    Out.color = In.color ;
    return Out ;
}

// One window tap at column i / row j, counted from the window's top-left
// corner. Relies on the locals fX0Y0, x_off and y_off of MainPS_Screen.
#define TAP( i, j ) tex2D( YTex, fX0Y0 + float2( x_off * ( i ), y_off * ( j ) ) ).rgb

// Returns the mean colour of the ksize x ksize window centred on the pixel
// (alpha forced to 1), or the unmodified texel when ksize is not supported or
// the pixel lies outside the configured rectangle.
//
// The taps are listed one by one on purpose: every `if` compares against a
// literal and no loop is needed. Each `ksize >= n` group adds only the taps
// that the n x n window has on top of the (n-2) x (n-2) one.
float4 MainPS_Screen( VS_OUTPUT In ) : COLOR0
{
    float4 outColor = tex2D( YTex, In.tex ).rgba ;

    // Supported window sizes are the odd values from 3 to 11.
    if ( ksize < 3 || ksize > 11 || ksize % 2 == 0 )
    {
        return outColor ;
    }

    // Pass through pixels outside the rectangle to be filtered.
    bool insideRegion = In.tex.x > fLeft && In.tex.x < fRight
                     && In.tex.y > fTop  && In.tex.y < fBottom ;
    if ( !insideRegion )
    {
        return outColor ;
    }

    // Size of one texel step in texture coordinates.
    float2 TexSize = float2( g_v4ScreenSize.x , g_v4ScreenSize.y ) ;
    float x_off = 1.0f / TexSize.x ;
    float y_off = 1.0f / TexSize.y ;

    // Top-left tap of the window. The radius is (ksize - 1) / 2 whole texels,
    // so tap (radius, radius) lands exactly on In.tex. (Writing
    // `x_off * ksize / 2` here would be evaluated in floating point and shift
    // the whole window by half a texel.)
    float radius = ( ksize - 1 ) * 0.5f ;
    float2 fX0Y0 = In.tex - float2( x_off * radius, y_off * radius ) ;

    float3 sum = { 0.0f, 0.0f, 0.0f } ;

    if ( ksize >= 3 )
    {
        // Full 3 x 3 core.
        sum += TAP( 0, 0 ) + TAP( 0, 1 ) + TAP( 0, 2 ) ;
        sum += TAP( 1, 0 ) + TAP( 1, 1 ) + TAP( 1, 2 ) ;
        sum += TAP( 2, 0 ) + TAP( 2, 1 ) + TAP( 2, 2 ) ;
    }

    if ( ksize >= 5 )
    {
        // Columns 3 and 4, rows 0..4.
        sum += TAP( 3, 0 ) + TAP( 3, 1 ) + TAP( 3, 2 ) + TAP( 3, 3 ) + TAP( 3, 4 ) ;
        sum += TAP( 4, 0 ) + TAP( 4, 1 ) + TAP( 4, 2 ) + TAP( 4, 3 ) + TAP( 4, 4 ) ;
        // Rows 3 and 4, columns 0..2.
        sum += TAP( 0, 3 ) + TAP( 1, 3 ) + TAP( 2, 3 ) ;
        sum += TAP( 0, 4 ) + TAP( 1, 4 ) + TAP( 2, 4 ) ;
    }

    if ( ksize >= 7 )
    {
        // Columns 5 and 6, rows 0..6.
        sum += TAP( 5, 0 ) + TAP( 5, 1 ) + TAP( 5, 2 ) + TAP( 5, 3 ) + TAP( 5, 4 ) + TAP( 5, 5 ) + TAP( 5, 6 ) ;
        sum += TAP( 6, 0 ) + TAP( 6, 1 ) + TAP( 6, 2 ) + TAP( 6, 3 ) + TAP( 6, 4 ) + TAP( 6, 5 ) + TAP( 6, 6 ) ;
        // Rows 5 and 6, columns 0..4.
        sum += TAP( 0, 5 ) + TAP( 1, 5 ) + TAP( 2, 5 ) + TAP( 3, 5 ) + TAP( 4, 5 ) ;
        sum += TAP( 0, 6 ) + TAP( 1, 6 ) + TAP( 2, 6 ) + TAP( 3, 6 ) + TAP( 4, 6 ) ;
    }

    if ( ksize >= 9 )
    {
        // Columns 7 and 8, rows 0..8.
        sum += TAP( 7, 0 ) + TAP( 7, 1 ) + TAP( 7, 2 ) + TAP( 7, 3 ) + TAP( 7, 4 ) + TAP( 7, 5 ) + TAP( 7, 6 ) + TAP( 7, 7 ) + TAP( 7, 8 ) ;
        sum += TAP( 8, 0 ) + TAP( 8, 1 ) + TAP( 8, 2 ) + TAP( 8, 3 ) + TAP( 8, 4 ) + TAP( 8, 5 ) + TAP( 8, 6 ) + TAP( 8, 7 ) + TAP( 8, 8 ) ;
        // Rows 7 and 8, columns 0..6.
        sum += TAP( 0, 7 ) + TAP( 1, 7 ) + TAP( 2, 7 ) + TAP( 3, 7 ) + TAP( 4, 7 ) + TAP( 5, 7 ) + TAP( 6, 7 ) ;
        sum += TAP( 0, 8 ) + TAP( 1, 8 ) + TAP( 2, 8 ) + TAP( 3, 8 ) + TAP( 4, 8 ) + TAP( 5, 8 ) + TAP( 6, 8 ) ;
    }

    if ( ksize >= 11 )
    {
        // Columns 9 and 10, rows 0..10.
        sum += TAP( 9, 0 ) + TAP( 9, 1 ) + TAP( 9, 2 ) + TAP( 9, 3 ) + TAP( 9, 4 ) + TAP( 9, 5 ) + TAP( 9, 6 ) + TAP( 9, 7 ) + TAP( 9, 8 ) + TAP( 9, 9 ) + TAP( 9, 10 ) ;
        sum += TAP( 10, 0 ) + TAP( 10, 1 ) + TAP( 10, 2 ) + TAP( 10, 3 ) + TAP( 10, 4 ) + TAP( 10, 5 ) + TAP( 10, 6 ) + TAP( 10, 7 ) + TAP( 10, 8 ) + TAP( 10, 9 ) + TAP( 10, 10 ) ;
        // Rows 9 and 10, columns 0..8.
        sum += TAP( 0, 9 ) + TAP( 1, 9 ) + TAP( 2, 9 ) + TAP( 3, 9 ) + TAP( 4, 9 ) + TAP( 5, 9 ) + TAP( 6, 9 ) + TAP( 7, 9 ) + TAP( 8, 9 ) ;
        sum += TAP( 0, 10 ) + TAP( 1, 10 ) + TAP( 2, 10 ) + TAP( 3, 10 ) + TAP( 4, 10 ) + TAP( 5, 10 ) + TAP( 6, 10 ) + TAP( 7, 10 ) + TAP( 8, 10 ) ;
    }

    // Mean over the ksize * ksize taps; alpha is forced to opaque.
    outColor = float4( sum / ( ksize * ksize ), 1.0f ) ;
    return outColor ;
}

#undef TAP

//--------------------------- Technique ---------------------------
technique BurTechnique
{
    pass P0
    {
        LightEnable[0] = false ;
        VertexShader = compile vs_3_0 MainVS_Screen() ;
        PixelShader  = compile ps_3_0 MainPS_Screen() ;
    }
}
由于3.0版本的shader似乎不允许pixel shader单独出现,所以我从点运算用像素着色器实现改为用特效(effect)来实现。HLSL语法中有if语句,也有for语句,可是这个程序却不厌其烦地把所有的采样都逐一列了出来,而没有使用for循环。这是因为在实际使用中发现有一些限制,比如if语句的if(A>B),A与B中必须有一个是常量,就像上面见到的那种形式;for循环中间的判断也是如此,只是在第二层j循环中可以是第一层循环的i,即不可以像下面这样用变量作为循环的上界:
1
2
3
4
5
6
7
|
// Illustrative loop nest only; the loop body is elided.
for (int i = 0; i < ksize; ++i)
{
    for (int j = 0; j < ksize1; ++j)
    {
        // ..........
    }
}
以上代码的ksize与ksize1都必须为常数,例外的情况是ksize1可以为第一层循环的 i 。这个问题不知道后续版本的shader有没有,反正我当前使用的版本有。
另外有一个需要注意的地方是指令数,2.0版本的shader支持的指令数相当少,3.0版本则要多好多,我最长写到了400多条快500条时才导致编译失败。还有一个需要提醒的是3.0版本的shader只支持D3D 9.0c及以后的版本。如果要求做更为复杂的图像处理,可以的话建议上D3D11,compute shader虽然我没用过,但从介绍来说,应该可以处理一些更为复杂的图像处理。
以上就是本文的全部内容,希望本文的内容对大家的学习或者工作能带来一定的帮助,同时也希望多多支持服务器之家!
原文链接:http://www.cnblogs.com/betterwgo/p/6403522.html