また FFmpeg の make が通らない
MMX を有効化すると libavcodec/x86/dsputil_mmx.c をコンパイルできない。
gcc のエラーメッセージはどばーっと大量に出るんだけど、根本的な原因はたぶんこれ。
In file included from libavcodec/x86/dsputil_mmx.c:31:
libavcodec/x86/mmx.h:24:2: warning: #warning Everything in this header is deprecated, use plain __asm__()! New code using this header will be rejected.
libavcodec/x86/h264dsp_mmx.c: In function 'h264_h_loop_filter_luma_mmx2':
libavcodec/x86/dsputil_mmx.c:681: error: can't find a register in class 'GENERAL_REGS' while reloading 'asm'
libavcodec/x86/dsputil_mmx.c:681: error: can't find a register in class 'GENERAL_REGS' while reloading 'asm'
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/h264dsp_mmx.c:633: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/h264dsp_mmx.c:633: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
そもそもこのエラーメッセージの意味がわかっていなかったり。一体どうすればいいんだろう。
該当箇所のソースはこんなかんじ。処理はすべてインラインアセンブラで書かれている。
libavcodec/x86/dsputil_mmx.c
/**
 * Transpose a 4x4 block of bytes with MMX instructions.
 *
 * Four 32-bit rows are loaded from src (consecutive rows are src_stride
 * bytes apart), interleaved with punpck* instructions, and the four
 * resulting 32-bit rows are stored to dst (consecutive rows are dst_stride
 * bytes apart), so that dst[r*dst_stride + c] == src[c*src_stride + r]
 * for r, c in 0..3.
 *
 * @param dst        destination; 4 bytes are written at each of
 *                   dst + 0..3 * dst_stride
 * @param src        source; 4 bytes are read at each of
 *                   src + 0..3 * src_stride
 * @param dst_stride distance in bytes between destination rows
 * @param src_stride distance in bytes between source rows
 *
 * The routine overwrites mm0-mm3; they are not listed as clobbers and no
 * emms is issued here.
 *
 * NOTE(review): all eight operands are "m"/"=m" memory operands with
 * separately computed addresses, so the compiler may need one
 * general-purpose register per operand. This is presumably what triggers
 * "can't find a register in class 'GENERAL_REGS'" when this function is
 * inlined in a register-starved build -- confirm against the build flags.
 */
static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
    __asm__ volatile( /* FIXME could save 1 instruction if done as 8x4 ... */
        /* Load the four source rows (operands %4..%7). */
        "movd %4, %%mm0 \n\t"
        "movd %5, %%mm1 \n\t"
        "movd %6, %%mm2 \n\t"
        "movd %7, %%mm3 \n\t"
        /* Interleave bytes of rows 0/1 and rows 2/3. */
        "punpcklbw %%mm1, %%mm0 \n\t"
        "punpcklbw %%mm3, %%mm2 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        /* Interleave 16-bit pairs: mm0 = columns 0 and 1, mm1 = columns 2 and 3. */
        "punpcklwd %%mm2, %%mm0 \n\t"
        "punpckhwd %%mm2, %%mm1 \n\t"
        /* Store each column as a destination row (operands %0..%3). */
        "movd %%mm0, %0 \n\t"
        "punpckhdq %%mm0, %%mm0 \n\t"
        "movd %%mm0, %1 \n\t"
        "movd %%mm1, %2 \n\t"
        "punpckhdq %%mm1, %%mm1 \n\t"
        "movd %%mm1, %3 \n\t"

        : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
          "=m" (*(uint32_t*)(dst + 1*dst_stride)),
          "=m" (*(uint32_t*)(dst + 2*dst_stride)),
          "=m" (*(uint32_t*)(dst + 3*dst_stride))
        : "m" (*(uint32_t*)(src + 0*src_stride)),
          "m" (*(uint32_t*)(src + 1*src_stride)),
          "m" (*(uint32_t*)(src + 2*src_stride)),
          "m" (*(uint32_t*)(src + 3*src_stride))
    );
}
どこも悪くない気がするんだけど。。。
環境
gcc-4.3.2
git log
commit 7ec2a3e3b1e44ae6e56c787acbeeb92138d66c88
Author: stefano
Date:   Sat Mar 14 10:18:18 2009 +0000

    Add an example showing how to create a video from many images.

    git-svn-id: file:///var/local/repositories/ffmpeg/trunk@17955 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b
追記
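こう直したら通った。これが正しい治療法なのかどうかは不明。
（注意: コンパイルは通るが、下の libavcodec/x86/dsputil_mmx.c のパッチでは、分割後の 1 つ目の asm が dst 側の "=m" オペランドから movd で読み込み、2 つ目の asm が src 側の "m" オペランドへ movd で書き込む形になっており、元のコード（src を読んで dst に書く）とは入出力が逆になっている。また、1 つの asm 文を 2 つに分割すると、その間で MMX/XMM レジスタの内容が保たれることをコンパイラは保証しない。）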
--- libswscale/rgb2rgb_template.c.orig 2009-03-14 23:53:56.000000000 +0900 +++ libswscale/rgb2rgb_template.c 2009-03-15 00:08:47.000000000 +0900 @@ -146,11 +146,11 @@ while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm1 \n\t" - "movq 16%1, %%mm4 \n\t" - "movq 24%1, %%mm5 \n\t" + PREFETCH" 32%0 \n\t" + "movq %0, %%mm0 \n\t" + "movq 8%0, %%mm1 \n\t" + "movq 16%0, %%mm4 \n\t" + "movq 24%0, %%mm5 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm1, %%mm3 \n\t" "movq %%mm4, %%mm6 \n\t" @@ -159,14 +159,14 @@ "psrlq $8, %%mm3 \n\t" "psrlq $8, %%mm6 \n\t" "psrlq $8, %%mm7 \n\t" - "pand %2, %%mm0 \n\t" - "pand %2, %%mm1 \n\t" - "pand %2, %%mm4 \n\t" - "pand %2, %%mm5 \n\t" - "pand %3, %%mm2 \n\t" - "pand %3, %%mm3 \n\t" - "pand %3, %%mm6 \n\t" - "pand %3, %%mm7 \n\t" + "pand %1, %%mm0 \n\t" + "pand %1, %%mm1 \n\t" + "pand %1, %%mm4 \n\t" + "pand %1, %%mm5 \n\t" + "pand %2, %%mm2 \n\t" + "pand %2, %%mm3 \n\t" + "pand %2, %%mm6 \n\t" + "pand %2, %%mm7 \n\t" "por %%mm2, %%mm0 \n\t" "por %%mm3, %%mm1 \n\t" "por %%mm6, %%mm4 \n\t" @@ -176,22 +176,25 @@ "movq %%mm4, %%mm3 \n\t" "psllq $48, %%mm2 \n\t" "psllq $32, %%mm3 \n\t" - "pand %4, %%mm2 \n\t" - "pand %5, %%mm3 \n\t" + "pand %3, %%mm2 \n\t" + "pand %4, %%mm3 \n\t" "por %%mm2, %%mm0 \n\t" "psrlq $16, %%mm1 \n\t" "psrlq $32, %%mm4 \n\t" "psllq $16, %%mm5 \n\t" "por %%mm3, %%mm1 \n\t" - "pand %6, %%mm5 \n\t" + "pand %5, %%mm5 \n\t" "por %%mm5, %%mm4 \n\t" - + : + :"m"(*s),"m"(mask24l), + "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) + ); + __asm__ volatile( MOVNTQ" %%mm0, %0 \n\t" MOVNTQ" %%mm1, 8%0 \n\t" MOVNTQ" %%mm4, 16%0" :"=m"(*dest) - :"m"(*s),"m"(mask24l), - "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) + : :"memory"); dest += 24; s += 32; @@ -1011,14 +1014,14 @@ "psrlq $8, %%mm3 \n\t" "psrlq $8, %%mm6 \n\t" "psrlq $8, %%mm7 \n\t" - "pand %2, %%mm0 \n\t" - "pand %2, %%mm1 \n\t" - "pand %2, %%mm4 \n\t" - "pand %2, %%mm5 \n\t" - "pand %3, %%mm2 \n\t" - 
"pand %3, %%mm3 \n\t" - "pand %3, %%mm6 \n\t" - "pand %3, %%mm7 \n\t" + "pand %1, %%mm0 \n\t" + "pand %1, %%mm1 \n\t" + "pand %1, %%mm4 \n\t" + "pand %1, %%mm5 \n\t" + "pand %2, %%mm2 \n\t" + "pand %2, %%mm3 \n\t" + "pand %2, %%mm6 \n\t" + "pand %2, %%mm7 \n\t" "por %%mm2, %%mm0 \n\t" "por %%mm3, %%mm1 \n\t" "por %%mm6, %%mm4 \n\t" @@ -1028,22 +1031,20 @@ "movq %%mm4, %%mm3 \n\t" "psllq $48, %%mm2 \n\t" "psllq $32, %%mm3 \n\t" - "pand %4, %%mm2 \n\t" - "pand %5, %%mm3 \n\t" + "pand %3, %%mm2 \n\t" + "pand %4, %%mm3 \n\t" "por %%mm2, %%mm0 \n\t" "psrlq $16, %%mm1 \n\t" "psrlq $32, %%mm4 \n\t" "psllq $16, %%mm5 \n\t" "por %%mm3, %%mm1 \n\t" - "pand %6, %%mm5 \n\t" + "pand %5, %%mm5 \n\t" "por %%mm5, %%mm4 \n\t" - MOVNTQ" %%mm0, %0 \n\t" MOVNTQ" %%mm1, 8%0 \n\t" MOVNTQ" %%mm4, 16%0" - :"=m"(*d) - :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) + :"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) :"memory"); d += 24; s += 8; @@ -1152,14 +1153,14 @@ "psrlq $8, %%mm3 \n\t" "psrlq $8, %%mm6 \n\t" "psrlq $8, %%mm7 \n\t" - "pand %2, %%mm0 \n\t" - "pand %2, %%mm1 \n\t" - "pand %2, %%mm4 \n\t" - "pand %2, %%mm5 \n\t" - "pand %3, %%mm2 \n\t" - "pand %3, %%mm3 \n\t" - "pand %3, %%mm6 \n\t" - "pand %3, %%mm7 \n\t" + "pand %1, %%mm0 \n\t" + "pand %1, %%mm1 \n\t" + "pand %1, %%mm4 \n\t" + "pand %1, %%mm5 \n\t" + "pand %2, %%mm2 \n\t" + "pand %2, %%mm3 \n\t" + "pand %2, %%mm6 \n\t" + "pand %2, %%mm7 \n\t" "por %%mm2, %%mm0 \n\t" "por %%mm3, %%mm1 \n\t" "por %%mm6, %%mm4 \n\t" @@ -1169,14 +1170,14 @@ "movq %%mm4, %%mm3 \n\t" "psllq $48, %%mm2 \n\t" "psllq $32, %%mm3 \n\t" - "pand %4, %%mm2 \n\t" - "pand %5, %%mm3 \n\t" + "pand %3, %%mm2 \n\t" + "pand %4, %%mm3 \n\t" "por %%mm2, %%mm0 \n\t" "psrlq $16, %%mm1 \n\t" "psrlq $32, %%mm4 \n\t" "psllq $16, %%mm5 \n\t" "por %%mm3, %%mm1 \n\t" - "pand %6, %%mm5 \n\t" + "pand %5, %%mm5 \n\t" "por %%mm5, %%mm4 \n\t" MOVNTQ" %%mm0, %0 \n\t" @@ -1184,7 +1185,7 @@ MOVNTQ" %%mm4, 16%0" :"=m"(*d) 
- :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) + :"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) :"memory"); d += 24; s += 8;
--- libavcodec/x86/dsputil_mmx.c.orig 2009-03-14 23:19:05.000000000 +0900 +++ libavcodec/x86/dsputil_mmx.c 2009-03-14 23:48:57.000000000 +0900 @@ -679,10 +679,16 @@ static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){ __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ... - "movd %4, %%mm0 \n\t" - "movd %5, %%mm1 \n\t" - "movd %6, %%mm2 \n\t" - "movd %7, %%mm3 \n\t" + "movd %0, %%mm0 \n\t" + "movd %1, %%mm1 \n\t" + "movd %2, %%mm2 \n\t" + "movd %3, %%mm3 \n\t" + : "=m" (*(uint32_t*)(dst + 0*dst_stride)), + "=m" (*(uint32_t*)(dst + 1*dst_stride)), + "=m" (*(uint32_t*)(dst + 2*dst_stride)), + "=m" (*(uint32_t*)(dst + 3*dst_stride)) + ); + __asm__ volatile( "punpcklbw %%mm1, %%mm0 \n\t" "punpcklbw %%mm3, %%mm2 \n\t" "movq %%mm0, %%mm1 \n\t" @@ -694,11 +700,7 @@ "movd %%mm1, %2 \n\t" "punpckhdq %%mm1, %%mm1 \n\t" "movd %%mm1, %3 \n\t" - - : "=m" (*(uint32_t*)(dst + 0*dst_stride)), - "=m" (*(uint32_t*)(dst + 1*dst_stride)), - "=m" (*(uint32_t*)(dst + 2*dst_stride)), - "=m" (*(uint32_t*)(dst + 3*dst_stride)) + : : "m" (*(uint32_t*)(src + 0*src_stride)), "m" (*(uint32_t*)(src + 1*src_stride)), "m" (*(uint32_t*)(src + 2*src_stride)),
--- libavcodec/x86/flacdsp_mmx.c.orig 2009-03-14 23:26:37.000000000 +0900 +++ libavcodec/x86/flacdsp_mmx.c 2009-03-14 23:53:01.000000000 +0900 @@ -89,28 +89,31 @@ "movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t" "movsd "MANGLE(ff_pd_1)", %%xmm2 \n\t" "1: \n\t" - "movapd (%4,%0), %%xmm3 \n\t" - "movupd -8(%5,%0), %%xmm4 \n\t" - "movapd (%5,%0), %%xmm5 \n\t" + "movapd (%1,%0), %%xmm3 \n\t" + "movupd -8(%2,%0), %%xmm4 \n\t" + "movapd (%2,%0), %%xmm5 \n\t" "mulpd %%xmm3, %%xmm4 \n\t" "mulpd %%xmm3, %%xmm5 \n\t" - "mulpd -16(%5,%0), %%xmm3 \n\t" + "mulpd -16(%2,%0), %%xmm3 \n\t" "addpd %%xmm4, %%xmm1 \n\t" "addpd %%xmm5, %%xmm0 \n\t" "addpd %%xmm3, %%xmm2 \n\t" "add $16, %0 \n\t" "jl 1b \n\t" + :"+&r"(i) + :"r"(data1+len), "r"(data1+len-j) + ); + __asm__ volatile( "movhlps %%xmm0, %%xmm3 \n\t" "movhlps %%xmm1, %%xmm4 \n\t" "movhlps %%xmm2, %%xmm5 \n\t" "addsd %%xmm3, %%xmm0 \n\t" "addsd %%xmm4, %%xmm1 \n\t" "addsd %%xmm5, %%xmm2 \n\t" - "movsd %%xmm0, %1 \n\t" - "movsd %%xmm1, %2 \n\t" - "movsd %%xmm2, %3 \n\t" - :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2]) - :"r"(data1+len), "r"(data1+len-j) + "movsd %%xmm0, %0 \n\t" + "movsd %%xmm1, %1 \n\t" + "movsd %%xmm2, %2 \n\t" + : "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2]) ); } else { __asm__ volatile(