また FFmpeg の make が通らない

MMX を有効化すると libavcodec/x86/dsputil_mmx.c をコンパイルできない。
gcc のエラーメッセージはどばーっと大量に出るんだけど、根本的な原因はたぶんこれ。

In file included from libavcodec/x86/dsputil_mmx.c:31:
libavcodec/x86/mmx.h:24:2: warning: #warning Everything in this header is deprecated, use plain __asm__()! New code using this header will be rejected.
libavcodec/x86/h264dsp_mmx.c: In function 'h264_h_loop_filter_luma_mmx2':
libavcodec/x86/dsputil_mmx.c:681: error: can't find a register in class 'GENERAL_REGS' while reloading 'asm'
libavcodec/x86/dsputil_mmx.c:681: error: can't find a register in class 'GENERAL_REGS' while reloading 'asm'
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/h264dsp_mmx.c:633: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/h264dsp_mmx.c:633: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints
libavcodec/x86/dsputil_mmx.c:681: error: 'asm' operand has impossible constraints

そもそもこのエラーメッセージの意味がわかっていなかったり。一体どうすればいいんだろう。


該当箇所のソースはこんなかんじ。処理はすべてインラインアセンブラで書かれている。
libavcodec/x86/dsputil_mmx.c

/*
 * Transpose a 4x4 block of bytes using MMX.
 *
 * Reads four 32-bit rows from src (rows src_stride bytes apart) and writes
 * four 32-bit rows to dst (rows dst_stride bytes apart), so that byte c of
 * source row r ends up as byte r of destination row c.
 *
 * Operands %0-%3 are the dst rows (outputs), %4-%7 are the src rows (inputs).
 * The asm statement declares no clobbers, although it overwrites mm0-mm3.
 *
 * NOTE(review): all eight operands use separate "m" constraints, so the
 * compiler may need a distinct address register for each of them; presumably
 * that is what produces "can't find a register in class 'GENERAL_REGS'" on a
 * register-starved target -- confirm.
 */
static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
    __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
        /* Load one 4-byte source row into the low half of each of mm0..mm3. */
        "movd  %4, %%mm0                \n\t"
        "movd  %5, %%mm1                \n\t"
        "movd  %6, %%mm2                \n\t"
        "movd  %7, %%mm3                \n\t"
        /* Interleave bytes: mm0 = rows 0/1 pairs, mm2 = rows 2/3 pairs. */
        "punpcklbw %%mm1, %%mm0         \n\t"
        "punpcklbw %%mm3, %%mm2         \n\t"
        /* Keep a copy of the row 0/1 pairs for the high-word interleave. */
        "movq %%mm0, %%mm1              \n\t"
        /* Interleave words: mm0 = output rows 0 and 1, mm1 = output rows 2 and 3. */
        "punpcklwd %%mm2, %%mm0         \n\t"
        "punpckhwd %%mm2, %%mm1         \n\t"
        /* Store the low dword, then move the high dword down and store it. */
        "movd  %%mm0, %0                \n\t"
        "punpckhdq %%mm0, %%mm0         \n\t"
        "movd  %%mm0, %1                \n\t"
        "movd  %%mm1, %2                \n\t"
        "punpckhdq %%mm1, %%mm1         \n\t"
        "movd  %%mm1, %3                \n\t"

        /* Outputs %0-%3: the four destination rows, written as 32-bit words. */
        : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
          "=m" (*(uint32_t*)(dst + 1*dst_stride)),
          "=m" (*(uint32_t*)(dst + 2*dst_stride)),
          "=m" (*(uint32_t*)(dst + 3*dst_stride))
        /* Inputs %4-%7: the four source rows, read as 32-bit words. */
        :  "m" (*(uint32_t*)(src + 0*src_stride)),
           "m" (*(uint32_t*)(src + 1*src_stride)),
           "m" (*(uint32_t*)(src + 2*src_stride)),
           "m" (*(uint32_t*)(src + 3*src_stride))
    );
}

どこも悪くない気がするんだけど。。。

環境

gcc-4.3.2
git log

commit 7ec2a3e3b1e44ae6e56c787acbeeb92138d66c88
Author: stefano 
Date:   Sat Mar 14 10:18:18 2009 +0000
    Add an example showing how to create a video from many images.
    
    
    git-svn-id: file:///var/local/repositories/ffmpeg/trunk@17955 9553f0bf-9b14-0410-a0b8-cfaf0461ba5b

追記

こう直したら通った。これが正しい治療法なのかどうかは不明。

--- libswscale/rgb2rgb_template.c.orig	2009-03-14 23:53:56.000000000 +0900
+++ libswscale/rgb2rgb_template.c	2009-03-15 00:08:47.000000000 +0900
@@ -146,11 +146,11 @@
     while (s < mm_end)
     {
         __asm__ volatile(
-        PREFETCH"    32%1           \n\t"
-        "movq          %1, %%mm0    \n\t"
-        "movq         8%1, %%mm1    \n\t"
-        "movq        16%1, %%mm4    \n\t"
-        "movq        24%1, %%mm5    \n\t"
+        PREFETCH"    32%0           \n\t"
+        "movq          %0, %%mm0    \n\t"
+        "movq         8%0, %%mm1    \n\t"
+        "movq        16%0, %%mm4    \n\t"
+        "movq        24%0, %%mm5    \n\t"
         "movq       %%mm0, %%mm2    \n\t"
         "movq       %%mm1, %%mm3    \n\t"
         "movq       %%mm4, %%mm6    \n\t"
@@ -159,14 +159,14 @@
         "psrlq         $8, %%mm3    \n\t"
         "psrlq         $8, %%mm6    \n\t"
         "psrlq         $8, %%mm7    \n\t"
-        "pand          %2, %%mm0    \n\t"
-        "pand          %2, %%mm1    \n\t"
-        "pand          %2, %%mm4    \n\t"
-        "pand          %2, %%mm5    \n\t"
-        "pand          %3, %%mm2    \n\t"
-        "pand          %3, %%mm3    \n\t"
-        "pand          %3, %%mm6    \n\t"
-        "pand          %3, %%mm7    \n\t"
+        "pand          %1, %%mm0    \n\t"
+        "pand          %1, %%mm1    \n\t"
+        "pand          %1, %%mm4    \n\t"
+        "pand          %1, %%mm5    \n\t"
+        "pand          %2, %%mm2    \n\t"
+        "pand          %2, %%mm3    \n\t"
+        "pand          %2, %%mm6    \n\t"
+        "pand          %2, %%mm7    \n\t"
         "por        %%mm2, %%mm0    \n\t"
         "por        %%mm3, %%mm1    \n\t"
         "por        %%mm6, %%mm4    \n\t"
@@ -176,22 +176,25 @@
         "movq       %%mm4, %%mm3    \n\t"
         "psllq        $48, %%mm2    \n\t"
         "psllq        $32, %%mm3    \n\t"
-        "pand          %4, %%mm2    \n\t"
-        "pand          %5, %%mm3    \n\t"
+        "pand          %3, %%mm2    \n\t"
+        "pand          %4, %%mm3    \n\t"
         "por        %%mm2, %%mm0    \n\t"
         "psrlq        $16, %%mm1    \n\t"
         "psrlq        $32, %%mm4    \n\t"
         "psllq        $16, %%mm5    \n\t"
         "por        %%mm3, %%mm1    \n\t"
-        "pand          %6, %%mm5    \n\t"
+        "pand          %5, %%mm5    \n\t"
         "por        %%mm5, %%mm4    \n\t"
-
+        :
+        :"m"(*s),"m"(mask24l),
+         "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
+        );
+        __asm__ volatile(
         MOVNTQ"     %%mm0,   %0     \n\t"
         MOVNTQ"     %%mm1,  8%0     \n\t"
         MOVNTQ"     %%mm4, 16%0"
         :"=m"(*dest)
-        :"m"(*s),"m"(mask24l),
-         "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
+        :
         :"memory");
         dest += 24;
         s += 32;
@@ -1011,14 +1014,14 @@
         "psrlq         $8, %%mm3    \n\t"
         "psrlq         $8, %%mm6    \n\t"
         "psrlq         $8, %%mm7    \n\t"
-        "pand          %2, %%mm0    \n\t"
-        "pand          %2, %%mm1    \n\t"
-        "pand          %2, %%mm4    \n\t"
-        "pand          %2, %%mm5    \n\t"
-        "pand          %3, %%mm2    \n\t"
-        "pand          %3, %%mm3    \n\t"
-        "pand          %3, %%mm6    \n\t"
-        "pand          %3, %%mm7    \n\t"
+        "pand          %1, %%mm0    \n\t"
+        "pand          %1, %%mm1    \n\t"
+        "pand          %1, %%mm4    \n\t"
+        "pand          %1, %%mm5    \n\t"
+        "pand          %2, %%mm2    \n\t"
+        "pand          %2, %%mm3    \n\t"
+        "pand          %2, %%mm6    \n\t"
+        "pand          %2, %%mm7    \n\t"
         "por        %%mm2, %%mm0    \n\t"
         "por        %%mm3, %%mm1    \n\t"
         "por        %%mm6, %%mm4    \n\t"
@@ -1028,22 +1031,20 @@
         "movq       %%mm4, %%mm3    \n\t"
         "psllq        $48, %%mm2    \n\t"
         "psllq        $32, %%mm3    \n\t"
-        "pand          %4, %%mm2    \n\t"
-        "pand          %5, %%mm3    \n\t"
+        "pand          %3, %%mm2    \n\t"
+        "pand          %4, %%mm3    \n\t"
         "por        %%mm2, %%mm0    \n\t"
         "psrlq        $16, %%mm1    \n\t"
         "psrlq        $32, %%mm4    \n\t"
         "psllq        $16, %%mm5    \n\t"
         "por        %%mm3, %%mm1    \n\t"
-        "pand          %6, %%mm5    \n\t"
+        "pand          %5, %%mm5    \n\t"
         "por        %%mm5, %%mm4    \n\t"
-
         MOVNTQ"     %%mm0,   %0     \n\t"
         MOVNTQ"     %%mm1,  8%0     \n\t"
         MOVNTQ"     %%mm4, 16%0"
-
         :"=m"(*d)
-        :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
+        :"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
         :"memory");
         d += 24;
         s += 8;
@@ -1152,14 +1153,14 @@
         "psrlq         $8, %%mm3    \n\t"
         "psrlq         $8, %%mm6    \n\t"
         "psrlq         $8, %%mm7    \n\t"
-        "pand          %2, %%mm0    \n\t"
-        "pand          %2, %%mm1    \n\t"
-        "pand          %2, %%mm4    \n\t"
-        "pand          %2, %%mm5    \n\t"
-        "pand          %3, %%mm2    \n\t"
-        "pand          %3, %%mm3    \n\t"
-        "pand          %3, %%mm6    \n\t"
-        "pand          %3, %%mm7    \n\t"
+        "pand          %1, %%mm0    \n\t"
+        "pand          %1, %%mm1    \n\t"
+        "pand          %1, %%mm4    \n\t"
+        "pand          %1, %%mm5    \n\t"
+        "pand          %2, %%mm2    \n\t"
+        "pand          %2, %%mm3    \n\t"
+        "pand          %2, %%mm6    \n\t"
+        "pand          %2, %%mm7    \n\t"
         "por        %%mm2, %%mm0    \n\t"
         "por        %%mm3, %%mm1    \n\t"
         "por        %%mm6, %%mm4    \n\t"
@@ -1169,14 +1170,14 @@
         "movq       %%mm4, %%mm3    \n\t"
         "psllq        $48, %%mm2    \n\t"
         "psllq        $32, %%mm3    \n\t"
-        "pand          %4, %%mm2    \n\t"
-        "pand          %5, %%mm3    \n\t"
+        "pand          %3, %%mm2    \n\t"
+        "pand          %4, %%mm3    \n\t"
         "por        %%mm2, %%mm0    \n\t"
         "psrlq        $16, %%mm1    \n\t"
         "psrlq        $32, %%mm4    \n\t"
         "psllq        $16, %%mm5    \n\t"
         "por        %%mm3, %%mm1    \n\t"
-        "pand          %6, %%mm5    \n\t"
+        "pand          %5, %%mm5    \n\t"
         "por        %%mm5, %%mm4    \n\t"
 
         MOVNTQ"     %%mm0,   %0     \n\t"
@@ -1184,7 +1185,7 @@
         MOVNTQ"     %%mm4, 16%0"
 
         :"=m"(*d)
-        :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
+        :"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
         :"memory");
         d += 24;
         s += 8;
--- libavcodec/x86/dsputil_mmx.c.orig	2009-03-14 23:19:05.000000000 +0900
+++ libavcodec/x86/dsputil_mmx.c	2009-03-14 23:48:57.000000000 +0900
@@ -679,10 +679,16 @@
 
 static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
     __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
-        "movd  %4, %%mm0                \n\t"
-        "movd  %5, %%mm1                \n\t"
-        "movd  %6, %%mm2                \n\t"
-        "movd  %7, %%mm3                \n\t"
+        "movd  %0, %%mm0                \n\t"
+        "movd  %1, %%mm1                \n\t"
+        "movd  %2, %%mm2                \n\t"
+        "movd  %3, %%mm3                \n\t"
+        : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
+          "=m" (*(uint32_t*)(dst + 1*dst_stride)),
+          "=m" (*(uint32_t*)(dst + 2*dst_stride)),
+          "=m" (*(uint32_t*)(dst + 3*dst_stride))
+    );
+    __asm__ volatile(
         "punpcklbw %%mm1, %%mm0         \n\t"
         "punpcklbw %%mm3, %%mm2         \n\t"
         "movq %%mm0, %%mm1              \n\t"
@@ -694,11 +700,7 @@
         "movd  %%mm1, %2                \n\t"
         "punpckhdq %%mm1, %%mm1         \n\t"
         "movd  %%mm1, %3                \n\t"
-
-        : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
-          "=m" (*(uint32_t*)(dst + 1*dst_stride)),
-          "=m" (*(uint32_t*)(dst + 2*dst_stride)),
-          "=m" (*(uint32_t*)(dst + 3*dst_stride))
+        :
         :  "m" (*(uint32_t*)(src + 0*src_stride)),
            "m" (*(uint32_t*)(src + 1*src_stride)),
            "m" (*(uint32_t*)(src + 2*src_stride)),
--- libavcodec/x86/flacdsp_mmx.c.orig	2009-03-14 23:26:37.000000000 +0900
+++ libavcodec/x86/flacdsp_mmx.c	2009-03-14 23:53:01.000000000 +0900
@@ -89,28 +89,31 @@
                 "movsd    "MANGLE(ff_pd_1)", %%xmm1 \n\t"
                 "movsd    "MANGLE(ff_pd_1)", %%xmm2 \n\t"
                 "1:                                 \n\t"
-                "movapd   (%4,%0), %%xmm3           \n\t"
-                "movupd -8(%5,%0), %%xmm4           \n\t"
-                "movapd   (%5,%0), %%xmm5           \n\t"
+                "movapd   (%1,%0), %%xmm3           \n\t"
+                "movupd -8(%2,%0), %%xmm4           \n\t"
+                "movapd   (%2,%0), %%xmm5           \n\t"
                 "mulpd     %%xmm3, %%xmm4           \n\t"
                 "mulpd     %%xmm3, %%xmm5           \n\t"
-                "mulpd -16(%5,%0), %%xmm3           \n\t"
+                "mulpd -16(%2,%0), %%xmm3           \n\t"
                 "addpd     %%xmm4, %%xmm1           \n\t"
                 "addpd     %%xmm5, %%xmm0           \n\t"
                 "addpd     %%xmm3, %%xmm2           \n\t"
                 "add       $16,    %0               \n\t"
                 "jl 1b                              \n\t"
+                :"+&r"(i)
+                :"r"(data1+len), "r"(data1+len-j)
+            );
+            __asm__ volatile(
                 "movhlps   %%xmm0, %%xmm3           \n\t"
                 "movhlps   %%xmm1, %%xmm4           \n\t"
                 "movhlps   %%xmm2, %%xmm5           \n\t"
                 "addsd     %%xmm3, %%xmm0           \n\t"
                 "addsd     %%xmm4, %%xmm1           \n\t"
                 "addsd     %%xmm5, %%xmm2           \n\t"
-                "movsd     %%xmm0, %1               \n\t"
-                "movsd     %%xmm1, %2               \n\t"
-                "movsd     %%xmm2, %3               \n\t"
-                :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2])
-                :"r"(data1+len), "r"(data1+len-j)
+                "movsd     %%xmm0, %0               \n\t"
+                "movsd     %%xmm1, %1               \n\t"
+                "movsd     %%xmm2, %2               \n\t"
+                : "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2])
             );
         } else {
             __asm__ volatile(

追々記

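上記パッチはコンパイルは通るものの、ffmpeg の変換結果がおかしくなります。
たとえば transpose4x4 の修正では、分割後の 1 つめの asm が dst 側のオペランドから読み込み、2 つめの asm が src 側のオペランドへ書き込む形になっていて、入出力が逆転しています。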
ちくしょー、ダメだったか。