comparison libpostproc/postprocess_template.c @ 2039:f25e485a7850 libavcodec

mmx optimized version of the per line/accurate deblock filter; vertical default mmx deblock filter fix
author michael
date Thu, 27 May 2004 21:42:00 +0000
parents 02b59a3c62cd
children 5de466b3360e
comparison
equal deleted inserted replaced
2038:02b59a3c62cd 2039:f25e485a7850
1029 "movq %%mm1, %%mm6 \n\t" 1029 "movq %%mm1, %%mm6 \n\t"
1030 "psubusw %%mm3, %%mm6 \n\t" 1030 "psubusw %%mm3, %%mm6 \n\t"
1031 "psubw %%mm6, %%mm1 \n\t" 1031 "psubw %%mm6, %%mm1 \n\t"
1032 #endif 1032 #endif
1033 1033
1034 "movd %2, %%mm2 \n\t" // QP
1035 "punpcklbw %%mm7, %%mm2 \n\t"
1036
1034 "movq %%mm7, %%mm6 \n\t" // 0 1037 "movq %%mm7, %%mm6 \n\t" // 0
1035 "pcmpgtw %%mm4, %%mm6 \n\t" // sign(2L2 - 5L3 + 5L4 - 2L5) 1038 "pcmpgtw %%mm4, %%mm6 \n\t" // sign(2L2 - 5L3 + 5L4 - 2L5)
1036 "pxor %%mm6, %%mm4 \n\t" 1039 "pxor %%mm6, %%mm4 \n\t"
1037 "psubw %%mm6, %%mm4 \n\t" // |2L2 - 5L3 + 5L4 - 2L5| 1040 "psubw %%mm6, %%mm4 \n\t" // |2L2 - 5L3 + 5L4 - 2L5|
1038 "pcmpgtw %%mm5, %%mm7 \n\t" // sign(2H2 - 5H3 + 5H4 - 2H5) 1041 "pcmpgtw %%mm5, %%mm7 \n\t" // sign(2H2 - 5H3 + 5H4 - 2H5)
1039 "pxor %%mm7, %%mm5 \n\t" 1042 "pxor %%mm7, %%mm5 \n\t"
1040 "psubw %%mm7, %%mm5 \n\t" // |2H2 - 5H3 + 5H4 - 2H5| 1043 "psubw %%mm7, %%mm5 \n\t" // |2H2 - 5H3 + 5H4 - 2H5|
1041 // 100 opcodes 1044 // 100 opcodes
1042 "movd %2, %%mm2 \n\t" // QP
1043 "psllw $3, %%mm2 \n\t" // 8QP 1045 "psllw $3, %%mm2 \n\t" // 8QP
1044 "movq %%mm2, %%mm3 \n\t" // 8QP 1046 "movq %%mm2, %%mm3 \n\t" // 8QP
1045 "pcmpgtw %%mm4, %%mm2 \n\t" 1047 "pcmpgtw %%mm4, %%mm2 \n\t"
1046 "pcmpgtw %%mm5, %%mm3 \n\t" 1048 "pcmpgtw %%mm5, %%mm3 \n\t"
1047 "pand %%mm2, %%mm4 \n\t" 1049 "pand %%mm2, %%mm4 \n\t"
2608 } 2610 }
2609 } 2611 }
2610 #endif 2612 #endif
2611 } 2613 }
2612 2614
2615 #ifdef HAVE_MMX
2616 /**
2617 * accurate deblock filter
 *
 * Deblocks across one block edge, 8 byte lanes at a time.
 *
 * Pass 1 (MMX) walks 10 samples along step, starting at src + 3*step, and
 * builds two per-lane byte masks:
 *  - eq_mask: set when the number of neighbouring sample pairs whose
 *    (difference + mmxDcOffset) exceeds mmxDcThreshold is greater than
 *    ppMode.flatnessThreshold (signed byte compares).
 *  - dc_mask: set when max-min over samples 1..8 is below 2*QP, with QP
 *    taken from c->pQPb (signed byte compare).
 * Pass 2 (MMX, skipped when every lane has eq_mask set) computes the
 * default-filter correction and applies it, only in lanes where eq_mask is
 * clear, to the two samples on either side of the edge.
 * Pass 3 (C) low-pass filters all 8 samples of every lane that has both
 * eq_mask and dc_mask set.
 *
 * @param src    pointer into the picture; advanced internally by 4*step in total
 * @param step   byte distance between successive samples across the edge
 * @param stride byte distance between lanes in the C loop. The MMX passes
 *               load 8 adjacent bytes as the lanes, so they presumably rely
 *               on stride == 1 -- NOTE(review): confirm against all callers
 * @param c      context supplying QP, pQPb, nonBQP, the mmxDc tables and
 *               ppMode.flatnessThreshold
2618 */
2619 static always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int stride, PPContext *c){
2620 int y;
2621 const int QP= c->QP;
2622 int64_t dc_mask, eq_mask; // one byte per lane, written by the first asm pass
2623 src+= step*3; // src points to begin of the 8x8 Block
2624 //START_TIMER
2625 asm volatile(
2626 "movq %0, %%mm7 \n\t" // mm7 = DC offset, kept live into the next asm statement
2627 "movq %1, %%mm6 \n\t" // mm6 = DC threshold, kept live into the next asm statement
2628 : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
2629 );
2630
2631 asm volatile(
2632 "leal (%2, %3), %%eax \n\t"
2633 // 0 1 2 3 4 5 6 7 8 9
2634 // %2 eax eax+%3 eax+2%3 %2+4%3 eax' eax'+%3 eax'+2%3 %2+8%3 eax'+4%3 (eax' = eax after the second leal = %2+5%3)
2635
2636 "movq (%2), %%mm0 \n\t"
2637 "movq (%%eax), %%mm1 \n\t"
2638 "movq %%mm1, %%mm3 \n\t" // mm3 = running minimum, seeded with line 1
2639 "movq %%mm1, %%mm4 \n\t" // mm4 = running maximum, seeded with line 1
2640 "psubb %%mm1, %%mm0 \n\t" // mm0 = difference
2641 "paddb %%mm7, %%mm0 \n\t"
2642 "pcmpgtb %%mm6, %%mm0 \n\t" // FF (= -1) per lane where difference + offset > threshold
2643
2644 "movq (%%eax,%3), %%mm2 \n\t"
2645 PMAXUB(%%mm2, %%mm4)
2646 PMINUB(%%mm2, %%mm3, %%mm5)
2647 "psubb %%mm2, %%mm1 \n\t"
2648 "paddb %%mm7, %%mm1 \n\t"
2649 "pcmpgtb %%mm6, %%mm1 \n\t"
2650 "paddb %%mm1, %%mm0 \n\t" // mm0 accumulates -(number of pairs passing the test)
2651
2652 "movq (%%eax, %3, 2), %%mm1 \n\t"
2653 PMAXUB(%%mm1, %%mm4)
2654 PMINUB(%%mm1, %%mm3, %%mm5)
2655 "psubb %%mm1, %%mm2 \n\t"
2656 "paddb %%mm7, %%mm2 \n\t"
2657 "pcmpgtb %%mm6, %%mm2 \n\t"
2658 "paddb %%mm2, %%mm0 \n\t"
2659
2660 "leal (%%eax, %3, 4), %%eax \n\t" // eax = %2 + 5*%3
2661
2662 "movq (%2, %3, 4), %%mm2 \n\t"
2663 PMAXUB(%%mm2, %%mm4)
2664 PMINUB(%%mm2, %%mm3, %%mm5)
2665 "psubb %%mm2, %%mm1 \n\t"
2666 "paddb %%mm7, %%mm1 \n\t"
2667 "pcmpgtb %%mm6, %%mm1 \n\t"
2668 "paddb %%mm1, %%mm0 \n\t"
2669
2670 "movq (%%eax), %%mm1 \n\t"
2671 PMAXUB(%%mm1, %%mm4)
2672 PMINUB(%%mm1, %%mm3, %%mm5)
2673 "psubb %%mm1, %%mm2 \n\t"
2674 "paddb %%mm7, %%mm2 \n\t"
2675 "pcmpgtb %%mm6, %%mm2 \n\t"
2676 "paddb %%mm2, %%mm0 \n\t"
2677
2678 "movq (%%eax, %3), %%mm2 \n\t"
2679 PMAXUB(%%mm2, %%mm4)
2680 PMINUB(%%mm2, %%mm3, %%mm5)
2681 "psubb %%mm2, %%mm1 \n\t"
2682 "paddb %%mm7, %%mm1 \n\t"
2683 "pcmpgtb %%mm6, %%mm1 \n\t"
2684 "paddb %%mm1, %%mm0 \n\t"
2685
2686 "movq (%%eax, %3, 2), %%mm1 \n\t"
2687 PMAXUB(%%mm1, %%mm4)
2688 PMINUB(%%mm1, %%mm3, %%mm5)
2689 "psubb %%mm1, %%mm2 \n\t"
2690 "paddb %%mm7, %%mm2 \n\t"
2691 "pcmpgtb %%mm6, %%mm2 \n\t"
2692 "paddb %%mm2, %%mm0 \n\t"
2693
2694 "movq (%2, %3, 8), %%mm2 \n\t"
2695 PMAXUB(%%mm2, %%mm4)
2696 PMINUB(%%mm2, %%mm3, %%mm5)
2697 "psubb %%mm2, %%mm1 \n\t"
2698 "paddb %%mm7, %%mm1 \n\t"
2699 "pcmpgtb %%mm6, %%mm1 \n\t"
2700 "paddb %%mm1, %%mm0 \n\t"
2701
2702 "movq (%%eax, %3, 4), %%mm1 \n\t" // line 9: only used for the pair test, not for min/max
2703 "psubb %%mm1, %%mm2 \n\t"
2704 "paddb %%mm7, %%mm2 \n\t"
2705 "pcmpgtb %%mm6, %%mm2 \n\t"
2706 "paddb %%mm2, %%mm0 \n\t"
2707 "psubusb %%mm3, %%mm4 \n\t" // mm4 = max - min
2708
2709 "movq %4, %%mm7 \n\t" // QP,..., QP
2710 "paddusb %%mm7, %%mm7 \n\t" // 2QP ... 2QP
2711 "pcmpgtb %%mm4, %%mm7 \n\t" // Diff < 2QP -> FF
2712 "movq %%mm7, %1 \n\t" // store dc_mask
2713
2714 "pxor %%mm6, %%mm6 \n\t"
2715 "movq %5, %%mm7 \n\t" // NOTE(review): loads 8 bytes from flatnessThreshold although only the low byte is used below -- confirm this read is in bounds
2716 "punpcklbw %%mm7, %%mm7 \n\t"
2717 "punpcklbw %%mm7, %%mm7 \n\t"
2718 "punpcklbw %%mm7, %%mm7 \n\t" // low byte of the threshold replicated into all 8 lanes
2719 "psubb %%mm0, %%mm6 \n\t" // mm6 = 0 - mm0 = number of pairs that passed the test
2720 "pcmpgtb %%mm7, %%mm6 \n\t" // count > flatnessThreshold -> FF
2721 "movq %%mm6, %0 \n\t" // store eq_mask
2722
2723 : "=m" (eq_mask), "=m" (dc_mask)
2724 : "r" (src), "r" (step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold)
2725 : "%eax"
2726 );
2727
2728 src+= step; // src is now 4*step past the pointer passed in; sample 0 of the C loop below
2729
2730 if(eq_mask != -1LL){ // at least one lane has eq_mask clear and needs the default filter
2731 asm volatile(
2732 "pxor %%mm7, %%mm7 \n\t"
2733 "leal -40(%%esp), %%ecx \n\t" // make space for 4 8-byte vars; NOTE(review): this scratch area lies below %esp and is not reserved -- confirm it cannot be overwritten asynchronously
2734 "andl $0xFFFFFFF8, %%ecx \n\t" // align
2735 // 0 1 2 3 4 5 6 7
2736 // %0 %0+%1 eax eax+%1 eax+2%1 %0'+2%1 eax+4%1 %0'+4%1 (eax = %0+2%1; %0' = %0 after the leal below = eax+%1)
2737
2738 "movq (%0), %%mm0 \n\t"
2739 "movq %%mm0, %%mm1 \n\t"
2740 "punpcklbw %%mm7, %%mm0 \n\t" // low part of line 0
2741 "punpckhbw %%mm7, %%mm1 \n\t" // high part of line 0
2742
2743 "movq (%0, %1), %%mm2 \n\t"
2744 "leal (%0, %1, 2), %%eax \n\t"
2745 "movq %%mm2, %%mm3 \n\t"
2746 "punpcklbw %%mm7, %%mm2 \n\t" // low part of line 1
2747 "punpckhbw %%mm7, %%mm3 \n\t" // high part of line 1
2748
2749 "movq (%%eax), %%mm4 \n\t"
2750 "movq %%mm4, %%mm5 \n\t"
2751 "punpcklbw %%mm7, %%mm4 \n\t" // low part of line 2
2752 "punpckhbw %%mm7, %%mm5 \n\t" // high part of line 2
2753
2754 "paddw %%mm0, %%mm0 \n\t" // 2L0
2755 "paddw %%mm1, %%mm1 \n\t" // 2H0
2756 "psubw %%mm4, %%mm2 \n\t" // L1 - L2
2757 "psubw %%mm5, %%mm3 \n\t" // H1 - H2
2758 "psubw %%mm2, %%mm0 \n\t" // 2L0 - L1 + L2
2759 "psubw %%mm3, %%mm1 \n\t" // 2H0 - H1 + H2
2760
2761 "psllw $2, %%mm2 \n\t" // 4L1 - 4L2
2762 "psllw $2, %%mm3 \n\t" // 4H1 - 4H2
2763 "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2
2764 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2
2765
2766 "movq (%%eax, %1), %%mm2 \n\t"
2767 "movq %%mm2, %%mm3 \n\t"
2768 "punpcklbw %%mm7, %%mm2 \n\t" // L3
2769 "punpckhbw %%mm7, %%mm3 \n\t" // H3
2770
2771 "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - L3
2772 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - H3
2773 "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
2774 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
2775 "movq %%mm0, (%%ecx) \n\t" // 2L0 - 5L1 + 5L2 - 2L3
2776 "movq %%mm1, 8(%%ecx) \n\t" // 2H0 - 5H1 + 5H2 - 2H3
2777
2778 "movq (%%eax, %1, 2), %%mm0 \n\t"
2779 "movq %%mm0, %%mm1 \n\t"
2780 "punpcklbw %%mm7, %%mm0 \n\t" // L4
2781 "punpckhbw %%mm7, %%mm1 \n\t" // H4
2782
2783 "psubw %%mm0, %%mm2 \n\t" // L3 - L4
2784 "psubw %%mm1, %%mm3 \n\t" // H3 - H4
2785 "movq %%mm2, 16(%%ecx) \n\t" // L3 - L4
2786 "movq %%mm3, 24(%%ecx) \n\t" // H3 - H4
2787 "paddw %%mm4, %%mm4 \n\t" // 2L2
2788 "paddw %%mm5, %%mm5 \n\t" // 2H2
2789 "psubw %%mm2, %%mm4 \n\t" // 2L2 - L3 + L4
2790 "psubw %%mm3, %%mm5 \n\t" // 2H2 - H3 + H4
2791
2792 "leal (%%eax, %1), %0 \n\t" // %0 now addresses line 3; undone in C after the asm
2793 "psllw $2, %%mm2 \n\t" // 4L3 - 4L4
2794 "psllw $2, %%mm3 \n\t" // 4H3 - 4H4
2795 "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4
2796 "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4
2797 //50 opcodes so far
2798 "movq (%0, %1, 2), %%mm2 \n\t"
2799 "movq %%mm2, %%mm3 \n\t"
2800 "punpcklbw %%mm7, %%mm2 \n\t" // L5
2801 "punpckhbw %%mm7, %%mm3 \n\t" // H5
2802 "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 - L5
2803 "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 - H5
2804 "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 - 2L5
2805 "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 - 2H5
2806
2807 "movq (%%eax, %1, 4), %%mm6 \n\t"
2808 "punpcklbw %%mm7, %%mm6 \n\t" // L6
2809 "psubw %%mm6, %%mm2 \n\t" // L5 - L6
2810 "movq (%%eax, %1, 4), %%mm6 \n\t"
2811 "punpckhbw %%mm7, %%mm6 \n\t" // H6
2812 "psubw %%mm6, %%mm3 \n\t" // H5 - H6
2813
2814 "paddw %%mm0, %%mm0 \n\t" // 2L4
2815 "paddw %%mm1, %%mm1 \n\t" // 2H4
2816 "psubw %%mm2, %%mm0 \n\t" // 2L4 - L5 + L6
2817 "psubw %%mm3, %%mm1 \n\t" // 2H4 - H5 + H6
2818
2819 "psllw $2, %%mm2 \n\t" // 4L5 - 4L6
2820 "psllw $2, %%mm3 \n\t" // 4H5 - 4H6
2821 "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6
2822 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6
2823
2824 "movq (%0, %1, 4), %%mm2 \n\t"
2825 "movq %%mm2, %%mm3 \n\t"
2826 "punpcklbw %%mm7, %%mm2 \n\t" // L7
2827 "punpckhbw %%mm7, %%mm3 \n\t" // H7
2828
2829 "paddw %%mm2, %%mm2 \n\t" // 2L7
2830 "paddw %%mm3, %%mm3 \n\t" // 2H7
2831 "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7
2832 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7
2833
2834 "movq (%%ecx), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
2835 "movq 8(%%ecx), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
2836
2837 #ifdef HAVE_MMX2
2838 "movq %%mm7, %%mm6 \n\t" // 0
2839 "psubw %%mm0, %%mm6 \n\t"
2840 "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
2841 "movq %%mm7, %%mm6 \n\t" // 0
2842 "psubw %%mm1, %%mm6 \n\t"
2843 "pmaxsw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7|
2844 "movq %%mm7, %%mm6 \n\t" // 0
2845 "psubw %%mm2, %%mm6 \n\t"
2846 "pmaxsw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3|
2847 "movq %%mm7, %%mm6 \n\t" // 0
2848 "psubw %%mm3, %%mm6 \n\t"
2849 "pmaxsw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
2850 #else
2851 "movq %%mm7, %%mm6 \n\t" // 0
2852 "pcmpgtw %%mm0, %%mm6 \n\t"
2853 "pxor %%mm6, %%mm0 \n\t"
2854 "psubw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
2855 "movq %%mm7, %%mm6 \n\t" // 0
2856 "pcmpgtw %%mm1, %%mm6 \n\t"
2857 "pxor %%mm6, %%mm1 \n\t"
2858 "psubw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7|
2859 "movq %%mm7, %%mm6 \n\t" // 0
2860 "pcmpgtw %%mm2, %%mm6 \n\t"
2861 "pxor %%mm6, %%mm2 \n\t"
2862 "psubw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3|
2863 "movq %%mm7, %%mm6 \n\t" // 0
2864 "pcmpgtw %%mm3, %%mm6 \n\t"
2865 "pxor %%mm6, %%mm3 \n\t"
2866 "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
2867 #endif
2868
2869 #ifdef HAVE_MMX2
2870 "pminsw %%mm2, %%mm0 \n\t" // min of the two outer magnitudes (low half)
2871 "pminsw %%mm3, %%mm1 \n\t" // min of the two outer magnitudes (high half)
2872 #else
2873 "movq %%mm0, %%mm6 \n\t"
2874 "psubusw %%mm2, %%mm6 \n\t"
2875 "psubw %%mm6, %%mm0 \n\t" // min of the two outer magnitudes (low half)
2876 "movq %%mm1, %%mm6 \n\t"
2877 "psubusw %%mm3, %%mm6 \n\t"
2878 "psubw %%mm6, %%mm1 \n\t" // min of the two outer magnitudes (high half)
2879 #endif
2880
2881 "movd %2, %%mm2 \n\t" // QP
2882 "punpcklbw %%mm7, %%mm2 \n\t" // widen the low 4 QP bytes to words (mm7 is still 0 here)
2883
2884 "movq %%mm7, %%mm6 \n\t" // 0
2885 "pcmpgtw %%mm4, %%mm6 \n\t" // sign(2L2 - 5L3 + 5L4 - 2L5)
2886 "pxor %%mm6, %%mm4 \n\t"
2887 "psubw %%mm6, %%mm4 \n\t" // |2L2 - 5L3 + 5L4 - 2L5|
2888 "pcmpgtw %%mm5, %%mm7 \n\t" // sign(2H2 - 5H3 + 5H4 - 2H5)
2889 "pxor %%mm7, %%mm5 \n\t"
2890 "psubw %%mm7, %%mm5 \n\t" // |2H2 - 5H3 + 5H4 - 2H5|
2891 // 100 opcodes
2892 "psllw $3, %%mm2 \n\t" // 8QP
2893 "movq %%mm2, %%mm3 \n\t" // 8QP
2894 "pcmpgtw %%mm4, %%mm2 \n\t"
2895 "pcmpgtw %%mm5, %%mm3 \n\t"
2896 "pand %%mm2, %%mm4 \n\t" // keep the middle magnitude only where it is below 8QP
2897 "pand %%mm3, %%mm5 \n\t"
2898
2899
2900 "psubusw %%mm0, %%mm4 \n\t" // hd
2901 "psubusw %%mm1, %%mm5 \n\t" // ld
2902
2903
2904 "movq "MANGLE(w05)", %%mm2 \n\t" // 5
2905 "pmullw %%mm2, %%mm4 \n\t"
2906 "pmullw %%mm2, %%mm5 \n\t"
2907 "movq "MANGLE(w20)", %%mm2 \n\t" // 32
2908 "paddw %%mm2, %%mm4 \n\t"
2909 "paddw %%mm2, %%mm5 \n\t"
2910 "psrlw $6, %%mm4 \n\t" // (5*d + 32) >> 6
2911 "psrlw $6, %%mm5 \n\t"
2912
2913 "movq 16(%%ecx), %%mm0 \n\t" // L3 - L4
2914 "movq 24(%%ecx), %%mm1 \n\t" // H3 - H4
2915
2916 "pxor %%mm2, %%mm2 \n\t"
2917 "pxor %%mm3, %%mm3 \n\t"
2918
2919 "pcmpgtw %%mm0, %%mm2 \n\t" // sign (L3-L4)
2920 "pcmpgtw %%mm1, %%mm3 \n\t" // sign (H3-H4)
2921 "pxor %%mm2, %%mm0 \n\t"
2922 "pxor %%mm3, %%mm1 \n\t"
2923 "psubw %%mm2, %%mm0 \n\t" // |L3-L4|
2924 "psubw %%mm3, %%mm1 \n\t" // |H3-H4|
2925 "psrlw $1, %%mm0 \n\t" // |L3 - L4|/2
2926 "psrlw $1, %%mm1 \n\t" // |H3 - H4|/2
2927
2928 "pxor %%mm6, %%mm2 \n\t" // sign(L3-L4) ^ sign(middle term)
2929 "pxor %%mm7, %%mm3 \n\t" // sign(H3-H4) ^ sign(middle term)
2930 "pand %%mm2, %%mm4 \n\t" // keep the correction only where the two signs differ
2931 "pand %%mm3, %%mm5 \n\t"
2932
2933 #ifdef HAVE_MMX2
2934 "pminsw %%mm0, %%mm4 \n\t" // limit the correction to |L3 - L4|/2
2935 "pminsw %%mm1, %%mm5 \n\t" // limit the correction to |H3 - H4|/2
2936 #else
2937 "movq %%mm4, %%mm2 \n\t"
2938 "psubusw %%mm0, %%mm2 \n\t"
2939 "psubw %%mm2, %%mm4 \n\t" // limit the correction to |L3 - L4|/2
2940 "movq %%mm5, %%mm2 \n\t"
2941 "psubusw %%mm1, %%mm2 \n\t"
2942 "psubw %%mm2, %%mm5 \n\t" // limit the correction to |H3 - H4|/2
2943 #endif
2944 "pxor %%mm6, %%mm4 \n\t"
2945 "pxor %%mm7, %%mm5 \n\t"
2946 "psubw %%mm6, %%mm4 \n\t" // negate where the middle term was negative
2947 "psubw %%mm7, %%mm5 \n\t"
2948 "packsswb %%mm5, %%mm4 \n\t" // back to 8 signed bytes
2949 "movq %3, %%mm1 \n\t" // eq_mask
2950 "pandn %%mm4, %%mm1 \n\t" // zero the correction in lanes where eq_mask is set
2951 "movq (%0), %%mm0 \n\t"
2952 "paddb %%mm1, %%mm0 \n\t"
2953 "movq %%mm0, (%0) \n\t" // line 3 += correction
2954 "movq (%0, %1), %%mm0 \n\t"
2955 "psubb %%mm1, %%mm0 \n\t"
2956 "movq %%mm0, (%0, %1) \n\t" // line 4 -= correction
2957
2958 : "+r" (src)
2959 : "r" (step), "m" (c->pQPb), "m"(eq_mask)
2960 : "%eax", "%ecx" // NOTE(review): the asm stores through src and to the scratch area but lists no "memory" clobber -- confirm this is safe
2961 );
2962 src-= 3*step; //reverse src change from asm
2963 }
2964
2965 for(y=0; y<8; y++){ // one iteration per lane
2966 if((eq_mask>>(y*8))&1){ // low bit of this lane's eq_mask byte
2967 if((dc_mask>>(y*8))&1){ // low bit of this lane's dc_mask byte
2968 const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0]; // sample before the run if it is within QP of sample 0, else sample 0
2969 const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step]; // sample after the run if it is within QP of sample 7, else sample 7
2970
2971 int sums[10]; // running sums, each derived from the previous one by dropping one term and adding the next
2972 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
2973 sums[1] = sums[0] - first + src[3*step];
2974 sums[2] = sums[1] - first + src[4*step];
2975 sums[3] = sums[2] - first + src[5*step];
2976 sums[4] = sums[3] - first + src[6*step];
2977 sums[5] = sums[4] - src[0*step] + src[7*step];
2978 sums[6] = sums[5] - src[1*step] + last;
2979 sums[7] = sums[6] - src[2*step] + last;
2980 sums[8] = sums[7] - src[3*step] + last;
2981 sums[9] = sums[8] - src[4*step] + last;
2982
2983 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4; // every source sample used in sums[i+2] is still unmodified at this point
2984 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
2985 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
2986 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
2987 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
2988 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
2989 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
2990 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
2991 }
2992 }
2993
2994 src += stride; // advance to the next lane
2995 }
2996 /*if(step==16){
2997 STOP_TIMER("step16")
2998 }else{
2999 STOP_TIMER("stepX")
3000 }*/
3001 }
3002 #endif //HAVE_MMX
3003
2613 static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, 3004 static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
2614 QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c); 3005 QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c);
2615 3006
2616 /** 3007 /**
2617 * Copies a block from src to dst and fixes the blacklevel 3008 * Copies a block from src to dst and fixes the blacklevel
3111 if(t==1) 3502 if(t==1)
3112 RENAME(doVertLowPass)(dstBlock, stride, &c); 3503 RENAME(doVertLowPass)(dstBlock, stride, &c);
3113 else if(t==2) 3504 else if(t==2)
3114 RENAME(doVertDefFilter)(dstBlock, stride, &c); 3505 RENAME(doVertDefFilter)(dstBlock, stride, &c);
3115 }else if(mode & V_A_DEBLOCK){ 3506 }else if(mode & V_A_DEBLOCK){
3116 do_a_deblock(dstBlock, stride, 1, &c); 3507 RENAME(do_a_deblock)(dstBlock, stride, 1, &c);
3117 } 3508 }
3118 } 3509 }
3119 3510
3120 #ifdef HAVE_MMX 3511 #ifdef HAVE_MMX
3121 RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride); 3512 RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
3134 if(t==1) 3525 if(t==1)
3135 RENAME(doVertLowPass)(tempBlock1, 16, &c); 3526 RENAME(doVertLowPass)(tempBlock1, 16, &c);
3136 else if(t==2) 3527 else if(t==2)
3137 RENAME(doVertDefFilter)(tempBlock1, 16, &c); 3528 RENAME(doVertDefFilter)(tempBlock1, 16, &c);
3138 }else if(mode & H_A_DEBLOCK){ 3529 }else if(mode & H_A_DEBLOCK){
3139 do_a_deblock(tempBlock1, 16, 1, &c); 3530 RENAME(do_a_deblock)(tempBlock1, 16, 1, &c);
3140 } 3531 }
3141 3532
3142 RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16); 3533 RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16);
3143 3534
3144 #else 3535 #else
3151 if(t==1) 3542 if(t==1)
3152 RENAME(doHorizLowPass)(dstBlock-4, stride, &c); 3543 RENAME(doHorizLowPass)(dstBlock-4, stride, &c);
3153 else if(t==2) 3544 else if(t==2)
3154 RENAME(doHorizDefFilter)(dstBlock-4, stride, &c); 3545 RENAME(doHorizDefFilter)(dstBlock-4, stride, &c);
3155 }else if(mode & H_A_DEBLOCK){ 3546 }else if(mode & H_A_DEBLOCK){
3156 do_a_deblock(dstBlock-8, 1, stride, &c); 3547 RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c);
3157 } 3548 }
3158 #endif 3549 #endif
3159 if(mode & DERING) 3550 if(mode & DERING)
3160 { 3551 {
3161 //FIXME filter first line 3552 //FIXME filter first line