/*
 * Copyright (c) 2005-2009 FAUmachine Team <info@faumachine.org>.
 * Copyright (c) 2004 Fabrice Bellard.
 *
 * Derived from QEMU (hw/cirrus_vga_rop2.h)
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/*
 * These are the raster operations that depend on the color depth of
 * the display. This file gets included four times (once for each depth)
 * from chip_cirrus_gd5446_rop.c
 *
 * If e.g. the following is defined
 *
 * #define ROP_NAME src_xor_dst
 * #define ROP_CODE(d, s) d = (s) ^ (d)
 * #define ROP_DEPTH 16
 *
 * these 6 functions will be defined:
 *
 * _cirrus_bitblt_color_fill_src_xor_dst_16
 * _cirrus_bitblt_color_expansion_src_xor_dst_16
 * _cirrus_bitblt_transparent_color_expansion_src_xor_dst_16
 * _cirrus_bitblt_color_pattern_expansion_src_xor_dst_16
 * _cirrus_bitblt_transparent_color_pattern_expansion_src_xor_dst_16
 * _cirrus_bitblt_pattern_fill_src_xor_dst_16
 *
 * Special thanks to qemu's cirrus vga for this neat idea!
 */

/*
 * PUT_PIXEL() reads the pixel at "dest", combines it with "color" through
 * ROP_CODE and writes the result back to "dest".
 *
 * The macro takes no arguments: it picks up "cpssp", "dest" and "color"
 * by name from the scope in which it is used.
 *
 * Every variant is wrapped in do { } while (0) so that the expansion is a
 * single statement and its temporaries live in a scope of their own.
 * For 24 bpp the pixel is processed as three separate bytes.
 */
#if   ROP_DEPTH == 8
#define PUT_PIXEL() do {                                                \
                uint8_t d = video_readb(cpssp, dest);                   \
                ROP_CODE(d, (uint8_t) (color & 0x000000ff));            \
                video_writeb(cpssp, dest, d);                           \
        } while (0)

#elif ROP_DEPTH == 16
#define PUT_PIXEL() do {                                                \
                uint16_t d = video_readw(cpssp, dest);                  \
                ROP_CODE(d, (uint16_t) (color & 0x0000ffff));           \
                video_writew(cpssp, dest, d);                           \
        } while (0)

#elif ROP_DEPTH == 24
#define PUT_PIXEL() do {                                                \
                uint8_t d0 = video_readb(cpssp, dest + 0);              \
                uint8_t d1 = video_readb(cpssp, dest + 1);              \
                uint8_t d2 = video_readb(cpssp, dest + 2);              \
                ROP_CODE(d0, (uint8_t) ((color & 0x000000ff) >> 0));    \
                ROP_CODE(d1, (uint8_t) ((color & 0x0000ff00) >> 8));    \
                ROP_CODE(d2, (uint8_t) ((color & 0x00ff0000) >> 16));   \
                video_writeb(cpssp, dest + 0, d0);                      \
                video_writeb(cpssp, dest + 1, d1);                      \
                video_writeb(cpssp, dest + 2, d2);                      \
        } while (0)

#elif ROP_DEPTH == 32
#define PUT_PIXEL() do {                                                \
                uint32_t d = video_readl(cpssp, dest);                  \
                ROP_CODE(d, color);                                     \
                video_writel(cpssp, dest, d);                           \
        } while (0)

#else
#error chip_cirrus_gd5446_rop_depth.c needs ROP_DEPTH defined to either 8, 16, 24 or 32
#endif

/*
 * Another simple raster operation. The destination rectangle is filled
 * with the foreground color. Works for all color expansion depths.
 * (Same as _cirrus_bitblt_color_pattern_expansion_BLA with an all-1-pattern.)
 */
static void
paste(paste(paste(_cirrus_bitblt_color_fill_, ROP_NAME), _), ROP_DEPTH)(struct cpssp *cpssp)
{
        /* PUT_PIXEL() picks up "color" and "dest" by name. */
        unsigned long color = cpssp->bitblt.fg_color;
        unsigned long row_start = cpssp->bitblt.destination_pointer;
        unsigned long dest;
        int col, row;

#ifdef CIRRUS_DEBUG_BITBLITTER
        faum_log(FAUM_LOG_DEBUG, __FUNCTION__, "", "\n");
#endif

        /* Walk the rectangle scanline by scanline; width is counted in bytes. */
        row = 0;
        while (row < cpssp->bitblt.height) {
                dest = row_start;

                col = 0;
                while (col < cpssp->bitblt.width) {
                        /* Apply the raster operation with the foreground color. */
                        PUT_PIXEL();
                        dest += (ROP_DEPTH / 8);
                        col += (ROP_DEPTH / 8);
                }

                /* Advance to the start of the next destination scanline. */
                row_start += cpssp->bitblt.dest_pitch;
                row++;
        }
}

/*
 * The source is a monochrome image. Each bit of the source is replaced
 * with an entire pixel that can have either of two colors in destination.
 * 24bpp color expansion has to be transparent, so we don't do its
 * special skip.
 */
static void
paste(paste(paste(_cirrus_bitblt_color_expansion_, ROP_NAME), _), ROP_DEPTH)(struct cpssp *cpssp)
{
        /* PUT_PIXEL() picks up "color" and "dest" by name. */
        unsigned long color;
        unsigned long dest;
        unsigned long row_start = cpssp->bitblt.destination_pointer;
        unsigned long src = cpssp->bitblt.source_pointer;
        /* Left side clipping: skip_bits is in source bits, skip_bytes in destination bytes. */
        unsigned char skip_bits = cpssp->bitblt.dest_left_side_clipping & 0x07;
        unsigned char skip_bytes = skip_bits * (ROP_DEPTH / 8);
        unsigned char mono, mask;
        int col, row;

#ifdef CIRRUS_DEBUG_BITBLITTER
        faum_log(FAUM_LOG_DEBUG, __FUNCTION__, "", "\n");
#endif

        for (row = 0; row < cpssp->bitblt.height; row++) {
                /* Every scanline starts with a freshly fetched source byte. */
                mask = 0x80 >> skip_bits;
                mono = video_readb(cpssp, src++);
                dest = row_start + skip_bytes;

                col = skip_bytes;
                while (col < cpssp->bitblt.width) {
                        if (mask == 0) {
                                /* All eight bits consumed: fetch the next source byte. */
                                mask = 0x80;
                                mono = video_readb(cpssp, src++);
                        }

                        /* Set bit -> foreground color, cleared bit -> background color. */
                        color = (mask & mono)
                                ? cpssp->bitblt.fg_color
                                : cpssp->bitblt.bg_color;
                        PUT_PIXEL();

                        dest += (ROP_DEPTH / 8);
                        mask >>= 1;
                        col += (ROP_DEPTH / 8);
                }

                row_start += cpssp->bitblt.dest_pitch;
        }
}

/*
 * The source is a monochrome image. Each enabled bit of the source is replaced
 * with an entire pixel (of foreground color) in destination. Each disabled
 * source bit doesn't change the current destination pixel.
 */
static void
paste(paste(paste(_cirrus_bitblt_transparent_color_expansion_, ROP_NAME), _), ROP_DEPTH)(struct cpssp *cpssp)
{
        /* PUT_PIXEL() picks up "color" and "dest" by name. */
        unsigned long color = cpssp->bitblt.fg_color;
        unsigned long dest;
        unsigned long row_start = cpssp->bitblt.destination_pointer;
        unsigned long src = cpssp->bitblt.source_pointer;
        unsigned char skip_bytes, skip_bits;
        unsigned char mono, mask, invert;
        int col, row;

#ifdef CIRRUS_DEBUG_BITBLITTER
        faum_log(FAUM_LOG_DEBUG, __FUNCTION__, "", "\n");
#endif

        /* see trm 5.8: skip_bytes counts destination bytes, skip_bits source bits */
#if ROP_DEPTH == 24
        skip_bytes = cpssp->bitblt.dest_left_side_clipping & 0x1f;
        skip_bits  = skip_bytes / 3;
#else
        skip_bits  = cpssp->bitblt.dest_left_side_clipping & 0x07;
        skip_bytes = skip_bits * (ROP_DEPTH / 8);
#endif

        /* With inverted color expansion every source byte is complemented. */
        invert = (cpssp->bitblt.mode_extensions & BITBLT_INVERT_COLOR_EXPAND)
                ? 0xff : 0x00;

        for (row = 0; row < cpssp->bitblt.height; row++) {
                /* Every scanline starts with a freshly fetched source byte. */
                mask = 0x80 >> skip_bits;
                mono = video_readb(cpssp, src++) ^ invert;
                dest = row_start + skip_bytes;

                col = skip_bytes;
                while (col < cpssp->bitblt.width) {
                        if (mask == 0) {
                                /* All eight bits consumed: fetch the next source byte. */
                                mask = 0x80;
                                mono = video_readb(cpssp, src++) ^ invert;
                        }

                        /* Only set bits draw; cleared bits leave the destination alone. */
                        if (mask & mono) {
                                PUT_PIXEL();
                        }

                        dest += (ROP_DEPTH / 8);
                        mask >>= 1;
                        col += (ROP_DEPTH / 8);
                }

                row_start += cpssp->bitblt.dest_pitch;
        }
}

/*
 * Source is 8 bytes of monochrome data. This 8x8 pixel pattern gets
 * written color expanded into destination area repeatedly.
 * 24bpp color expansion has to be transparent, so we don't do its
 * special skip.
 * Only screen-to-screen pattern fills are supported in hardware, but why
 * should we care where cpssp->bitblt.source_pointer points to?
 */
static void
paste(paste(paste(_cirrus_bitblt_color_pattern_expansion_, ROP_NAME), _), ROP_DEPTH)(struct cpssp *cpssp)
{
        /* PUT_PIXEL() picks up "color" and "dest" by name. */
        unsigned long color;
        unsigned long dest;
        unsigned long row_start = cpssp->bitblt.destination_pointer;
        unsigned long src = cpssp->bitblt.source_pointer;
        /* Left side clipping: skip_bits is in pattern bits, skip_bytes in destination bytes. */
        unsigned char skip_bits = cpssp->bitblt.dest_left_side_clipping & 0x07;
        unsigned char skip_bytes = skip_bits * (ROP_DEPTH / 8);
        /* Index of the current pattern byte; starts at the low three bits of source start. */
        int pat_row = cpssp->blt_source_start[0] & 0x07;
        unsigned char pat_bit; /* current bit inside the pattern byte, as a mask */
        int col, row;

#ifdef CIRRUS_DEBUG_BITBLITTER
        faum_log(FAUM_LOG_DEBUG, __FUNCTION__, "", "\n");
#endif

        for (row = 0; row < cpssp->bitblt.height; row++) {
                dest = row_start + skip_bytes;
                pat_bit = 0x80 >> skip_bits;

                col = skip_bytes;
                while (col < cpssp->bitblt.width) {
                        /* The pattern byte is re-read from video memory for each pixel. */
                        color = (pat_bit & video_readb(cpssp, src + pat_row))
                                ? cpssp->bitblt.fg_color
                                : cpssp->bitblt.bg_color;
                        PUT_PIXEL();
                        dest += (ROP_DEPTH / 8);

                        /* Step to the next pattern bit, wrapping after the last one. */
                        pat_bit >>= 1;
                        if (pat_bit == 0) {
                                pat_bit = 0x80;
                        }
                        col += (ROP_DEPTH / 8);
                }

                row_start += cpssp->bitblt.dest_pitch;
                /* The pattern is eight bytes tall; wrap around. */
                pat_row = (pat_row + 1) & 0x07;
        }
}

/*
 * Source is 8 bytes of monochrome data. This 8x8 pixel pattern gets
 * written into destination area repeatedly. Each enabled bit
 * is replaced with an entire pixel (of foreground color) in destination.
 * Each disabled bit doesn't change the current destination pixel.
 * Only screen-to-screen pattern fills are supported in hardware, but why
 * should we care where cpssp->bitblt.source_pointer points to?
 */
static void
paste(paste(paste(_cirrus_bitblt_transparent_color_pattern_expansion_, ROP_NAME), _), ROP_DEPTH)(struct cpssp *cpssp)
{
        /* PUT_PIXEL() picks up "color" and "dest" by name. */
        unsigned long color = cpssp->bitblt.fg_color;
        unsigned long dest;
        unsigned long row_start = cpssp->bitblt.destination_pointer;
        unsigned long src = cpssp->bitblt.source_pointer;
        unsigned char skip_bytes, skip_bits;
        /* Index of the current pattern byte; starts at the low three bits of source start. */
        int pat_row = cpssp->blt_source_start[0] & 0x07;
        unsigned char pat_bit; /* current bit inside the pattern byte, as a mask */
        unsigned char invert;
        int col, row;

#ifdef CIRRUS_DEBUG_BITBLITTER
        faum_log(FAUM_LOG_DEBUG, __FUNCTION__, "", "\n");
#endif

        /* skip_bytes counts destination bytes, skip_bits counts pattern bits */
#if ROP_DEPTH == 24
        skip_bytes = cpssp->bitblt.dest_left_side_clipping & 0x1f;
        skip_bits  = skip_bytes / 3;
#else
        skip_bits  = cpssp->bitblt.dest_left_side_clipping & 0x07;
        skip_bytes = skip_bits * (ROP_DEPTH / 8);
#endif

        /* With inverted color expansion every pattern byte is complemented. */
        invert = (cpssp->bitblt.mode_extensions & BITBLT_INVERT_COLOR_EXPAND)
                ? 0xff : 0x00;

        for (row = 0; row < cpssp->bitblt.height; row++) {
                dest = row_start + skip_bytes;
                pat_bit = 0x80 >> skip_bits;

                col = skip_bytes;
                while (col < cpssp->bitblt.width) {
                        /*
                         * The pattern byte is re-read from video memory for
                         * each pixel; only set bits draw.
                         */
                        if (pat_bit & (video_readb(cpssp, src + pat_row) ^ invert)) {
                                PUT_PIXEL();
                        }
                        dest += (ROP_DEPTH / 8);

                        /* Step to the next pattern bit, wrapping after the last one. */
                        pat_bit >>= 1;
                        if (pat_bit == 0) {
                                pat_bit = 0x80;
                        }
                        col += (ROP_DEPTH / 8);
                }

                row_start += cpssp->bitblt.dest_pitch;
                /* The pattern is eight bytes tall; wrap around. */
                pat_row = (pat_row + 1) & 0x07;
        }
}

/*
 * Source is an array of 8 pixels by 8 scanlines containing the pattern.
 * This pattern is repeatedly copied into destination.
 * 8-bpp:  64 bytes of color data for 64 pixels.
 * 16-bpp: 128 bytes of color data for 64 pixels
 * 24-bpp: 24 bytes of color data, plus 8 bytes of padding
 *         for each scanline, repeated 8 times.
 * 32-bpp: 256 bytes of color/alpha data for 64 pixels.
 * Vertical offset into pattern contained in three lowest bits of source start.
 * Only screen-to-screen pattern fills are supported in hardware, but why
 * should we care where cpssp->bitblt.source_pointer points to?
 */
static void
paste(paste(paste(_cirrus_bitblt_pattern_fill_, ROP_NAME), _), ROP_DEPTH)(struct cpssp *cpssp)
{
        unsigned long color; /* referenced by name from PUT_PIXEL() */
        unsigned long dest, dest_line, src, src_line;
        int x, y;

        int pattern_x; /* byte offset inside the current pattern scanline */
        int pattern_y; /* index of the current pattern scanline (0..7) */
        unsigned char dest_skip; /* bytes to skip at the left edge */

#ifdef CIRRUS_DEBUG_BITBLITTER
        faum_log(FAUM_LOG_DEBUG, __FUNCTION__, "", "\n");
#endif

        dest_line = cpssp->bitblt.destination_pointer;
        src       = cpssp->bitblt.source_pointer;

#if ROP_DEPTH == 24
        dest_skip = cpssp->bitblt.dest_left_side_clipping & 0x1f;
#else
        /*
         * Mask out the pixel count first, then scale it to bytes.
         * '*' binds tighter than '&', so the parentheses are required;
         * without them the mask itself would be scaled.
         */
        dest_skip = (cpssp->bitblt.dest_left_side_clipping & 0x07)
                * (ROP_DEPTH / 8);
#endif

        pattern_y = cpssp->blt_source_start[0] & 0x07;

        for (y = 0; y < cpssp->bitblt.height; y++) {
                dest = dest_line + dest_skip;

                /* Locate the pattern scanline: 8, 16 or 32 bytes per line. */
#if ROP_DEPTH == 8
                src_line = src + pattern_y * 8;
#elif ROP_DEPTH == 16
                src_line = src + pattern_y * 16;
#else
                src_line = src + pattern_y * 32;
#endif
                /* The skipped bytes also advance the position in the pattern. */
                pattern_x = dest_skip;

                for (x = dest_skip;
                     x < cpssp->bitblt.width;
                     x += (ROP_DEPTH / 8)) {
                        /* Fetch one pattern pixel and step, wrapping at the line end. */
#if ROP_DEPTH == 8
                        color = video_readb(cpssp, src_line + pattern_x);
                        pattern_x = (pattern_x + 1) % 8;
#elif ROP_DEPTH == 16
                        color = video_readw(cpssp, src_line + pattern_x);
                        pattern_x = (pattern_x + 2) % 16;
#elif ROP_DEPTH == 24
                        color = video_readb(cpssp, src_line + pattern_x) |
                                (video_readb(cpssp, src_line + pattern_x + 1) << 8) |
                                (video_readb(cpssp, src_line + pattern_x + 2) << 16);
                        pattern_x = (pattern_x + 3) % 24;
#else
                        color = video_readl(cpssp, src_line + pattern_x);
                        pattern_x = (pattern_x + 4) % 32;
#endif
                        PUT_PIXEL();
                        dest += (ROP_DEPTH / 8);
                }

                dest_line += cpssp->bitblt.dest_pitch;
                /* The pattern is eight scanlines tall; wrap around. */
                pattern_y = (pattern_y + 1) % 8;
        }
}

/* Drop the depth-specific definition so that this file can be included again with another ROP_DEPTH. */
#undef PUT_PIXEL
