#pragma once
#include <stdint.h>
#include "../lifeconsts.h"
#include "../lifeperm.h"
#include "../eors.h"
namespace b3s23 {

    /*
     * Assembly-template fragments for iterate_sse2_32_28().
     *
     * Every macro expands to adjacent string literals, so the template the
     * compiler sees is the same instruction sequence as a fully unrolled
     * listing. All of them are #undef'd directly after the function.
     *
     * Register roles inside one asm statement:
     *   xmm13   lane mask {0, ~0, ~0, ~0}
     *   xmm14   0x3ffffffc in every lane (bits 2..29 of a row)
     *   bank A  xmm7  = four rows of cells
     *           xmm9  / xmm10 = left+right neighbour count, low / high bit
     *           xmm11 / xmm12 = left+centre+right count,    low / high bit
     *   bank B  xmm0, xmm2 / xmm3, xmm4 / xmm5 with the same roles
     *   xmm1, xmm6, xmm8   scratch; xmm6 holds the result of each step
     *   xmm15   running OR of changed cells (second pass only)
     */

    /* Build the two constant masks (xmm13, xmm14); clobbers ebx. */
    #define B3S23_3228_MASKS \
        "mov $0xffffffff, %%ebx \n\t" \
        "movd %%ebx, %%xmm13 \n\t" \
        "mov $0x3ffffffc, %%ebx \n\t" \
        "movd %%ebx, %%xmm14 \n\t" \
        "pshufd $1, %%xmm13, %%xmm13 \n\t" \
        "pshufd $0, %%xmm14, %%xmm14 \n\t"

    /* Load four rows from `src` into bank A and form their horizontal sums. */
    #define B3S23_3228_LOAD_A(src) \
        "movups " src ", %%xmm7 \n\t" \
        "movdqa %%xmm7, %%xmm6 \n\t" \
        "movdqa %%xmm7, %%xmm1 \n\t" \
        "psrld $1, %%xmm6 \n\t" \
        "pslld $1, %%xmm1 \n\t" \
        "movdqa %%xmm1, %%xmm9 \n\t" \
        "pxor %%xmm6, %%xmm9 \n\t" \
        "movdqa %%xmm1, %%xmm10 \n\t" \
        "pand %%xmm6, %%xmm10 \n\t" \
        "movdqa %%xmm9, %%xmm1 \n\t" \
        "pand %%xmm7, %%xmm1 \n\t" \
        "movdqa %%xmm9, %%xmm11 \n\t" \
        "pxor %%xmm7, %%xmm11 \n\t" \
        "movdqa %%xmm10, %%xmm12 \n\t" \
        "por %%xmm1, %%xmm12 \n\t"

    /* Load four rows from `src` into bank B and form their horizontal sums. */
    #define B3S23_3228_LOAD_B(src) \
        "movups " src ", %%xmm0 \n\t" \
        "movdqa %%xmm0, %%xmm6 \n\t" \
        "movdqa %%xmm0, %%xmm1 \n\t" \
        "psrld $1, %%xmm6 \n\t" \
        "pslld $1, %%xmm1 \n\t" \
        "movdqa %%xmm1, %%xmm2 \n\t" \
        "pxor %%xmm6, %%xmm2 \n\t" \
        "movdqa %%xmm1, %%xmm3 \n\t" \
        "pand %%xmm6, %%xmm3 \n\t" \
        "movdqa %%xmm2, %%xmm1 \n\t" \
        "pand %%xmm0, %%xmm1 \n\t" \
        "movdqa %%xmm2, %%xmm4 \n\t" \
        "pxor %%xmm0, %%xmm4 \n\t" \
        "movdqa %%xmm3, %%xmm5 \n\t" \
        "por %%xmm1, %%xmm5 \n\t"

    /*
     * With bank A holding rows r..r+3 and bank B rows r+4..r+7, leave the next
     * state of rows r+1..r+4 in xmm6: a cell is set when its eight-neighbour
     * count is 3, or is 2 and the cell itself is set. Overwrites xmm7, xmm9 and
     * xmm10 of bank A; xmm11/xmm12 and all of bank B are only read.
     */
    #define B3S23_3228_STEP_AB \
        "pand %%xmm13, %%xmm9 \n\t" \
        "pand %%xmm13, %%xmm10 \n\t" \
        "pand %%xmm13, %%xmm7 \n\t" \
        "movdqa %%xmm13, %%xmm1 \n\t" \
        "pandn %%xmm2, %%xmm1 \n\t" \
        "por %%xmm1, %%xmm9 \n\t" \
        "movdqa %%xmm13, %%xmm1 \n\t" \
        "pandn %%xmm3, %%xmm1 \n\t" \
        "por %%xmm1, %%xmm10 \n\t" \
        "movdqa %%xmm13, %%xmm1 \n\t" \
        "pandn %%xmm0, %%xmm1 \n\t" \
        "por %%xmm1, %%xmm7 \n\t" \
        "shufps $0x39, %%xmm9, %%xmm9 \n\t" \
        "shufps $0x39, %%xmm10, %%xmm10 \n\t" \
        "movdqa %%xmm11, %%xmm6 \n\t" \
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t" \
        "movdqa %%xmm12, %%xmm8 \n\t" \
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t" \
        "shufps $0x39, %%xmm7, %%xmm7 \n\t" \
        "pxor %%xmm11, %%xmm6 \n\t" \
        "pxor %%xmm6, %%xmm9 \n\t" \
        "movdqa %%xmm11, %%xmm1 \n\t" \
        "por %%xmm6, %%xmm1 \n\t" \
        "pand %%xmm9, %%xmm6 \n\t" \
        "pandn %%xmm1, %%xmm6 \n\t" \
        "por %%xmm9, %%xmm7 \n\t" \
        "movdqa %%xmm6, %%xmm1 \n\t" \
        "pxor %%xmm12, %%xmm1 \n\t" \
        "por %%xmm12, %%xmm6 \n\t" \
        "movdqa %%xmm8, %%xmm9 \n\t" \
        "por %%xmm10, %%xmm9 \n\t" \
        "pxor %%xmm10, %%xmm8 \n\t" \
        "pxor %%xmm9, %%xmm6 \n\t" \
        "pxor %%xmm8, %%xmm1 \n\t" \
        "pand %%xmm1, %%xmm6 \n\t" \
        "pand %%xmm7, %%xmm6 \n\t"

    /*
     * Mirror image of B3S23_3228_STEP_AB: bank B holds the upper four rows and
     * bank A the lower four. Overwrites xmm0, xmm2 and xmm3 of bank B.
     */
    #define B3S23_3228_STEP_BA \
        "pand %%xmm13, %%xmm2 \n\t" \
        "pand %%xmm13, %%xmm3 \n\t" \
        "pand %%xmm13, %%xmm0 \n\t" \
        "movdqa %%xmm13, %%xmm1 \n\t" \
        "pandn %%xmm9, %%xmm1 \n\t" \
        "por %%xmm1, %%xmm2 \n\t" \
        "movdqa %%xmm13, %%xmm1 \n\t" \
        "pandn %%xmm10, %%xmm1 \n\t" \
        "por %%xmm1, %%xmm3 \n\t" \
        "movdqa %%xmm13, %%xmm1 \n\t" \
        "pandn %%xmm7, %%xmm1 \n\t" \
        "por %%xmm1, %%xmm0 \n\t" \
        "shufps $0x39, %%xmm2, %%xmm2 \n\t" \
        "shufps $0x39, %%xmm3, %%xmm3 \n\t" \
        "movdqa %%xmm4, %%xmm6 \n\t" \
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t" \
        "movdqa %%xmm5, %%xmm8 \n\t" \
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t" \
        "shufps $0x39, %%xmm0, %%xmm0 \n\t" \
        "pxor %%xmm4, %%xmm6 \n\t" \
        "pxor %%xmm6, %%xmm2 \n\t" \
        "movdqa %%xmm4, %%xmm1 \n\t" \
        "por %%xmm6, %%xmm1 \n\t" \
        "pand %%xmm2, %%xmm6 \n\t" \
        "pandn %%xmm1, %%xmm6 \n\t" \
        "por %%xmm2, %%xmm0 \n\t" \
        "movdqa %%xmm6, %%xmm1 \n\t" \
        "pxor %%xmm5, %%xmm1 \n\t" \
        "por %%xmm5, %%xmm6 \n\t" \
        "movdqa %%xmm8, %%xmm2 \n\t" \
        "por %%xmm3, %%xmm2 \n\t" \
        "pxor %%xmm3, %%xmm8 \n\t" \
        "pxor %%xmm2, %%xmm6 \n\t" \
        "pxor %%xmm8, %%xmm1 \n\t" \
        "pand %%xmm1, %%xmm6 \n\t" \
        "pand %%xmm0, %%xmm6 \n\t"

    /*
     * Write the step result (xmm6) into the four rows at `dst`, replacing only
     * the bits selected by xmm14 and keeping the others. Afterwards xmm8 holds
     * the previous contents of `dst` and xmm1 the new contents.
     */
    #define B3S23_3228_MERGE(dst) \
        "pand %%xmm14, %%xmm6 \n\t" \
        "movups " dst ", %%xmm8 \n\t" \
        "movdqa %%xmm14, %%xmm1 \n\t" \
        "pandn %%xmm8, %%xmm1 \n\t" \
        "por %%xmm6, %%xmm1 \n\t" \
        "movups %%xmm1, " dst " \n\t"

    /* After a MERGE: xmm8 = old ^ new for this quad, OR-ed into xmm15. */
    #define B3S23_3228_DIFF_ACC \
        "pxor %%xmm1, %%xmm8 \n\t" \
        "por %%xmm8, %%xmm15 \n\t"

    /**
     * Advance a 32-row tile (one uint32_t per row, one bit per cell) by two
     * generations, or by one when `onegen` is set.
     *
     * Requires x86-64 with SSE2 (the assembly uses xmm8..xmm15). All memory
     * accesses in the assembly are unaligned loads/stores (movups).
     *
     * @param d      32 rows, updated in place. On the two-generation path only
     *               bits 2..29 of rows 2..29 are rewritten; everything else in
     *               `d` is left as it was. On the `onegen` path rows 2..29 are
     *               overwritten with whole words of the first generation.
     * @param e      Scratch of 32 words; must not overlap `d`. After the first
     *               pass e[i-1] is the next state of row i for i = 1..30
     *               (e[30] and e[31] are written too but are scratch). On the
     *               two-generation path e[0..11] are then reused for change
     *               masks, see `diffs`.
     * @param h      Optional (may be null) 32-word accumulator: OR-ed with the
     *               input rows, the first-generation rows 1..30 and, on the
     *               two-generation path, the final rows 2..29.
     * @param j      Optional (may be null) 32-word accumulator: AND-ed with the
     *               same rows that `h` is OR-ed with.
     * @param diffs  Optional (may be null) 3-word output, written only on the
     *               two-generation path: [0] = bits that changed in any of
     *               rows 2..29, [1] = bits changed in rows 2 and 3,
     *               [2] = bits changed in rows 28 and 29.
     * @param onegen Stop after one generation.
     * @return false when `onegen` is set; otherwise true iff no bit of `d` was
     *         changed by the update.
     */
    bool iterate_sse2_32_28(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        if (h) {
            for (int i = 0; i < 32; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 32; i++) {
                j[i] &= d[i];
            }
        }

        /*
         * Pass 1: d (rows 0..31) -> e. The store at byte offset 16*k receives
         * rows 4k+1..4k+4. The last step has no fresh quad below it, so its
         * upper two lanes (e[30], e[31]) are not meaningful.
         * An asm statement without outputs is implicitly volatile; it is
         * spelled out here, and the "memory" clobber covers the stores.
         */
        __asm__ __volatile__ (
            B3S23_3228_MASKS
            B3S23_3228_LOAD_A("(%0)")
            B3S23_3228_LOAD_B("16(%0)")
            B3S23_3228_STEP_AB
            "movups %%xmm6, (%1) \n\t"
            B3S23_3228_LOAD_A("32(%0)")
            B3S23_3228_STEP_BA
            "movups %%xmm6, 16(%1) \n\t"
            B3S23_3228_LOAD_B("48(%0)")
            B3S23_3228_STEP_AB
            "movups %%xmm6, 32(%1) \n\t"
            B3S23_3228_LOAD_A("64(%0)")
            B3S23_3228_STEP_BA
            "movups %%xmm6, 48(%1) \n\t"
            B3S23_3228_LOAD_B("80(%0)")
            B3S23_3228_STEP_AB
            "movups %%xmm6, 64(%1) \n\t"
            B3S23_3228_LOAD_A("96(%0)")
            B3S23_3228_STEP_BA
            "movups %%xmm6, 80(%1) \n\t"
            B3S23_3228_LOAD_B("112(%0)")
            B3S23_3228_STEP_AB
            "movups %%xmm6, 96(%1) \n\t"
            B3S23_3228_STEP_BA
            "movups %%xmm6, 112(%1) \n\t"
                : /* no output operands */
                : "r" (d), "r" (e)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4",
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10",
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 1; i < 31; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 31; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            for (int i = 2; i < 30; i++) {
                d[i] = e[i-1];
            }
            return false;
        }

        /*
         * Pass 2: e (first-generation rows 1..30) -> d rows 2..29, four rows
         * per step, merged under the bits-2..29 mask. Change masks end up in e:
         *   e[0..3]  = old ^ new for rows 2..5
         *   e[4..7]  = OR of old ^ new over all seven quads
         *   e[8..11] = old ^ new for rows 26..29
         * e[0..3] is overwritten only after it has been loaded into bank A.
         */
        __asm__ __volatile__ (
            B3S23_3228_MASKS
            B3S23_3228_LOAD_A("(%1)")
            B3S23_3228_LOAD_B("16(%1)")
            B3S23_3228_STEP_AB
            B3S23_3228_MERGE("8(%0)")
            "movdqa %%xmm8, %%xmm15 \n\t"
            "pxor %%xmm1, %%xmm15 \n\t"
            "movups %%xmm15, (%1) \n\t"
            B3S23_3228_LOAD_A("32(%1)")
            B3S23_3228_STEP_BA
            B3S23_3228_MERGE("24(%0)")
            B3S23_3228_DIFF_ACC
            B3S23_3228_LOAD_B("48(%1)")
            B3S23_3228_STEP_AB
            B3S23_3228_MERGE("40(%0)")
            B3S23_3228_DIFF_ACC
            B3S23_3228_LOAD_A("64(%1)")
            B3S23_3228_STEP_BA
            B3S23_3228_MERGE("56(%0)")
            B3S23_3228_DIFF_ACC
            B3S23_3228_LOAD_B("80(%1)")
            B3S23_3228_STEP_AB
            B3S23_3228_MERGE("72(%0)")
            B3S23_3228_DIFF_ACC
            B3S23_3228_LOAD_A("96(%1)")
            B3S23_3228_STEP_BA
            B3S23_3228_MERGE("88(%0)")
            B3S23_3228_DIFF_ACC
            B3S23_3228_LOAD_B("112(%1)")
            B3S23_3228_STEP_AB
            B3S23_3228_MERGE("104(%0)")
            B3S23_3228_DIFF_ACC
            "movups %%xmm8, 32(%1) \n\t"
            "movups %%xmm15, 16(%1) \n\t"
                : /* no output operands */
                : "r" (d), "r" (e)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4",
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10",
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 2; i < 30; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 30; i++) {
                j[i] &= d[i];
            }
        }

        /*
         * Fold the four lanes of the accumulated change mask. Reading them as
         * uint32_t (rather than through a uint64_t* cast of `e`) avoids a
         * strict-aliasing violation and a possibly misaligned 8-byte load,
         * and yields the same value.
         */
        const uint32_t changed = e[4] | e[5] | e[6] | e[7];
        if (diffs) {
            diffs[0] = changed;
            diffs[1] = e[0] | e[1];
            diffs[2] = e[10] | e[11];
        }
        return (changed == 0);
    }

    #undef B3S23_3228_MASKS
    #undef B3S23_3228_LOAD_A
    #undef B3S23_3228_LOAD_B
    #undef B3S23_3228_STEP_AB
    #undef B3S23_3228_STEP_BA
    #undef B3S23_3228_MERGE
    #undef B3S23_3228_DIFF_ACC

    /**
     * Two-pass SSE2 kernel over the 32-bit words of d, using e as scratch.
     * (NOTE(review): presumably this advances a 28-row tile of a B3/S23
     * cellular automaton by two generations, leaving a valid 24-row interior;
     * that reading comes from the namespace/function names - confirm.)
     *
     * Sequence, as visible in the code:
     *   1. If h is non-null, OR d[0..27] into h[0..27]; if j is non-null,
     *      AND d[0..27] into j[0..27].
     *   2. First asm statement: reads d[0..27], writes e[0..27].
     *   3. If h / j are non-null, fold e[i-1] into h[i] / j[i] for i in 1..26.
     *   4. If onegen is set: copy e[1..24] into d[2..25] and return false.
     *   5. Second asm statement: reads e[0..27]; for each of d[2..25] replaces
     *      the bits selected by the mask 0x0ffffff0 with the newly computed
     *      value (bits outside the mask keep their old d value) and records
     *      old XOR new. Afterwards e[0..3] holds the XOR for d[2..5],
     *      e[8..11] the XOR for d[22..25], and e[4..7] the lane-wise OR of
     *      the XORs of all six 4-word groups.
     *   6. If h / j are non-null, fold d[2..25] into h[2..25] / j[2..25].
     *
     * The asm uses xmm8-xmm15, so it only assembles for x86-64.
     *
     * @param d      in/out buffer; d[0..27] are read, d[2..25] are written.
     * @param e      scratch buffer of at least 28 words; overwritten.
     * @param h      optional (may be null) OR-accumulator, at least 28 words.
     * @param j      optional (may be null) AND-accumulator, at least 28 words.
     * @param diffs  optional (may be null) 3-word output:
     *               diffs[0] = OR of the changed bits of every updated word,
     *               diffs[1] = changed bits of d[2] | d[3],
     *               diffs[2] = changed bits of d[24] | d[25].
     * @param onegen when true, stop after the first pass (see step 4).
     * @return true when the second pass changed no bit of d[2..25];
     *         always false when onegen is set.
     */
    bool iterate_sse2_28_24(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        if (h) {
            for (int i = 0; i < 28; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 28; i++) {
                j[i] &= d[i];
            }
        }
        // First pass: d[0..27] -> e[0..27]. Operand %0 is d, %1 is e; the
        // third input operand is not referenced by the template.
        asm (
        // xmm13 = {0, ~0, ~0, ~0} (lane mask), xmm14 = 0x0ffffff0 in every lane.
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x0ffffff0, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "pshufd $1, %%xmm13, %%xmm13 \n\t"
        "pshufd $0, %%xmm14, %%xmm14 \n\t"
        // Load d[0..3] and combine each word with its 1-bit left/right shifts.
        "movups (%0), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        // Load d[4..7], same per-word combination.
        "movups 16(%0), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        // Splice lane 0 of the following group into this group (xmm13 mask),
        // rotate lanes (shufps 0x39 / 0x4e) to line up neighbouring words,
        // combine, and store the result to e[0..3].
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        "movups %%xmm6, (%1) \n\t"
        // Load d[8..11]; the two register sets swap roles; store e[4..7].
        "movups 32(%0), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        "movups %%xmm6, 16(%1) \n\t"
        // Load d[12..15]; store e[8..11].
        "movups 48(%0), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        "movups %%xmm6, 32(%1) \n\t"
        // Load d[16..19]; store e[12..15].
        "movups 64(%0), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        "movups %%xmm6, 48(%1) \n\t"
        // Load d[20..23]; store e[16..19].
        "movups 80(%0), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        "movups %%xmm6, 64(%1) \n\t"
        // Load d[24..27] (last input group); store e[20..23].
        "movups 96(%0), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        "movups %%xmm6, 80(%1) \n\t"
        // Final group: no further input is loaded before storing e[24..27].
        // NOTE(review): lanes 2-3 of this store (e[26], e[27]) appear to
        // depend on leftover register contents; the code below seems to
        // consume only e[24] and e[25] of this group - confirm.
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        "movups %%xmm6, 96(%1) \n\t"
                : /* no output operands */
                : "r" (d), "r" (e), "r" (apg::__sixteen24)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4",
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10",
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        // Fold the first-pass result into the accumulators; e[k] is paired
        // with position k+1 of d/h/j.
        if (h) {
            for (int i = 1; i < 27; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 27; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            // Single-pass mode: publish the first-pass result and stop.
            for (int i = 2; i < 26; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        // Second pass: e[0..27] -> d[2..25], merged under the xmm14 mask.
        // xmm15 accumulates (old d) XOR (new d) over all six groups.
        asm (
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x0ffffff0, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "pshufd $1, %%xmm13, %%xmm13 \n\t"
        "pshufd $0, %%xmm14, %%xmm14 \n\t"
        // Load e[0..3].
        "movups (%1), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        // Load e[4..7].
        "movups 16(%1), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        // Merge into d[2..5]: new = (old & ~mask) | (result & mask).
        // xmm15 = old ^ new; this first difference is also saved to e[0..3].
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 8(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 8(%0) \n\t"
        "movdqa %%xmm8, %%xmm15 \n\t"
        "pxor %%xmm1, %%xmm15 \n\t"
        "movups %%xmm15, (%1) \n\t"
        // Load e[8..11]; merge into d[6..9].
        "movups 32(%1), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 24(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 24(%0) \n\t"
        "pxor %%xmm1, %%xmm8 \n\t"
        "por %%xmm8, %%xmm15 \n\t"
        // Load e[12..15]; merge into d[10..13].
        "movups 48(%1), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 40(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 40(%0) \n\t"
        "pxor %%xmm1, %%xmm8 \n\t"
        "por %%xmm8, %%xmm15 \n\t"
        // Load e[16..19]; merge into d[14..17].
        "movups 64(%1), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 56(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 56(%0) \n\t"
        "pxor %%xmm1, %%xmm8 \n\t"
        "por %%xmm8, %%xmm15 \n\t"
        // Load e[20..23]; merge into d[18..21].
        "movups 80(%1), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 72(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 72(%0) \n\t"
        "pxor %%xmm1, %%xmm8 \n\t"
        "por %%xmm8, %%xmm15 \n\t"
        // Load e[24..27] (last group); merge into d[22..25].
        "movups 96(%1), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 88(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 88(%0) \n\t"
        "pxor %%xmm1, %%xmm8 \n\t"
        "por %%xmm8, %%xmm15 \n\t"
        // Publish the difference words: e[8..11] = XOR for the last group,
        // e[4..7] = lane-wise OR of the XORs of all groups.
        "movups %%xmm8, 32(%1) \n\t"
        "movups %%xmm15, 16(%1) \n\t"
                : /* no output operands */
                : "r" (d), "r" (e), "r" (apg::__sixteen24)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4",
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10",
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 2; i < 26; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 26; i++) {
                j[i] &= d[i];
            }
        }
        // OR together the four accumulated difference words e[4..7]. Reading
        // them as uint32_t (rather than through a uint64_t* cast of e) gives
        // the same value for diffs[0] and the same zero test, without
        // violating strict aliasing or assuming 8-byte alignment of e.
        uint32_t bigdiff = e[4] | e[5] | e[6] | e[7];
        if (diffs != 0) {
            diffs[0] = bigdiff;
            diffs[1] = e[0] | e[1];
            diffs[2] = e[10] | e[11];
        }
        return (bigdiff == 0);
    }

    /*
     * Advance a 24-word tile by up to two generations using SSE2.
     *
     * Each uint32_t is handled as a row of 32 one-bit cells: horizontal
     * neighbours come from 1-bit shifts inside a word, vertical neighbours
     * from the adjacent words. The cell rule itself is encoded in the
     * bit-sliced logic below (presumably B3/S23, going by the enclosing
     * namespace -- the boolean network has not been re-derived here).
     *
     * d      in/out tile, 24 words. After a full (two-generation) call, words
     *        d[2..21] have bits 6..25 replaced by the new state; every other
     *        bit of d is left as it was. With onegen set, d[2..21] are
     *        overwritten with whole words of the one-generation result.
     * e      scratch buffer, at least 24 words; holds the intermediate
     *        generation and, on return from a full call, the change masks.
     * h      optional (may be null), at least 24 words; OR-accumulates the
     *        states seen in this call.
     * j      optional (may be null), at least 24 words; AND-accumulates the
     *        states seen in this call.
     * diffs  optional (may be null), 3 words, written only by a full call:
     *        diffs[0] = OR of the change masks of all updated rows,
     *        diffs[1] = change mask of rows d[2] and d[3],
     *        diffs[2] = change mask of rows d[20] and d[21].
     * onegen when true, stop after one generation and return false.
     *
     * Returns true iff a full call left d[2..21] (bits 6..25) unchanged.
     *
     * x86-64 only: the templates use xmm8-xmm15. All vector memory accesses
     * use movups, so no particular alignment of d or e is required.
     */
    bool iterate_sse2_24_20(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        // Fold the incoming state into the history accumulators.
        if (h) {
            for (int i = 0; i < 24; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 24; i++) {
                j[i] &= d[i];
            }
        }

        // Pass 1: d -> e. e[k] is computed from d[k], d[k+1] and d[k+2], i.e.
        // it is the next state of row k+1. d itself is not written here.
        // Operand %2 (apg::__sixteen20) is passed in but never referenced by
        // the template; xmm14 is loaded but not used in this pass.
        asm (
        // xmm13 = lanes {0, ~0, ~0, ~0}; xmm14 = 0x03ffffc0 in every lane.
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x03ffffc0, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "pshufd $1, %%xmm13, %%xmm13 \n\t"
        "pshufd $0, %%xmm14, %%xmm14 \n\t"
        // Rows d[0..3]: xmm9 = left^right, xmm10 = left&right,
        // xmm11 = left^centre^right, xmm12 = majority(left, centre, right).
        "movups (%0), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        // Rows d[4..7]: same sums in xmm2, xmm3, xmm4, xmm5.
        "movups 16(%0), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        // Shift the row-wise sums by one and two rows (borrowing lanes from
        // the next group), combine them, and store e[0..3].
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        "movups %%xmm6, (%1) \n\t"
        // Rows d[8..11] loaded; produce e[4..7].
        "movups 32(%0), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        "movups %%xmm6, 16(%1) \n\t"
        // Rows d[12..15] loaded; produce e[8..11].
        "movups 48(%0), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        "movups %%xmm6, 32(%1) \n\t"
        // Rows d[16..19] loaded; produce e[12..15].
        "movups 64(%0), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        "movups %%xmm6, 48(%1) \n\t"
        // Rows d[20..23] loaded (last load); produce e[16..19].
        "movups 80(%0), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        "movups %%xmm6, 64(%1) \n\t"
        // Final group, no further load: leftover xmm9/xmm10/xmm7 contents
        // stand in for the rows past d[23], so only e[20] and e[21] of this
        // store are derived purely from d.
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        "movups %%xmm6, 80(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen20)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        // Fold the intermediate generation into the accumulators; e[i-1] is
        // the new state of row i.
        if (h) {
            for (int i = 1; i < 23; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 23; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            // Single-generation mode: copy whole words back (no column mask)
            // and report "changed" unconditionally.
            for (int i = 2; i < 22; i++) {
                d[i] = e[i-1];
            }
            return false;
        }

        // Pass 2: e -> d. The result for row k+2 is computed from e[k],
        // e[k+1], e[k+2], masked with xmm14 (bits 6..25) and merged into
        // d[k+2]; bits of d outside the mask are preserved. old^new of each
        // four-row group is OR-accumulated in xmm15.
        // Operand %2 is again unused by the template.
        asm (
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x03ffffc0, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "pshufd $1, %%xmm13, %%xmm13 \n\t"
        "pshufd $0, %%xmm14, %%xmm14 \n\t"
        "movups (%1), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "movups 16(%1), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        // Merge into d[2..5]; the change mask of this group seeds xmm15 and
        // is also stored to e[0..3] (already consumed as input).
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 8(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 8(%0) \n\t"
        "movdqa %%xmm8, %%xmm15 \n\t"
        "pxor %%xmm1, %%xmm15 \n\t"
        "movups %%xmm15, (%1) \n\t"
        "movups 32(%1), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        // Merge into d[6..9].
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 24(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 24(%0) \n\t"
        "pxor %%xmm1, %%xmm8 \n\t"
        "por %%xmm8, %%xmm15 \n\t"
        "movups 48(%1), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        // Merge into d[10..13].
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 40(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 40(%0) \n\t"
        "pxor %%xmm1, %%xmm8 \n\t"
        "por %%xmm8, %%xmm15 \n\t"
        "movups 64(%1), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        // Merge into d[14..17].
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 56(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 56(%0) \n\t"
        "pxor %%xmm1, %%xmm8 \n\t"
        "por %%xmm8, %%xmm15 \n\t"
        "movups 80(%1), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        // Merge into d[18..21].
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 72(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 72(%0) \n\t"
        "pxor %%xmm1, %%xmm8 \n\t"
        "por %%xmm8, %%xmm15 \n\t"
        // e[8..11] <- change mask of the last group (rows d[18..21]);
        // e[4..7]  <- lane-wise OR of all five groups' change masks.
        "movups %%xmm8, 32(%1) \n\t"
        "movups %%xmm15, 16(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen20)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        // Fold the second-generation rows into the accumulators.
        if (h) {
            for (int i = 2; i < 22; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 22; i++) {
                j[i] &= d[i];
            }
        }

        // Collapse the four accumulated change-mask lanes into one word.
        // This is read as plain uint32_t values rather than through a
        // uint64_t* cast of e: the cast would break strict aliasing and
        // assume 8-byte alignment that e is not otherwise required to have.
        // The folded value and the zero test are the same either way.
        const uint32_t anydiff = e[4] | e[5] | e[6] | e[7];
        if (diffs) {
            diffs[0] = anydiff;
            diffs[1] = e[0] | e[1];
            diffs[2] = e[10] | e[11];
        }
        return (anydiff == 0);
    }

    bool iterate_sse2_20_16(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        if (h) {
            for (int i = 0; i < 20; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 20; i++) {
                j[i] &= d[i];
            }
        }
        asm (
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x00ffff00, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "pshufd $1, %%xmm13, %%xmm13 \n\t"
        "pshufd $0, %%xmm14, %%xmm14 \n\t"
        "movups (%0), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "movups 16(%0), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        "movups %%xmm6, (%1) \n\t"
        "movups 32(%0), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        "movups %%xmm6, 16(%1) \n\t"
        "movups 48(%0), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        "movups %%xmm6, 32(%1) \n\t"
        "movups 64(%0), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        "movups %%xmm6, 48(%1) \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        "movups %%xmm6, 64(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen16)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 1; i < 19; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 19; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            for (int i = 2; i < 18; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        asm (
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x00ffff00, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "pshufd $1, %%xmm13, %%xmm13 \n\t"
        "pshufd $0, %%xmm14, %%xmm14 \n\t"
        "movups (%1), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "movups 16(%1), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 8(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 8(%0) \n\t"
        "movdqa %%xmm8, %%xmm15 \n\t"
        "pxor %%xmm1, %%xmm15 \n\t"
        "movups %%xmm15, (%1) \n\t"
        "movups 32(%1), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 24(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 24(%0) \n\t"
        "pxor %%xmm1, %%xmm8 \n\t"
        "por %%xmm8, %%xmm15 \n\t"
        "movups 48(%1), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 40(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 40(%0) \n\t"
        "pxor %%xmm1, %%xmm8 \n\t"
        "por %%xmm8, %%xmm15 \n\t"
        "movups 64(%1), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 56(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 56(%0) \n\t"
        "pxor %%xmm1, %%xmm8 \n\t"
        "por %%xmm8, %%xmm15 \n\t"
        "movups %%xmm8, 32(%1) \n\t"
        "movups %%xmm15, 16(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen16)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 2; i < 18; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 18; i++) {
                j[i] &= d[i];
            }
        }
        uint64_t* e64 = ((uint64_t*) e);
        uint64_t bigdiff = e64[2] | e64[3];
        if (diffs != 0) {
            diffs[0] = (bigdiff | (bigdiff >> 32));
            diffs[1] = e[0] | e[1];
            diffs[2] = e[10] | e[11];
        }
        return (bigdiff == 0);
    }

    /*
     * Advance a 16-row tile (one uint32_t bitmap per row) by one or two
     * generations using SSE2 inline assembly. The template uses xmm8..xmm15,
     * so it only assembles for x86-64. The update rule is presumably B3/S23
     * (Conway's Life), going by the enclosing namespace.
     *
     * d      16 rows of cell state, updated in place:
     *          - onegen == true : d[2..13] are overwritten with the full
     *            32-bit next-generation rows;
     *          - onegen == false: only the columns selected by the mask
     *            0x003ffc00 (bits 10..21) of d[2..13] are replaced by the
     *            state two generations on; all other bits of d are kept.
     * e      scratch buffer of at least 16 words. After the first pass e[k]
     *        holds the next state of row k+1. After the second pass
     *        e[0..3] is the change mask (old XOR new) of d[2..5], e[8..11]
     *        the change mask of d[10..13], and e[4..7] the OR of the change
     *        masks of all three 4-row groups.
     * h      optional (may be null): every generation seen is OR-ed in.
     * j      optional (may be null): every generation seen is AND-ed in.
     * diffs  optional (may be null), written only when onegen == false:
     *          diffs[0] = OR of all change masks (which columns changed),
     *          diffs[1] = changes in d[2] and d[3],
     *          diffs[2] = changes in d[12] and d[13].
     * onegen stop after a single generation.
     *
     * Returns false whenever onegen is set; otherwise returns true exactly
     * when the second pass left every stored row of d unchanged.
     */
    bool iterate_sse2_16_12(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        // Fold the starting generation into the optional accumulators.
        if (h) {
            for (int i = 0; i < 16; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 16; i++) {
                j[i] &= d[i];
            }
        }
        // First pass: d (rows 0..15) -> e, where e[k] is the next state of row k+1.
        // Operand %2 is passed in but never referenced by the template.
        asm (
        // xmm13 = {0, ~0, ~0, ~0}: selects lanes 1..3 so that lane 0 of the
        // following 4-row vector can be spliced in before rotating by one row.
        // xmm14 = 0x003ffc00 in every lane (not used by this first pass).
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x003ffc00, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "pshufd $1, %%xmm13, %%xmm13 \n\t"
        "pshufd $0, %%xmm14, %%xmm14 \n\t"
        // Load d[0..3]; build bitwise sums of each cell's left/right neighbours
        // (xmm9 = xor, xmm10 = and) and of the full horizontal triple
        // (xmm11 = xor, xmm12 = carry).
        "movups (%0), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        // Load d[4..7]; same horizontal sums into xmm2/xmm3 and xmm4/xmm5.
        "movups 16(%0), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        // Splice lane 0 of the second vector into the first and rotate by one
        // lane (shufps $0x39) to obtain rows 1..4; shufps $0x4e pairs the upper
        // half of one vector with the lower half of the next (rows 2..5).
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        // Store e[0..3] (next state of rows 1..4).
        "movups %%xmm6, (%1) \n\t"
        // Load d[8..11].
        "movups 32(%0), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        // Store e[4..7] (next state of rows 5..8).
        "movups %%xmm6, 16(%1) \n\t"
        // Load d[12..15].
        "movups 48(%0), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        // Store e[8..11] (next state of rows 9..12).
        "movups %%xmm6, 32(%1) \n\t"
        // No further rows to load: the final vector is assembled from the
        // registers left over from the previous chunks, so its upper lanes
        // mix in stale data. The C code below only consumes e[0..13].
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        // Store e[12..15].
        "movups %%xmm6, 48(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen12)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        // Fold generation 1 into the accumulators (e[i-1] is row i).
        if (h) {
            for (int i = 1; i < 15; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 15; i++) {
                j[i] &= e[i-1];
            }
        }
        // Single-generation mode: commit rows 2..13 and stop. No change
        // detection is done on this path, hence the unconditional false.
        if (onegen) {
            for (int i = 2; i < 14; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        // Second pass: e (generation 1) -> d[2..13] (generation 2), restricted
        // to the columns in xmm14, while recording which bits of d changed.
        // Operand %2 is again unused by the template.
        asm (
        // xmm13 = {0, ~0, ~0, ~0}; xmm14 = 0x003ffc00 in every lane (the
        // columns of d that this pass is allowed to overwrite).
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x003ffc00, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "pshufd $1, %%xmm13, %%xmm13 \n\t"
        "pshufd $0, %%xmm14, %%xmm14 \n\t"
        // Load e[0..3].
        "movups (%1), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        // Load e[4..7].
        "movups 16(%1), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        // Merge into d[2..5]: new bits inside the column mask, old bits
        // outside it. xmm15 starts the change accumulator (old XOR merged),
        // which is also written to e[0..3].
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 8(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 8(%0) \n\t"
        "movdqa %%xmm8, %%xmm15 \n\t"
        "pxor %%xmm1, %%xmm15 \n\t"
        "movups %%xmm15, (%1) \n\t"
        // Load e[8..11].
        "movups 32(%1), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        // Merge into d[6..9] and OR its change mask into xmm15.
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 24(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 24(%0) \n\t"
        "pxor %%xmm1, %%xmm8 \n\t"
        "por %%xmm8, %%xmm15 \n\t"
        // Load e[12..15].
        "movups 48(%1), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        // Merge into d[10..13]; e[8..11] receives this group's change mask
        // and e[4..7] the OR of all three groups' change masks.
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 40(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 40(%0) \n\t"
        "pxor %%xmm1, %%xmm8 \n\t"
        "por %%xmm8, %%xmm15 \n\t"
        "movups %%xmm8, 32(%1) \n\t"
        "movups %%xmm15, 16(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen12)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        // Fold generation 2 (now stored in d[2..13]) into the accumulators.
        if (h) {
            for (int i = 2; i < 14; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 14; i++) {
                j[i] &= d[i];
            }
        }
        // e[4..7] is the combined change mask written by the second pass.
        // OR the four words directly instead of reading them through a
        // uint64_t* cast, which violated strict aliasing and assumed 8-byte
        // alignment of e. The result is identical on little-endian x86.
        const uint32_t changed = e[4] | e[5] | e[6] | e[7];
        if (diffs != 0) {
            diffs[0] = changed;
            diffs[1] = e[0] | e[1];
            diffs[2] = e[10] | e[11];
        }
        return (changed == 0);
    }

    /*
     * Advance a 12-row strip of a bit-packed B3/S23 grid by one or two
     * generations using SSE2.
     *
     * Layout: each uint32_t is one row (the assembly uses per-lane shifts by
     * one bit for left/right neighbours and lane shuffles for the rows above
     * and below).
     *
     * d      : 12 input rows. With onegen, d[2..9] are overwritten whole;
     *          otherwise only the bits selected by 0x000ff000 in d[2..9] are
     *          replaced.
     * e      : 12-word scratch/output buffer. After the first pass e[k] holds
     *          the next state of row k+1 for k = 0..9 (e[10], e[11] are
     *          produced from stale register lanes and are never consumed).
     *          After the second pass e[0..3] is the change mask of d[2..5],
     *          e[8..11] the change mask of d[6..9], and e[4..7] the OR of the
     *          two.
     * h, j   : optional (may be null) accumulators; every generation that is
     *          produced is OR-ed into h and AND-ed into j.
     * diffs  : optional (may be null); receives {any change, change in
     *          d[2..3], change in d[8..9]}.
     * onegen : when true, stop after one generation and return false.
     *
     * Returns true iff the second pass left the masked part of d[2..9]
     * unchanged.
     */
    bool iterate_sse2_12_8(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        // Fold the initial state of all 12 rows into the accumulators.
        if (h) {
            for (int i = 0; i < 12; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 12; i++) {
                j[i] &= d[i];
            }
        }
        // First pass: d -> e. Operand %2 is supplied but not referenced by
        // the template, and xmm14 is loaded here but only used by the second
        // pass.
        asm (
        // xmm13 = {0, ~0, ~0, ~0} (lane-blend mask); xmm14 = 0x000ff000 in every lane.
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x000ff000, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "pshufd $1, %%xmm13, %%xmm13 \n\t"
        "pshufd $0, %%xmm14, %%xmm14 \n\t"
        // Rows 0-3: xmm9/xmm10 = 2-bit count of left+right neighbours,
        // xmm11/xmm12 = 2-bit count of left+centre+right.
        "movups (%0), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        // Rows 4-7: same sums in xmm2/xmm3 and xmm4/xmm5.
        "movups 16(%0), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        // Blend in lane 0 of the next group and rotate so each lane sees the
        // row one below (0x39) / two below (0x4e); then apply the rule for
        // centre rows 1-4 and store to e[0..3].
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        "movups %%xmm6, (%1) \n\t"
        // Rows 8-11: horizontal sums.
        "movups 32(%0), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        // Centre rows 5-8 -> e[4..7].
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        "movups %%xmm6, 16(%1) \n\t"
        // Centre rows 9-10 -> e[8..9]. No further rows are loaded, so the
        // lanes blended/shuffled in from xmm2/xmm3/xmm0/xmm4/xmm5 are stale
        // and e[10..11] carry no meaningful result.
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        "movups %%xmm6, 32(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen8)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        // e[k] is the new state of row k+1, hence the i-1 index below.
        if (h) {
            for (int i = 1; i < 11; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 11; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            // Single-generation request: commit rows 2..9 unmasked and stop.
            for (int i = 2; i < 10; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        // Second pass: e -> d[2..9] (masked by xmm14), change masks -> e.
        asm (
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x000ff000, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "pshufd $1, %%xmm13, %%xmm13 \n\t"
        "pshufd $0, %%xmm14, %%xmm14 \n\t"
        // e[0..3]: horizontal sums.
        "movups (%1), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        // e[4..7]: horizontal sums.
        "movups 16(%1), %%xmm0 \n\t"
        "movdqa %%xmm0, %%xmm6 \n\t"
        "movdqa %%xmm0, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm1, %%xmm3 \n\t"
        "pand %%xmm6, %%xmm3 \n\t"
        "movdqa %%xmm2, %%xmm1 \n\t"
        "pand %%xmm0, %%xmm1 \n\t"
        "movdqa %%xmm2, %%xmm4 \n\t"
        "pxor %%xmm0, %%xmm4 \n\t"
        "movdqa %%xmm3, %%xmm5 \n\t"
        "por %%xmm1, %%xmm5 \n\t"
        // Apply the rule for the rows destined for d[2..5].
        "pand %%xmm13, %%xmm9 \n\t"
        "pand %%xmm13, %%xmm10 \n\t"
        "pand %%xmm13, %%xmm7 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm2, %%xmm1 \n\t"
        "por %%xmm1, %%xmm9 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm3, %%xmm1 \n\t"
        "por %%xmm1, %%xmm10 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm0, %%xmm1 \n\t"
        "por %%xmm1, %%xmm7 \n\t"
        "shufps $0x39, %%xmm9, %%xmm9 \n\t"
        "shufps $0x39, %%xmm10, %%xmm10 \n\t"
        "movdqa %%xmm11, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm4, %%xmm6 \n\t"
        "movdqa %%xmm12, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm5, %%xmm8 \n\t"
        "shufps $0x39, %%xmm7, %%xmm7 \n\t"
        "pxor %%xmm11, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm11, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm9, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm9, %%xmm7 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm12, %%xmm1 \n\t"
        "por %%xmm12, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm9 \n\t"
        "por %%xmm10, %%xmm9 \n\t"
        "pxor %%xmm10, %%xmm8 \n\t"
        "pxor %%xmm9, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm7, %%xmm6 \n\t"
        // Merge masked result into d[2..5]; xmm15 = old ^ new -> e[0..3].
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 8(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 8(%0) \n\t"
        "movdqa %%xmm8, %%xmm15 \n\t"
        "pxor %%xmm1, %%xmm15 \n\t"
        "movups %%xmm15, (%1) \n\t"
        // e[8..11]: horizontal sums.
        "movups 32(%1), %%xmm7 \n\t"
        "movdqa %%xmm7, %%xmm6 \n\t"
        "movdqa %%xmm7, %%xmm1 \n\t"
        "psrld $1, %%xmm6 \n\t"
        "pslld $1, %%xmm1 \n\t"
        "movdqa %%xmm1, %%xmm9 \n\t"
        "pxor %%xmm6, %%xmm9 \n\t"
        "movdqa %%xmm1, %%xmm10 \n\t"
        "pand %%xmm6, %%xmm10 \n\t"
        "movdqa %%xmm9, %%xmm1 \n\t"
        "pand %%xmm7, %%xmm1 \n\t"
        "movdqa %%xmm9, %%xmm11 \n\t"
        "pxor %%xmm7, %%xmm11 \n\t"
        "movdqa %%xmm10, %%xmm12 \n\t"
        "por %%xmm1, %%xmm12 \n\t"
        // Apply the rule for the rows destined for d[6..9].
        "pand %%xmm13, %%xmm2 \n\t"
        "pand %%xmm13, %%xmm3 \n\t"
        "pand %%xmm13, %%xmm0 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm9, %%xmm1 \n\t"
        "por %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm10, %%xmm1 \n\t"
        "por %%xmm1, %%xmm3 \n\t"
        "movdqa %%xmm13, %%xmm1 \n\t"
        "pandn %%xmm7, %%xmm1 \n\t"
        "por %%xmm1, %%xmm0 \n\t"
        "shufps $0x39, %%xmm2, %%xmm2 \n\t"
        "shufps $0x39, %%xmm3, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm6 \n\t"
        "shufps $0x4e, %%xmm11, %%xmm6 \n\t"
        "movdqa %%xmm5, %%xmm8 \n\t"
        "shufps $0x4e, %%xmm12, %%xmm8 \n\t"
        "shufps $0x39, %%xmm0, %%xmm0 \n\t"
        "pxor %%xmm4, %%xmm6 \n\t"
        "pxor %%xmm6, %%xmm2 \n\t"
        "movdqa %%xmm4, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "pand %%xmm2, %%xmm6 \n\t"
        "pandn %%xmm1, %%xmm6 \n\t"
        "por %%xmm2, %%xmm0 \n\t"
        "movdqa %%xmm6, %%xmm1 \n\t"
        "pxor %%xmm5, %%xmm1 \n\t"
        "por %%xmm5, %%xmm6 \n\t"
        "movdqa %%xmm8, %%xmm2 \n\t"
        "por %%xmm3, %%xmm2 \n\t"
        "pxor %%xmm3, %%xmm8 \n\t"
        "pxor %%xmm2, %%xmm6 \n\t"
        "pxor %%xmm8, %%xmm1 \n\t"
        "pand %%xmm1, %%xmm6 \n\t"
        "pand %%xmm0, %%xmm6 \n\t"
        // Merge masked result into d[6..9]; change mask -> e[8..11], and the
        // OR of both change masks -> e[4..7].
        "pand %%xmm14, %%xmm6 \n\t"
        "movups 24(%0), %%xmm8 \n\t"
        "movdqa %%xmm14, %%xmm1 \n\t"
        "pandn %%xmm8, %%xmm1 \n\t"
        "por %%xmm6, %%xmm1 \n\t"
        "movups %%xmm1, 24(%0) \n\t"
        "pxor %%xmm1, %%xmm8 \n\t"
        "por %%xmm8, %%xmm15 \n\t"
        "movups %%xmm8, 32(%1) \n\t"
        "movups %%xmm15, 16(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen8)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        // Fold the second-generation rows into the accumulators.
        if (h) {
            for (int i = 2; i < 10; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 10; i++) {
                j[i] &= d[i];
            }
        }
        // e[4..7] is the combined change mask. It is read as uint32_t (not
        // through a uint64_t* cast) so the access neither violates strict
        // aliasing nor depends on e being 8-byte aligned.
        const uint32_t bigdiff = e[4] | e[5] | e[6] | e[7];
        if (diffs != 0) {
            diffs[0] = bigdiff;
            diffs[1] = e[0] | e[1];
            diffs[2] = e[10] | e[11];
        }
        return (bigdiff == 0);
    }

    /*
     * Run a cascade of up to four SSE2 kernels over d, each one starting two
     * rows further into d, h, j and the local scratch buffer than the last.
     *
     * A stage is entered when n reaches its threshold (7, 5, 3, 1) and is
     * asked for a single generation when n equals that threshold exactly.
     * Returns 8, 6, 4 or 2 as soon as the corresponding stage returns true,
     * and 0 if none does.
     *
     * h and j are offset unconditionally, so callers are expected to pass
     * valid arrays for both.
     */
    int iterate_var_sse2(int n, uint32_t * __restrict__ d, uint32_t * __restrict__ h, uint32_t * __restrict__ j) {
        uint32_t scratch[32];
        if ((n >= 7) && iterate_sse2_32_28(d, scratch, h, j, 0, (n == 7))) {
            return 8;
        }
        if ((n >= 5) && iterate_sse2_28_24(d + 2, scratch + 2, h + 2, j + 2, 0, (n == 5))) {
            return 6;
        }
        if ((n >= 3) && iterate_sse2_24_20(d + 4, scratch + 4, h + 4, j + 4, 0, (n == 3))) {
            return 4;
        }
        if ((n >= 1) && iterate_sse2_20_16(d + 6, scratch + 6, h + 6, j + 6, 0, (n == 1))) {
            return 2;
        }
        return 0;
    }

    /*
     * Same cascade as the four-pointer overload, but with no j accumulator:
     * every stage receives a null j. h is still offset by two rows per
     * stage, so callers are expected to pass a valid array for it.
     *
     * Returns 8, 6, 4 or 2 as soon as the corresponding stage returns true,
     * and 0 if none does.
     */
    int iterate_var_sse2(int n, uint32_t * __restrict__ d, uint32_t * __restrict__ h) {
        uint32_t scratch[32];
        if ((n >= 7) && iterate_sse2_32_28(d, scratch, h, 0, 0, (n == 7))) {
            return 8;
        }
        if ((n >= 5) && iterate_sse2_28_24(d + 2, scratch + 2, h + 2, 0, 0, (n == 5))) {
            return 6;
        }
        if ((n >= 3) && iterate_sse2_24_20(d + 4, scratch + 4, h + 4, 0, 0, (n == 3))) {
            return 4;
        }
        if ((n >= 1) && iterate_sse2_20_16(d + 6, scratch + 6, h + 6, 0, 0, (n == 1))) {
            return 2;
        }
        return 0;
    }

    /*
     * Same cascade as the other overloads with neither accumulator: every
     * stage receives null h, j and diffs pointers and only d is updated.
     *
     * Returns 8, 6, 4 or 2 as soon as the corresponding stage returns true,
     * and 0 if none does.
     */
    int iterate_var_sse2(int n, uint32_t * __restrict__ d) {
        uint32_t scratch[32];
        if ((n >= 7) && iterate_sse2_32_28(d, scratch, 0, 0, 0, (n == 7))) {
            return 8;
        }
        if ((n >= 5) && iterate_sse2_28_24(d + 2, scratch + 2, 0, 0, 0, (n == 5))) {
            return 6;
        }
        if ((n >= 3) && iterate_sse2_24_20(d + 4, scratch + 4, 0, 0, 0, (n == 3))) {
            return 4;
        }
        if ((n >= 1) && iterate_sse2_20_16(d + 6, scratch + 6, 0, 0, 0, (n == 1))) {
            return 2;
        }
        return 0;
    }

    bool iterate_avx_32_28(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        if (h) {
            for (int i = 0; i < 32; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 32; i++) {
                j[i] &= d[i];
            }
        }
        asm (
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x3ffffffc, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "vpshufd $1, %%xmm13, %%xmm13 \n\t"
        "vpshufd $0, %%xmm14, %%xmm14 \n\t"
        "vmovdqu (%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vmovdqu 16(%0), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, (%1) \n\t"
        "vmovdqu 32(%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 16(%1) \n\t"
        "vmovdqu 48(%0), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 32(%1) \n\t"
        "vmovdqu 64(%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 48(%1) \n\t"
        "vmovdqu 80(%0), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 64(%1) \n\t"
        "vmovdqu 96(%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 80(%1) \n\t"
        "vmovdqu 112(%0), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 96(%1) \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 112(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen28)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 1; i < 31; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 31; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            for (int i = 2; i < 30; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        asm (
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x3ffffffc, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "vpshufd $1, %%xmm13, %%xmm13 \n\t"
        "vpshufd $0, %%xmm14, %%xmm14 \n\t"
        "vmovdqu (%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vmovdqu 16(%1), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 8(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 8(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm15 \n\t"
        "vmovdqu %%xmm15, (%1) \n\t"
        "vmovdqu 32(%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 24(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 24(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        "vmovdqu 48(%1), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 40(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 40(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        "vmovdqu 64(%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 56(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 56(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        "vmovdqu 80(%1), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 72(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 72(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        "vmovdqu 96(%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 88(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 88(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        "vmovdqu 112(%1), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 104(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 104(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        "vmovdqu %%xmm8, 32(%1) \n\t"
        "vmovdqu %%xmm15, 16(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen28)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 2; i < 30; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 30; i++) {
                j[i] &= d[i];
            }
        }
        uint64_t* e64 = ((uint64_t*) e);
        uint64_t bigdiff = e64[2] | e64[3];
        if (diffs != 0) {
            diffs[0] = (bigdiff | (bigdiff >> 32));
            diffs[1] = e[0] | e[1];
            diffs[2] = e[10] | e[11];
        }
        return (bigdiff == 0);
    }

    bool iterate_avx_28_24(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        if (h) {
            for (int i = 0; i < 28; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 28; i++) {
                j[i] &= d[i];
            }
        }
        asm (
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x0ffffff0, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "vpshufd $1, %%xmm13, %%xmm13 \n\t"
        "vpshufd $0, %%xmm14, %%xmm14 \n\t"
        "vmovdqu (%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vmovdqu 16(%0), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, (%1) \n\t"
        "vmovdqu 32(%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 16(%1) \n\t"
        "vmovdqu 48(%0), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 32(%1) \n\t"
        "vmovdqu 64(%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 48(%1) \n\t"
        "vmovdqu 80(%0), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 64(%1) \n\t"
        "vmovdqu 96(%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 80(%1) \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 96(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen24)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 1; i < 27; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 27; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            for (int i = 2; i < 26; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        asm (
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x0ffffff0, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "vpshufd $1, %%xmm13, %%xmm13 \n\t"
        "vpshufd $0, %%xmm14, %%xmm14 \n\t"
        "vmovdqu (%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vmovdqu 16(%1), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 8(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 8(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm15 \n\t"
        "vmovdqu %%xmm15, (%1) \n\t"
        "vmovdqu 32(%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 24(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 24(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        "vmovdqu 48(%1), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 40(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 40(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        "vmovdqu 64(%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 56(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 56(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        "vmovdqu 80(%1), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 72(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 72(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        "vmovdqu 96(%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 88(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 88(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        "vmovdqu %%xmm8, 32(%1) \n\t"
        "vmovdqu %%xmm15, 16(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen24)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 2; i < 26; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 26; i++) {
                j[i] &= d[i];
            }
        }
        uint64_t* e64 = ((uint64_t*) e);
        uint64_t bigdiff = e64[2] | e64[3];
        if (diffs != 0) {
            diffs[0] = (bigdiff | (bigdiff >> 32));
            diffs[1] = e[0] | e[1];
            diffs[2] = e[10] | e[11];
        }
        return (bigdiff == 0);
    }

    /*
     * Unrolled AVX kernel operating on a 24-word buffer.
     *
     * Each uint32_t word is treated as one row of cells: 1-bit shifts supply the
     * horizontal neighbours and adjacent words supply the vertical neighbours.
     * The update applied is presumably the B3/S23 rule named by the enclosing
     * namespace (NOTE(review): confirm against the code generator).
     *
     * Pass 1 reads d[0..23] and writes a result into e, where e[k] corresponds to
     * row k+1 of d. Pass 2 (skipped when onegen is set) reads e and merges a
     * further result into d[2..21], touching only the bit columns selected by the
     * mask 0x03ffffc0 (bits 6..25), and records which bits of d changed.
     *
     * d      - in/out buffer; words 0..23 are read, words 2..21 may be rewritten.
     * e      - scratch buffer; words 0..23 are written by pass 1. After pass 2:
     *            e[0..3]  = change mask of d[2..5],
     *            e[4..7]  = OR of the change masks of all five 4-word blocks,
     *            e[8..11] = change mask of d[18..21].
     * h      - optional (may be null): every state seen is OR-ed into it.
     * j      - optional (may be null): every state seen is AND-ed into it.
     * diffs  - optional (may be null): receives three words,
     *            diffs[0] = e[4] | e[5] | e[6] | e[7],
     *            diffs[1] = e[0] | e[1]   (changes in d[2] and d[3]),
     *            diffs[2] = e[10] | e[11] (changes in d[20] and d[21]).
     *          Not written on the onegen path.
     * onegen - when true, only pass 1 is run; e[1..20] is copied into d[2..21]
     *          (all 32 bit columns) and the function returns false.
     *
     * Returns true when pass 2 left d unchanged (e[4..7] are all zero),
     * false otherwise, and always false on the onegen path.
     */
    bool iterate_avx_24_20(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        /* Fold the incoming state into the optional OR / AND accumulators. */
        if (h) {
            for (int i = 0; i < 24; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 24; i++) {
                j[i] &= d[i];
            }
        }
        /*
         * Pass 1: d -> e.
         * NOTE(review): the third input operand is never referenced as %2 by
         * this template; it is kept as-is to leave the asm statement untouched.
         */
        asm (
        /* xmm13 = {0, ~0, ~0, ~0}: lane-select mask used to splice lane 0 of the
           next block onto lanes 1..3 of the current one.
           xmm14 = 0x03ffffc0 in every lane (not referenced again in this pass). */
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x03ffffc0, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "vpshufd $1, %%xmm13, %%xmm13 \n\t"
        "vpshufd $0, %%xmm14, %%xmm14 \n\t"
        /* Rows d[0..3]: xmm9/xmm10 = sum/carry of the left and right neighbours,
           xmm11/xmm12 = sum/carry of left, centre and right. */
        "vmovdqu (%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        /* Rows d[4..7]: same per-row sums, in xmm2/xmm3 and xmm4/xmm5. */
        "vmovdqu 16(%0), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        /* Splice lane 0 of the second block into the first and rotate (0x39) to
           obtain the rows shifted down by one; 0x4e builds the rows shifted down
           by two. */
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        /* Combine the sums of the three vertically adjacent rows into the
           output bits for rows 1..4. */
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        /* e[0..3] <- result for rows 1..4. */
        "vmovdqu %%xmm6, (%1) \n\t"
        /* Rows d[8..11]; the two register sets swap roles on every step. */
        "vmovdqu 32(%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        /* e[4..7] <- result for rows 5..8. */
        "vmovdqu %%xmm6, 16(%1) \n\t"
        /* Rows d[12..15]. */
        "vmovdqu 48(%0), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        /* e[8..11] <- result for rows 9..12. */
        "vmovdqu %%xmm6, 32(%1) \n\t"
        /* Rows d[16..19]. */
        "vmovdqu 64(%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        /* e[12..15] <- result for rows 13..16. */
        "vmovdqu %%xmm6, 48(%1) \n\t"
        /* Rows d[20..23] (last load). */
        "vmovdqu 80(%0), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        /* e[16..19] <- result for rows 17..20. */
        "vmovdqu %%xmm6, 64(%1) \n\t"
        /* Final step has no further rows to load, so it reuses whatever the
           previous step left in xmm9/xmm10/xmm7.
           NOTE(review): lanes 2..3 of the resulting store (e[22], e[23]) therefore
           look like don't-care values; the C code below only reads e[0..21]. */
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        /* e[20..23] <- final block. */
        "vmovdqu %%xmm6, 80(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen20)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        /* Fold the pass-1 result into the accumulators; e[i-1] is row i. */
        if (h) {
            for (int i = 1; i < 23; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 23; i++) {
                j[i] &= e[i-1];
            }
        }
        /* Single-step mode: publish the pass-1 result into d[2..21] and stop.
           No change information is produced on this path. */
        if (onegen) {
            for (int i = 2; i < 22; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        /*
         * Pass 2: e -> d[2..21], masked by xmm14, with change tracking in xmm15.
         * NOTE(review): as in pass 1, the third input operand is not referenced
         * by the template.
         */
        asm (
        /* Same constants as pass 1; here xmm14 (0x03ffffc0 per lane) selects the
           bit columns of d that may be overwritten. */
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x03ffffc0, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "vpshufd $1, %%xmm13, %%xmm13 \n\t"
        "vpshufd $0, %%xmm14, %%xmm14 \n\t"
        /* Rows e[0..3]. */
        "vmovdqu (%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        /* Rows e[4..7]. */
        "vmovdqu 16(%1), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        /* d[2..5] <- (result & mask) | (old & ~mask); xmm15 starts as the
           change mask (new ^ old) of this block and is saved to e[0..3]. */
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 8(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 8(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm15 \n\t"
        "vmovdqu %%xmm15, (%1) \n\t"
        /* Rows e[8..11]. */
        "vmovdqu 32(%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        /* d[6..9] <- masked merge; OR this block's change mask into xmm15. */
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 24(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 24(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        /* Rows e[12..15]. */
        "vmovdqu 48(%1), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        /* d[10..13] <- masked merge; accumulate change mask. */
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 40(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 40(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        /* Rows e[16..19]. */
        "vmovdqu 64(%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        /* d[14..17] <- masked merge; accumulate change mask. */
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 56(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 56(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        /* Rows e[20..23] (last load). */
        "vmovdqu 80(%1), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        /* d[18..21] <- masked merge; accumulate change mask. */
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 72(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 72(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        /* e[8..11] <- change mask of the last block (d[18..21]);
           e[4..7]  <- OR of the change masks of all five blocks. */
        "vmovdqu %%xmm8, 32(%1) \n\t"
        "vmovdqu %%xmm15, 16(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen20)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        /* Fold the pass-2 state (the rewritten rows of d) into the accumulators. */
        if (h) {
            for (int i = 2; i < 22; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 22; i++) {
                j[i] &= d[i];
            }
        }
        /* e[4..7] carry the accumulated change mask. They are OR-ed word by word
           rather than through a uint64_t* view of e, which would break the
           strict-aliasing rule and assume 8-byte alignment of e. */
        uint32_t changed = e[4] | e[5] | e[6] | e[7];
        if (diffs != 0) {
            diffs[0] = changed;
            diffs[1] = e[0] | e[1];
            diffs[2] = e[10] | e[11];
        }
        return (changed == 0);
    }

    bool iterate_avx_20_16(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        if (h) {
            for (int i = 0; i < 20; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 20; i++) {
                j[i] &= d[i];
            }
        }
        asm (
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x00ffff00, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "vpshufd $1, %%xmm13, %%xmm13 \n\t"
        "vpshufd $0, %%xmm14, %%xmm14 \n\t"
        "vmovdqu (%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vmovdqu 16(%0), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, (%1) \n\t"
        "vmovdqu 32(%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 16(%1) \n\t"
        "vmovdqu 48(%0), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 32(%1) \n\t"
        "vmovdqu 64(%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 48(%1) \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 64(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen16)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 1; i < 19; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 19; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            for (int i = 2; i < 18; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        asm (
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x00ffff00, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "vpshufd $1, %%xmm13, %%xmm13 \n\t"
        "vpshufd $0, %%xmm14, %%xmm14 \n\t"
        "vmovdqu (%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vmovdqu 16(%1), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 8(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 8(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm15 \n\t"
        "vmovdqu %%xmm15, (%1) \n\t"
        "vmovdqu 32(%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 24(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 24(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        "vmovdqu 48(%1), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 40(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 40(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        "vmovdqu 64(%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 56(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 56(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        "vmovdqu %%xmm8, 32(%1) \n\t"
        "vmovdqu %%xmm15, 16(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen16)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 2; i < 18; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 18; i++) {
                j[i] &= d[i];
            }
        }
        uint64_t* e64 = ((uint64_t*) e);
        uint64_t bigdiff = e64[2] | e64[3];
        if (diffs != 0) {
            diffs[0] = (bigdiff | (bigdiff >> 32));
            diffs[1] = e[0] | e[1];
            diffs[2] = e[10] | e[11];
        }
        return (bigdiff == 0);
    }

    /**
     * Step a tile of 16 rows (one 32-bit word per row, one bit per cell) by
     * one or two generations using 128-bit AVX inline assembly.
     *
     * NOTE(review): the enclosing namespace name suggests the rule is B3/S23;
     * the boolean network in the assembly is kept verbatim and was not
     * re-derived here.
     *
     * Stage 1 reads d[0..15] and writes e[0..15], where e[k] is the successor
     * of row k+1 (only e[0..13] are consumed by the C code in this function).
     * Stage 2 (skipped when onegen is set) reads e[0..15], computes the
     * successor again, and merges bits 10..21 (mask 0x003ffc00) of the result
     * into d[2..13]; every other bit of d is preserved.
     *
     * @param d      16 row words. With onegen, d[2..13] are overwritten with
     *               the full 32-bit stage-1 rows; otherwise only the masked
     *               bits of d[2..13] are replaced by the stage-2 result.
     * @param e      Scratch buffer of at least 16 words. After the two-step
     *               path it holds change masks: e[0..3] for rows 2..5,
     *               e[4..7] the lane-wise OR over all three row groups,
     *               e[8..11] for rows 10..13.
     * @param h      Optional (may be null). Every state visited is ORed in:
     *               d[0..15] on entry, stage-1 rows 1..14, final d[2..13].
     * @param j      Optional (may be null). Same as h but ANDed in.
     * @param diffs  Optional (may be null), two-step path only. Receives
     *               diffs[0] = OR of the change masks of rows 2..13,
     *               diffs[1] = change masks of rows 2 and 3 ORed,
     *               diffs[2] = change masks of rows 12 and 13 ORed.
     * @param onegen When true, stop after one generation and return false.
     * @return       false when onegen is set; otherwise true iff the two-step
     *               update left d[2..13] unchanged.
     */
    bool iterate_avx_16_12(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        // Fold the incoming state into the optional accumulators.
        if (h) {
            for (int i = 0; i < 16; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 16; i++) {
                j[i] &= d[i];
            }
        }

        // ---- Stage 1: d[0..15] -> e[0..15] --------------------------------
        // Notation for one row word C: L = C << 1, R = C >> 1.
        asm (
        // xmm13 = {0, ~0, ~0, ~0}: lane 0 clear, used to splice lane 0 of the
        // next row group into the current one. xmm14 = 0x003ffc00 in every
        // lane; it is not referenced again inside this first template.
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x003ffc00, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "vpshufd $1, %%xmm13, %%xmm13 \n\t"
        "vpshufd $0, %%xmm14, %%xmm14 \n\t"
        // Rows 0..3 -> xmm7. Per-row horizontal sums:
        //   xmm9 = L^R, xmm10 = L&R, xmm11 = L^R^C, xmm12 = (L&R)|(C&(L^R)).
        "vmovdqu (%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        // Rows 4..7 -> xmm0; the same four sums land in xmm2, xmm3, xmm4, xmm5.
        "vmovdqu 16(%0), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        // Splice lane 0 of the rows-4..7 values into xmm9/xmm10/xmm7 and
        // rotate lanes (0x39) so they describe rows 1..4; the 0x4e shuffles
        // put the three-cell sums of rows 2..5 into xmm6/xmm8. The following
        // boolean network combines rows above (xmm11/xmm12), the centre rows
        // and rows below into xmm6, which is stored as e[0..3].
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, (%1) \n\t"
        // Rows 8..11 -> xmm7 with sums in xmm9..xmm12.
        "vmovdqu 32(%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        // Same splice/rotate/combine with the register roles swapped:
        // centre rows 5..8 -> e[4..7].
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 16(%1) \n\t"
        // Rows 12..15 -> xmm0 with sums in xmm2..xmm5.
        "vmovdqu 48(%0), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        // Centre rows 9..12 -> e[8..11].
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 32(%1) \n\t"
        // Last group -> e[12..15]. No further rows are loaded, so the lanes
        // past the tile are built from leftover register contents; the C
        // loops below only consume e[0..13].
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 48(%1) \n\t"
                : /* no output operands */ 
                // %0 = d, %1 = e; %2 is never referenced by the template.
                : "r" (d), "r" (e), "r" (apg::__sixteen12)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        // e[i-1] is the stage-1 successor of row i; fold it into h and j.
        if (h) {
            for (int i = 1; i < 15; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 15; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            // Single-generation mode: copy the full 32-bit rows back (no
            // column mask is applied on this path) and report "changed".
            for (int i = 2; i < 14; i++) {
                d[i] = e[i-1];
            }
            return false;
        }

        // ---- Stage 2: e[0..15] -> masked merge into d[2..13] --------------
        asm (
        // Same constants as in stage 1; here xmm14 (bits 10..21 per lane)
        // selects which bits of d are replaced.
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x003ffc00, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "vpshufd $1, %%xmm13, %%xmm13 \n\t"
        "vpshufd $0, %%xmm14, %%xmm14 \n\t"
        // e[0..3] -> xmm7 and e[4..7] -> xmm0, with the same horizontal sums
        // as in stage 1.
        "vmovdqu (%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vmovdqu 16(%1), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        // Combine for centre entries e[1..4], i.e. tile rows 2..5.
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        // Merge: d[2..5] = (old & ~mask) | (new & mask). xmm15 starts the
        // running change mask (old ^ merged) and is also stored to e[0..3].
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 8(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 8(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm15 \n\t"
        "vmovdqu %%xmm15, (%1) \n\t"
        // e[8..11] -> xmm7, then combine for tile rows 6..9.
        "vmovdqu 32(%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        // Merge into d[6..9] and OR this group's change mask into xmm15.
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 24(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 24(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        // e[12..15] -> xmm0, then combine for tile rows 10..13.
        "vmovdqu 48(%1), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        // Merge into d[10..13]; xmm8 keeps this group's change mask.
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 40(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 40(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        // e[8..11] = change mask of rows 10..13; e[4..7] = lane-wise OR of
        // the change masks of all three groups.
        "vmovdqu %%xmm8, 32(%1) \n\t"
        "vmovdqu %%xmm15, 16(%1) \n\t"
                : /* no output operands */ 
                // %0 = d, %1 = e; %2 is never referenced by the template.
                : "r" (d), "r" (e), "r" (apg::__sixteen12)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        // Fold the rows stage 2 rewrote into the optional accumulators.
        if (h) {
            for (int i = 2; i < 14; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 14; i++) {
                j[i] &= d[i];
            }
        }

        // OR of the four accumulated change-mask words e[4..7]. Reading them
        // as uint32_t (rather than through a uint64_t* cast of e) avoids the
        // strict-aliasing violation and the 8-byte alignment assumption; the
        // 32-bit value equals the old (bigdiff | (bigdiff >> 32)) truncated to
        // 32 bits, and is zero exactly when the 64-bit OR was zero.
        const uint32_t anydiff = e[4] | e[5] | e[6] | e[7];
        if (diffs != 0) {
            diffs[0] = anydiff;
            diffs[1] = e[0] | e[1];      // rows 2 and 3
            diffs[2] = e[10] | e[11];    // rows 12 and 13
        }
        return (anydiff == 0);
    }

    bool iterate_avx_12_8(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        if (h) {
            for (int i = 0; i < 12; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 12; i++) {
                j[i] &= d[i];
            }
        }
        asm (
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x000ff000, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "vpshufd $1, %%xmm13, %%xmm13 \n\t"
        "vpshufd $0, %%xmm14, %%xmm14 \n\t"
        "vmovdqu (%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vmovdqu 16(%0), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, (%1) \n\t"
        "vmovdqu 32(%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 16(%1) \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vmovdqu %%xmm6, 32(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen8)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 1; i < 11; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 11; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            for (int i = 2; i < 10; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        asm (
        "mov $0xffffffff, %%ebx \n\t"
        "movd %%ebx, %%xmm13 \n\t"
        "mov $0x000ff000, %%ebx \n\t"
        "movd %%ebx, %%xmm14 \n\t"
        "vpshufd $1, %%xmm13, %%xmm13 \n\t"
        "vpshufd $0, %%xmm14, %%xmm14 \n\t"
        "vmovdqu (%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vmovdqu 16(%1), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm2 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm3 \n\t"
        "vpand %%xmm0, %%xmm2, %%xmm1 \n\t"
        "vpxor %%xmm0, %%xmm2, %%xmm4 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm5 \n\t"
        "vpand %%xmm13, %%xmm9, %%xmm9 \n\t"
        "vpand %%xmm13, %%xmm10, %%xmm10 \n\t"
        "vpand %%xmm13, %%xmm7, %%xmm7 \n\t"
        "vpandn %%xmm2, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm9, %%xmm9 \n\t"
        "vpandn %%xmm3, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm10 \n\t"
        "vpandn %%xmm0, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm7, %%xmm7 \n\t"
        "vshufps $0x39, %%xmm9, %%xmm9, %%xmm9 \n\t"
        "vshufps $0x39, %%xmm10, %%xmm10, %%xmm10 \n\t"
        "vshufps $0x4e, %%xmm4, %%xmm11, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm5, %%xmm12, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm7, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm11, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm9, %%xmm9 \n\t"
        "vpor %%xmm6, %%xmm11, %%xmm1 \n\t"
        "vpand %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm9, %%xmm7, %%xmm7 \n\t"
        "vpxor %%xmm12, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm12, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm10, %%xmm8, %%xmm9 \n\t"
        "vpxor %%xmm10, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm9, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm7, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 8(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 8(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm15 \n\t"
        "vmovdqu %%xmm15, (%1) \n\t"
        "vmovdqu 32(%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%xmm6, %%xmm1, %%xmm9 \n\t"
        "vpand %%xmm6, %%xmm1, %%xmm10 \n\t"
        "vpand %%xmm7, %%xmm9, %%xmm1 \n\t"
        "vpxor %%xmm7, %%xmm9, %%xmm11 \n\t"
        "vpor %%xmm1, %%xmm10, %%xmm12 \n\t"
        "vpand %%xmm13, %%xmm2, %%xmm2 \n\t"
        "vpand %%xmm13, %%xmm3, %%xmm3 \n\t"
        "vpand %%xmm13, %%xmm0, %%xmm0 \n\t"
        "vpandn %%xmm9, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm2, %%xmm2 \n\t"
        "vpandn %%xmm10, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm3, %%xmm3 \n\t"
        "vpandn %%xmm7, %%xmm13, %%xmm1 \n\t"
        "vpor %%xmm1, %%xmm0, %%xmm0 \n\t"
        "vshufps $0x39, %%xmm2, %%xmm2, %%xmm2 \n\t"
        "vshufps $0x39, %%xmm3, %%xmm3, %%xmm3 \n\t"
        "vshufps $0x4e, %%xmm11, %%xmm4, %%xmm6 \n\t"
        "vshufps $0x4e, %%xmm12, %%xmm5, %%xmm8 \n\t"
        "vshufps $0x39, %%xmm0, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm4, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm6, %%xmm2, %%xmm2 \n\t"
        "vpor %%xmm6, %%xmm4, %%xmm1 \n\t"
        "vpand %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpandn %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm2, %%xmm0, %%xmm0 \n\t"
        "vpxor %%xmm5, %%xmm6, %%xmm1 \n\t"
        "vpor %%xmm5, %%xmm6, %%xmm6 \n\t"
        "vpor %%xmm3, %%xmm8, %%xmm2 \n\t"
        "vpxor %%xmm3, %%xmm8, %%xmm8 \n\t"
        "vpxor %%xmm2, %%xmm6, %%xmm6 \n\t"
        "vpxor %%xmm8, %%xmm1, %%xmm1 \n\t"
        "vpand %%xmm1, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm0, %%xmm6, %%xmm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 24(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 24(%0) \n\t"
        "vpxor %%xmm1, %%xmm8, %%xmm8 \n\t"
        "vpor %%xmm8, %%xmm15, %%xmm15 \n\t"
        "vmovdqu %%xmm8, 32(%1) \n\t"
        "vmovdqu %%xmm15, 16(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen8)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 2; i < 10; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 10; i++) {
                j[i] &= d[i];
            }
        }
        uint64_t* e64 = ((uint64_t*) e);
        uint64_t bigdiff = e64[2] | e64[3];
        if (diffs != 0) {
            diffs[0] = (bigdiff | (bigdiff >> 32));
            diffs[1] = e[0] | e[1];
            diffs[2] = e[10] | e[11];
        }
        return (bigdiff == 0);
    }

    /*
     * Staged driver over the AVX kernels, tracking both an OR-history (h)
     * and an AND-history (j).
     *
     * Up to four kernels run in order on windows starting at word offsets
     * 0, 2, 4 and 6 of d / h / j (and of the local scratch buffer). A stage
     * runs only when n reaches its threshold (7, 5, 3, 1) and is handed
     * onegen == true when n equals that threshold exactly.
     *
     * Returns 8, 6, 4 or 2 as soon as the corresponding stage returns true;
     * returns 0 if no stage that ran returned true.
     *
     * NOTE(review): the kernels are defined elsewhere; presumably each one
     * advances its window by two generations (one when onegen is set) and
     * returns true when nothing changed -- confirm against their definitions.
     */
    int iterate_var_avx(int n, uint32_t * __restrict__ d, uint32_t * __restrict__ h, uint32_t * __restrict__ j) {
        uint32_t scratch[32];  // work buffer passed to every stage at the matching offset

        if ((n >= 7) && iterate_avx_32_28(d, scratch, h, j, 0, n == 7)) {
            return 8;
        }
        if ((n >= 5) && iterate_avx_28_24(d + 2, scratch + 2, h + 2, j + 2, 0, n == 5)) {
            return 6;
        }
        if ((n >= 3) && iterate_avx_24_20(d + 4, scratch + 4, h + 4, j + 4, 0, n == 3)) {
            return 4;
        }
        if ((n >= 1) && iterate_avx_20_16(d + 6, scratch + 6, h + 6, j + 6, 0, n == 1)) {
            return 2;
        }
        return 0;
    }

    /*
     * Staged driver over the AVX kernels, tracking only an OR-history (h).
     *
     * Same staging as the four-argument overload: kernels run on windows at
     * word offsets 0, 2, 4 and 6 when n reaches 7, 5, 3 and 1 respectively,
     * with onegen set when n equals the threshold. The AND-history and the
     * diffs output are passed as null to every stage.
     *
     * Returns 8, 6, 4 or 2 as soon as the corresponding stage returns true;
     * returns 0 otherwise.
     */
    int iterate_var_avx(int n, uint32_t * __restrict__ d, uint32_t * __restrict__ h) {
        uint32_t scratch[32];  // work buffer passed to every stage at the matching offset

        if ((n >= 7) && iterate_avx_32_28(d, scratch, h, 0, 0, n == 7)) {
            return 8;
        }
        if ((n >= 5) && iterate_avx_28_24(d + 2, scratch + 2, h + 2, 0, 0, n == 5)) {
            return 6;
        }
        if ((n >= 3) && iterate_avx_24_20(d + 4, scratch + 4, h + 4, 0, 0, n == 3)) {
            return 4;
        }
        if ((n >= 1) && iterate_avx_20_16(d + 6, scratch + 6, h + 6, 0, 0, n == 1)) {
            return 2;
        }
        return 0;
    }

    /*
     * Staged driver over the AVX kernels with no history tracking.
     *
     * Kernels run on windows at word offsets 0, 2, 4 and 6 of d when n
     * reaches 7, 5, 3 and 1 respectively, with onegen set when n equals the
     * threshold. Both history pointers and the diffs output are passed as
     * null to every stage.
     *
     * Returns 8, 6, 4 or 2 as soon as the corresponding stage returns true;
     * returns 0 otherwise.
     */
    int iterate_var_avx(int n, uint32_t * __restrict__ d) {
        uint32_t scratch[32];  // work buffer passed to every stage at the matching offset

        if ((n >= 7) && iterate_avx_32_28(d, scratch, 0, 0, 0, n == 7)) {
            return 8;
        }
        if ((n >= 5) && iterate_avx_28_24(d + 2, scratch + 2, 0, 0, 0, n == 5)) {
            return 6;
        }
        if ((n >= 3) && iterate_avx_24_20(d + 4, scratch + 4, 0, 0, 0, n == 3)) {
            return 4;
        }
        if ((n >= 1) && iterate_avx_20_16(d + 6, scratch + 6, 0, 0, 0, n == 1)) {
            return 2;
        }
        return 0;
    }

    /*
     * AVX2 kernel operating on a tile of 32 rows, one 32-bit word per row.
     *
     * Pass 1 reads d[0..31] and writes its result to e[0..31]; the history
     * loops below treat e[k] as belonging to row k+1. Pass 2 reads e, and
     * merges its result into d[2..29] under the mask loaded from
     * apg::__sixteen28, recording which bits changed.
     *
     * d      in/out tile of 32 words. With onegen set, d[2..29] are replaced
     *        by e[1..28]; otherwise d[2..29] are updated by pass 2.
     * e      scratch buffer of at least 32 words; fully overwritten.
     * h      optional (may be null): OR-accumulates d before pass 1,
     *        e[0..29] into h[1..30] after pass 1, and d[2..29] after pass 2.
     * j      optional (may be null): as h, but AND-accumulated.
     * diffs  optional (may be null) 3-word output, written only when
     *        onegen is false:
     *          diffs[0] = OR of every changed-bit word from pass 2,
     *          diffs[1] = changed bits of d[2] | d[3],
     *          diffs[2] = changed bits of d[28] | d[29].
     * onegen when true, stop after pass 1 and return false.
     *
     * Returns true when pass 2 changed no bit of d, false otherwise (and
     * always false when onegen is set).
     *
     * NOTE(review): the namespace name suggests the bit-sliced logic
     * implements the B3/S23 rule; the contents of apg::__sixteen28 (mask at
     * offset 0, vpermd index vector at byte offset 192) are defined
     * elsewhere -- confirm there.
     */
    bool iterate_avx2_32_28(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        if (h) {
            for (int i = 0; i < 32; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 32; i++) {
                j[i] &= d[i];
            }
        }
        // Pass 1: d (%0) -> e (%1), eight rows per ymm register.
        asm (
        // Constants from the table (%2). ymm14 is loaded but not referenced
        // again in this pass; ymm13 is the index vector for vpermd.
        "vmovdqu (%2), %%ymm14 \n\t"
        "vmovdqu 192(%2), %%ymm13 \n\t"
        // Rows 0..7: shift each row by one bit both ways and build the
        // xor/and combinations of a cell with its horizontal neighbours.
        "vmovdqu (%0), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        // Rows 8..15: same horizontal combinations.
        "vmovdqu 32(%0), %%ymm0 \n\t"
        "vpsrld $1, %%ymm0, %%ymm6 \n\t"
        "vpslld $1, %%ymm0, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm2 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm3 \n\t"
        "vpand %%ymm0, %%ymm2, %%ymm1 \n\t"
        "vpxor %%ymm0, %%ymm2, %%ymm4 \n\t"
        "vpor %%ymm1, %%ymm3, %%ymm5 \n\t"
        // Pull the leading lanes of the next row group in (vpblendd), then
        // realign with vpermd / vpermq so vertically adjacent rows share a
        // lane, and combine the partial results.
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        "vmovdqu %%ymm6, (%1) \n\t"
        // Rows 16..23, combined with the retained rows 8..15 state.
        "vmovdqu 64(%0), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        "vpblendd $1, %%ymm9, %%ymm2, %%ymm2 \n\t"
        "vpblendd $1, %%ymm10, %%ymm3, %%ymm3 \n\t"
        "vpblendd $3, %%ymm11, %%ymm4, %%ymm6 \n\t"
        "vpblendd $3, %%ymm12, %%ymm5, %%ymm8 \n\t"
        "vpblendd $1, %%ymm7, %%ymm0, %%ymm0 \n\t"
        "vpermd %%ymm2, %%ymm13, %%ymm2 \n\t"
        "vpermd %%ymm3, %%ymm13, %%ymm3 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm0, %%ymm13, %%ymm0 \n\t"
        "vpxor %%ymm4, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm2, %%ymm2 \n\t"
        "vpor %%ymm6, %%ymm4, %%ymm1 \n\t"
        "vpand %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm2, %%ymm0, %%ymm0 \n\t"
        "vpxor %%ymm5, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm5, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm3, %%ymm8, %%ymm2 \n\t"
        "vpxor %%ymm3, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm0, %%ymm6, %%ymm6 \n\t"
        "vmovdqu %%ymm6, 32(%1) \n\t"
        // Rows 24..31, combined with the retained rows 16..23 state.
        "vmovdqu 96(%0), %%ymm0 \n\t"
        "vpsrld $1, %%ymm0, %%ymm6 \n\t"
        "vpslld $1, %%ymm0, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm2 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm3 \n\t"
        "vpand %%ymm0, %%ymm2, %%ymm1 \n\t"
        "vpxor %%ymm0, %%ymm2, %%ymm4 \n\t"
        "vpor %%ymm1, %%ymm3, %%ymm5 \n\t"
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        "vmovdqu %%ymm6, 64(%1) \n\t"
        // Final group: no further rows are loaded; the registers left over
        // from the previous steps are reused to fill the last eight words.
        "vpblendd $1, %%ymm9, %%ymm2, %%ymm2 \n\t"
        "vpblendd $1, %%ymm10, %%ymm3, %%ymm3 \n\t"
        "vpblendd $3, %%ymm11, %%ymm4, %%ymm6 \n\t"
        "vpblendd $3, %%ymm12, %%ymm5, %%ymm8 \n\t"
        "vpblendd $1, %%ymm7, %%ymm0, %%ymm0 \n\t"
        "vpermd %%ymm2, %%ymm13, %%ymm2 \n\t"
        "vpermd %%ymm3, %%ymm13, %%ymm3 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm0, %%ymm13, %%ymm0 \n\t"
        "vpxor %%ymm4, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm2, %%ymm2 \n\t"
        "vpor %%ymm6, %%ymm4, %%ymm1 \n\t"
        "vpand %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm2, %%ymm0, %%ymm0 \n\t"
        "vpxor %%ymm5, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm5, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm3, %%ymm8, %%ymm2 \n\t"
        "vpxor %%ymm3, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm0, %%ymm6, %%ymm6 \n\t"
        "vmovdqu %%ymm6, 96(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen28)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        // e[k] is paired with row k+1 here; e[30] and e[31] are not consumed.
        if (h) {
            for (int i = 1; i < 31; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 31; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            // Single-pass mode: copy the pass-1 result back into rows 2..29.
            for (int i = 2; i < 30; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        // Pass 2: e (%1) -> masked merge into d (%0) starting at row 2
        // (byte offset 8), with changed bits accumulated in ymm15.
        asm (
        "vmovdqu (%2), %%ymm14 \n\t"
        "vmovdqu 192(%2), %%ymm13 \n\t"
        "vmovdqu (%1), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        "vmovdqu 32(%1), %%ymm0 \n\t"
        "vpsrld $1, %%ymm0, %%ymm6 \n\t"
        "vpslld $1, %%ymm0, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm2 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm3 \n\t"
        "vpand %%ymm0, %%ymm2, %%ymm1 \n\t"
        "vpxor %%ymm0, %%ymm2, %%ymm4 \n\t"
        "vpor %%ymm1, %%ymm3, %%ymm5 \n\t"
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        // Merge into d[2..9]: new = (result & mask) | (old & ~mask);
        // ymm15 = new ^ old starts the changed-bit accumulator -> e[0..7].
        "vpand %%ymm14, %%ymm6, %%ymm6 \n\t"
        "vmovdqu 8(%0), %%ymm8 \n\t"
        "vpandn %%ymm8, %%ymm14, %%ymm1 \n\t"
        "vpor %%ymm6, %%ymm1, %%ymm1 \n\t"
        "vmovdqu %%ymm1, 8(%0) \n\t"
        "vpxor %%ymm1, %%ymm8, %%ymm15 \n\t"
        "vmovdqu %%ymm15, (%1) \n\t"
        "vmovdqu 64(%1), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        "vpblendd $1, %%ymm9, %%ymm2, %%ymm2 \n\t"
        "vpblendd $1, %%ymm10, %%ymm3, %%ymm3 \n\t"
        "vpblendd $3, %%ymm11, %%ymm4, %%ymm6 \n\t"
        "vpblendd $3, %%ymm12, %%ymm5, %%ymm8 \n\t"
        "vpblendd $1, %%ymm7, %%ymm0, %%ymm0 \n\t"
        "vpermd %%ymm2, %%ymm13, %%ymm2 \n\t"
        "vpermd %%ymm3, %%ymm13, %%ymm3 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm0, %%ymm13, %%ymm0 \n\t"
        "vpxor %%ymm4, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm2, %%ymm2 \n\t"
        "vpor %%ymm6, %%ymm4, %%ymm1 \n\t"
        "vpand %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm2, %%ymm0, %%ymm0 \n\t"
        "vpxor %%ymm5, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm5, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm3, %%ymm8, %%ymm2 \n\t"
        "vpxor %%ymm3, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm0, %%ymm6, %%ymm6 \n\t"
        // Merge into d[10..17] and OR its changed bits into ymm15.
        "vpand %%ymm14, %%ymm6, %%ymm6 \n\t"
        "vmovdqu 40(%0), %%ymm8 \n\t"
        "vpandn %%ymm8, %%ymm14, %%ymm1 \n\t"
        "vpor %%ymm6, %%ymm1, %%ymm1 \n\t"
        "vmovdqu %%ymm1, 40(%0) \n\t"
        "vpxor %%ymm1, %%ymm8, %%ymm8 \n\t"
        "vpor %%ymm8, %%ymm15, %%ymm15 \n\t"
        "vmovdqu 96(%1), %%ymm0 \n\t"
        "vpsrld $1, %%ymm0, %%ymm6 \n\t"
        "vpslld $1, %%ymm0, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm2 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm3 \n\t"
        "vpand %%ymm0, %%ymm2, %%ymm1 \n\t"
        "vpxor %%ymm0, %%ymm2, %%ymm4 \n\t"
        "vpor %%ymm1, %%ymm3, %%ymm5 \n\t"
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        // Merge into d[18..25] and OR its changed bits into ymm15.
        "vpand %%ymm14, %%ymm6, %%ymm6 \n\t"
        "vmovdqu 72(%0), %%ymm8 \n\t"
        "vpandn %%ymm8, %%ymm14, %%ymm1 \n\t"
        "vpor %%ymm6, %%ymm1, %%ymm1 \n\t"
        "vmovdqu %%ymm1, 72(%0) \n\t"
        "vpxor %%ymm1, %%ymm8, %%ymm8 \n\t"
        "vpor %%ymm8, %%ymm15, %%ymm15 \n\t"
        "vpblendd $1, %%ymm9, %%ymm2, %%ymm2 \n\t"
        "vpblendd $1, %%ymm10, %%ymm3, %%ymm3 \n\t"
        "vpblendd $3, %%ymm11, %%ymm4, %%ymm6 \n\t"
        "vpblendd $3, %%ymm12, %%ymm5, %%ymm8 \n\t"
        "vpblendd $1, %%ymm7, %%ymm0, %%ymm0 \n\t"
        "vpermd %%ymm2, %%ymm13, %%ymm2 \n\t"
        "vpermd %%ymm3, %%ymm13, %%ymm3 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm0, %%ymm13, %%ymm0 \n\t"
        "vpxor %%ymm4, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm2, %%ymm2 \n\t"
        "vpor %%ymm6, %%ymm4, %%ymm1 \n\t"
        "vpand %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm2, %%ymm0, %%ymm0 \n\t"
        "vpxor %%ymm5, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm5, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm3, %%ymm8, %%ymm2 \n\t"
        "vpxor %%ymm3, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm0, %%ymm6, %%ymm6 \n\t"
        // Last merge is 128-bit only: d[26..29]. Its changed bits go to
        // e[16..23] (64(%1)); the full accumulator goes to e[8..15] (32(%1)).
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 104(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 104(%0) \n\t"
        "vpxor %%ymm1, %%ymm8, %%ymm8 \n\t"
        "vpor %%ymm8, %%ymm15, %%ymm15 \n\t"
        "vmovdqu %%ymm8, 64(%1) \n\t"
        "vmovdqu %%ymm15, 32(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen28)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 2; i < 30; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 30; i++) {
                j[i] &= d[i];
            }
        }
        // Fold the accumulated changed-bit words e[8..15] with ordinary
        // uint32_t reads. Reading them through a uint64_t* would break the
        // strict-aliasing rule and assume 8-byte alignment that a uint32_t
        // buffer does not guarantee; the 32-bit OR yields the same values.
        uint32_t bigdiff = 0;
        for (int i = 8; i < 16; i++) {
            bigdiff |= e[i];
        }
        if (diffs != 0) {
            diffs[0] = bigdiff;
            diffs[1] = e[0] | e[1];
            diffs[2] = e[18] | e[19];
        }
        return (bigdiff == 0);
    }

    bool iterate_avx2_28_24(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        if (h) {
            for (int i = 0; i < 28; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 28; i++) {
                j[i] &= d[i];
            }
        }
        asm (
        "vmovdqu (%2), %%ymm14 \n\t"
        "vmovdqu 192(%2), %%ymm13 \n\t"
        "vmovdqu (%0), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        "vmovdqu 32(%0), %%ymm0 \n\t"
        "vpsrld $1, %%ymm0, %%ymm6 \n\t"
        "vpslld $1, %%ymm0, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm2 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm3 \n\t"
        "vpand %%ymm0, %%ymm2, %%ymm1 \n\t"
        "vpxor %%ymm0, %%ymm2, %%ymm4 \n\t"
        "vpor %%ymm1, %%ymm3, %%ymm5 \n\t"
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        "vmovdqu %%ymm6, (%1) \n\t"
        "vmovdqu 64(%0), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        "vpblendd $1, %%ymm9, %%ymm2, %%ymm2 \n\t"
        "vpblendd $1, %%ymm10, %%ymm3, %%ymm3 \n\t"
        "vpblendd $3, %%ymm11, %%ymm4, %%ymm6 \n\t"
        "vpblendd $3, %%ymm12, %%ymm5, %%ymm8 \n\t"
        "vpblendd $1, %%ymm7, %%ymm0, %%ymm0 \n\t"
        "vpermd %%ymm2, %%ymm13, %%ymm2 \n\t"
        "vpermd %%ymm3, %%ymm13, %%ymm3 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm0, %%ymm13, %%ymm0 \n\t"
        "vpxor %%ymm4, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm2, %%ymm2 \n\t"
        "vpor %%ymm6, %%ymm4, %%ymm1 \n\t"
        "vpand %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm2, %%ymm0, %%ymm0 \n\t"
        "vpxor %%ymm5, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm5, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm3, %%ymm8, %%ymm2 \n\t"
        "vpxor %%ymm3, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm0, %%ymm6, %%ymm6 \n\t"
        "vmovdqu %%ymm6, 32(%1) \n\t"
        "vmovdqu 96(%0), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm2 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm3 \n\t"
        "vpand %%ymm0, %%ymm2, %%ymm1 \n\t"
        "vpxor %%ymm0, %%ymm2, %%ymm4 \n\t"
        "vpor %%ymm1, %%ymm3, %%ymm5 \n\t"
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        "vmovdqu %%ymm6, 64(%1) \n\t"
        "vpblendd $1, %%ymm9, %%ymm2, %%ymm2 \n\t"
        "vpblendd $1, %%ymm10, %%ymm3, %%ymm3 \n\t"
        "vpblendd $3, %%ymm11, %%ymm4, %%ymm6 \n\t"
        "vpblendd $3, %%ymm12, %%ymm5, %%ymm8 \n\t"
        "vpblendd $1, %%ymm7, %%ymm0, %%ymm0 \n\t"
        "vpermd %%ymm2, %%ymm13, %%ymm2 \n\t"
        "vpermd %%ymm3, %%ymm13, %%ymm3 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm0, %%ymm13, %%ymm0 \n\t"
        "vpxor %%ymm4, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm2, %%ymm2 \n\t"
        "vpor %%ymm6, %%ymm4, %%ymm1 \n\t"
        "vpand %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm2, %%ymm0, %%ymm0 \n\t"
        "vpxor %%ymm5, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm5, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm3, %%ymm8, %%ymm2 \n\t"
        "vpxor %%ymm3, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm0, %%ymm6, %%ymm6 \n\t"
        "vmovdqu %%xmm6, 96(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen24)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 1; i < 27; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 27; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            for (int i = 2; i < 26; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        asm (
        "vmovdqu (%2), %%ymm14 \n\t"
        "vmovdqu 192(%2), %%ymm13 \n\t"
        "vmovdqu (%1), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        "vmovdqu 32(%1), %%ymm0 \n\t"
        "vpsrld $1, %%ymm0, %%ymm6 \n\t"
        "vpslld $1, %%ymm0, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm2 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm3 \n\t"
        "vpand %%ymm0, %%ymm2, %%ymm1 \n\t"
        "vpxor %%ymm0, %%ymm2, %%ymm4 \n\t"
        "vpor %%ymm1, %%ymm3, %%ymm5 \n\t"
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm14, %%ymm6, %%ymm6 \n\t"
        "vmovdqu 8(%0), %%ymm8 \n\t"
        "vpandn %%ymm8, %%ymm14, %%ymm1 \n\t"
        "vpor %%ymm6, %%ymm1, %%ymm1 \n\t"
        "vmovdqu %%ymm1, 8(%0) \n\t"
        "vpxor %%ymm1, %%ymm8, %%ymm15 \n\t"
        "vmovdqu %%ymm15, (%1) \n\t"
        "vmovdqu 64(%1), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        "vpblendd $1, %%ymm9, %%ymm2, %%ymm2 \n\t"
        "vpblendd $1, %%ymm10, %%ymm3, %%ymm3 \n\t"
        "vpblendd $3, %%ymm11, %%ymm4, %%ymm6 \n\t"
        "vpblendd $3, %%ymm12, %%ymm5, %%ymm8 \n\t"
        "vpblendd $1, %%ymm7, %%ymm0, %%ymm0 \n\t"
        "vpermd %%ymm2, %%ymm13, %%ymm2 \n\t"
        "vpermd %%ymm3, %%ymm13, %%ymm3 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm0, %%ymm13, %%ymm0 \n\t"
        "vpxor %%ymm4, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm2, %%ymm2 \n\t"
        "vpor %%ymm6, %%ymm4, %%ymm1 \n\t"
        "vpand %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm2, %%ymm0, %%ymm0 \n\t"
        "vpxor %%ymm5, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm5, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm3, %%ymm8, %%ymm2 \n\t"
        "vpxor %%ymm3, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm0, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm14, %%ymm6, %%ymm6 \n\t"
        "vmovdqu 40(%0), %%ymm8 \n\t"
        "vpandn %%ymm8, %%ymm14, %%ymm1 \n\t"
        "vpor %%ymm6, %%ymm1, %%ymm1 \n\t"
        "vmovdqu %%ymm1, 40(%0) \n\t"
        "vpxor %%ymm1, %%ymm8, %%ymm8 \n\t"
        "vpor %%ymm8, %%ymm15, %%ymm15 \n\t"
        "vmovdqu 96(%1), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm2 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm3 \n\t"
        "vpand %%ymm0, %%ymm2, %%ymm1 \n\t"
        "vpxor %%ymm0, %%ymm2, %%ymm4 \n\t"
        "vpor %%ymm1, %%ymm3, %%ymm5 \n\t"
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm14, %%ymm6, %%ymm6 \n\t"
        "vmovdqu 72(%0), %%ymm8 \n\t"
        "vpandn %%ymm8, %%ymm14, %%ymm1 \n\t"
        "vpor %%ymm6, %%ymm1, %%ymm1 \n\t"
        "vmovdqu %%ymm1, 72(%0) \n\t"
        "vpxor %%ymm1, %%ymm8, %%ymm8 \n\t"
        "vpor %%ymm8, %%ymm15, %%ymm15 \n\t"
        "vmovdqu %%ymm8, 64(%1) \n\t"
        "vmovdqu %%ymm15, 32(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen24)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 2; i < 26; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 26; i++) {
                j[i] &= d[i];
            }
        }
        uint64_t* e64 = ((uint64_t*) e);
        uint64_t bigdiff = e64[4] | e64[5] | e64[6] | e64[7];
        if (diffs != 0) {
            diffs[0] = (bigdiff | (bigdiff >> 32));
            diffs[1] = e[0] | e[1];
            diffs[2] = e[22] | e[23];
        }
        return (bigdiff == 0);
    }

    /**
     * Two-step AVX2 kernel over a block of 24 rows stored as 32-bit words.
     * (It lives in namespace b3s23, so each step is presumably one generation
     * of Conway's Life -- confirm against the generator of this file.)
     *
     * Step 1 reads d[0..23] and writes an intermediate block to e[0..23]; the
     * loops after it treat e[i-1] as the successor of row i.
     * Step 2 reads e[0..23] and merges its result into d[2..21] under the mask
     * loaded from the start of apg::__sixteen20: bits outside the mask keep
     * their previous value in d.
     *
     * @param d      in/out block, at least 24 words.
     * @param e      scratch, at least 24 words. After step 2 it holds change
     *               words (old XOR new): e[0..7] for d[2..9], e[16..19] for
     *               d[18..21], and e[8..15] the lane-wise OR of the change
     *               vectors of all three chunks.
     * @param h      optional (may be null); each of the up to three states is
     *               OR-ed into it row by row.
     * @param j      optional (may be null); each of the up to three states is
     *               AND-ed into it row by row.
     * @param diffs  optional (may be null); receives three words:
     *               [0] OR of e[8..15], [1] changes in d[2] and d[3],
     *               [2] changes in d[20] and d[21].
     * @param onegen if true, stop after step 1: copy e[1..20] into d[2..21]
     *               and return false.
     * @return true iff step 2 left every word of d[2..21] unchanged.
     */
    bool iterate_avx2_24_20(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        // Fold the incoming state into the optional accumulators.
        if (h) {
            for (int i = 0; i < 24; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 24; i++) {
                j[i] &= d[i];
            }
        }
        // Step 1: d[0..23] -> e[0..23].
        // ymm13 is the index vector for every vpermd; ymm14 is loaded here but
        // only used by step 2.
        asm (
        "vmovdqu (%2), %%ymm14 \n\t"
        "vmovdqu 192(%2), %%ymm13 \n\t"
        /* d[0..7]: sum/carry of the left- and right-shifted row (ymm9/ymm10),
           then sum/carry including the row itself (ymm11/ymm12) */
        "vmovdqu (%0), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        /* d[8..15]: same quantities in ymm2/ymm3 and ymm4/ymm5 */
        "vmovdqu 32(%0), %%ymm0 \n\t"
        "vpsrld $1, %%ymm0, %%ymm6 \n\t"
        "vpslld $1, %%ymm0, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm2 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm3 \n\t"
        "vpand %%ymm0, %%ymm2, %%ymm1 \n\t"
        "vpxor %%ymm0, %%ymm2, %%ymm4 \n\t"
        "vpor %%ymm1, %%ymm3, %%ymm5 \n\t"
        /* pull the leading rows of the next chunk in, realign, combine */
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        /* first output chunk -> e[0..7] */
        "vmovdqu %%ymm6, (%1) \n\t"
        /* d[16..23] */
        "vmovdqu 64(%0), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        "vpblendd $1, %%ymm9, %%ymm2, %%ymm2 \n\t"
        "vpblendd $1, %%ymm10, %%ymm3, %%ymm3 \n\t"
        "vpblendd $3, %%ymm11, %%ymm4, %%ymm6 \n\t"
        "vpblendd $3, %%ymm12, %%ymm5, %%ymm8 \n\t"
        "vpblendd $1, %%ymm7, %%ymm0, %%ymm0 \n\t"
        "vpermd %%ymm2, %%ymm13, %%ymm2 \n\t"
        "vpermd %%ymm3, %%ymm13, %%ymm3 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm0, %%ymm13, %%ymm0 \n\t"
        "vpxor %%ymm4, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm2, %%ymm2 \n\t"
        "vpor %%ymm6, %%ymm4, %%ymm1 \n\t"
        "vpand %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm2, %%ymm0, %%ymm0 \n\t"
        "vpxor %%ymm5, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm5, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm3, %%ymm8, %%ymm2 \n\t"
        "vpxor %%ymm3, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm0, %%ymm6, %%ymm6 \n\t"
        /* second output chunk -> e[8..15] */
        "vmovdqu %%ymm6, 32(%1) \n\t"
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        /* third output chunk -> e[16..23] */
        "vmovdqu %%ymm6, 64(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen20)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        // Fold the intermediate state; e[i-1] lines up with row i.
        if (h) {
            for (int i = 1; i < 23; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 23; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            // Single-step mode: publish the intermediate rows and stop.
            for (int i = 2; i < 22; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        // Step 2: e[0..23] -> d[2..21] (merged under the ymm14 mask), with the
        // old-XOR-new change words written back into e.
        asm (
        "vmovdqu (%2), %%ymm14 \n\t"
        "vmovdqu 192(%2), %%ymm13 \n\t"
        "vmovdqu (%1), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        "vmovdqu 32(%1), %%ymm0 \n\t"
        "vpsrld $1, %%ymm0, %%ymm6 \n\t"
        "vpslld $1, %%ymm0, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm2 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm3 \n\t"
        "vpand %%ymm0, %%ymm2, %%ymm1 \n\t"
        "vpxor %%ymm0, %%ymm2, %%ymm4 \n\t"
        "vpor %%ymm1, %%ymm3, %%ymm5 \n\t"
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        /* d[2..9] = (old & ~mask) | (new & mask); e[0..7] = old ^ merged */
        "vpand %%ymm14, %%ymm6, %%ymm6 \n\t"
        "vmovdqu 8(%0), %%ymm8 \n\t"
        "vpandn %%ymm8, %%ymm14, %%ymm1 \n\t"
        "vpor %%ymm6, %%ymm1, %%ymm1 \n\t"
        "vmovdqu %%ymm1, 8(%0) \n\t"
        "vpxor %%ymm1, %%ymm8, %%ymm15 \n\t"
        "vmovdqu %%ymm15, (%1) \n\t"
        "vmovdqu 64(%1), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        "vpblendd $1, %%ymm9, %%ymm2, %%ymm2 \n\t"
        "vpblendd $1, %%ymm10, %%ymm3, %%ymm3 \n\t"
        "vpblendd $3, %%ymm11, %%ymm4, %%ymm6 \n\t"
        "vpblendd $3, %%ymm12, %%ymm5, %%ymm8 \n\t"
        "vpblendd $1, %%ymm7, %%ymm0, %%ymm0 \n\t"
        "vpermd %%ymm2, %%ymm13, %%ymm2 \n\t"
        "vpermd %%ymm3, %%ymm13, %%ymm3 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm0, %%ymm13, %%ymm0 \n\t"
        "vpxor %%ymm4, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm2, %%ymm2 \n\t"
        "vpor %%ymm6, %%ymm4, %%ymm1 \n\t"
        "vpand %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm2, %%ymm0, %%ymm0 \n\t"
        "vpxor %%ymm5, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm5, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm3, %%ymm8, %%ymm2 \n\t"
        "vpxor %%ymm3, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm0, %%ymm6, %%ymm6 \n\t"
        /* d[10..17] merged the same way; its change vector is OR-ed into ymm15 */
        "vpand %%ymm14, %%ymm6, %%ymm6 \n\t"
        "vmovdqu 40(%0), %%ymm8 \n\t"
        "vpandn %%ymm8, %%ymm14, %%ymm1 \n\t"
        "vpor %%ymm6, %%ymm1, %%ymm1 \n\t"
        "vmovdqu %%ymm1, 40(%0) \n\t"
        "vpxor %%ymm1, %%ymm8, %%ymm8 \n\t"
        "vpor %%ymm8, %%ymm15, %%ymm15 \n\t"
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        /* d[18..21]: 128-bit merge, so only four rows are touched */
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 72(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 72(%0) \n\t"
        "vpxor %%ymm1, %%ymm8, %%ymm8 \n\t"
        "vpor %%ymm8, %%ymm15, %%ymm15 \n\t"
        /* e[16..23] = last change vector, e[8..15] = OR of all three */
        "vmovdqu %%ymm8, 64(%1) \n\t"
        "vmovdqu %%ymm15, 32(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen20)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        // Fold the final state of the rows that step 2 rewrote.
        if (h) {
            for (int i = 2; i < 22; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 22; i++) {
                j[i] &= d[i];
            }
        }
        // e[8..15] is the combined change vector. OR it together as 32-bit
        // words: reading it through a uint64_t* would violate strict aliasing
        // and assume e is 8-byte aligned. The result is the same value the
        // 64-bit fold (low half | high half) produced.
        uint32_t anydiff = 0;
        for (int i = 8; i < 16; i++) {
            anydiff |= e[i];
        }
        if (diffs != 0) {
            diffs[0] = anydiff;
            diffs[1] = e[0] | e[1];     // changes in d[2], d[3]
            diffs[2] = e[18] | e[19];   // changes in d[20], d[21]
        }
        return (anydiff == 0);
    }

    /**
     * Two-step AVX2 kernel over a block of 20 rows stored as 32-bit words.
     * (It lives in namespace b3s23, so each step is presumably one generation
     * of Conway's Life -- confirm against the generator of this file.)
     *
     * Step 1 reads d[0..19] and writes an intermediate block to e[0..19]; the
     * loops after it treat e[i-1] as the successor of row i.
     * Step 2 reads e[0..19] and merges its result into d[2..17] under the mask
     * loaded from the start of apg::__sixteen16: bits outside the mask keep
     * their previous value in d.
     *
     * @param d      in/out block, at least 20 words.
     * @param e      scratch, at least 24 words (step 2 stores a full 32-byte
     *               vector at word 16). After step 2 it holds change words
     *               (old XOR new): e[0..7] for d[2..9], e[16..23] for
     *               d[10..17], and e[8..15] the lane-wise OR of both.
     * @param h      optional (may be null); each of the up to three states is
     *               OR-ed into it row by row.
     * @param j      optional (may be null); each of the up to three states is
     *               AND-ed into it row by row.
     * @param diffs  optional (may be null); receives three words:
     *               [0] OR of e[8..15], [1] changes in d[2] and d[3],
     *               [2] changes in d[16] and d[17].
     * @param onegen if true, stop after step 1: copy e[1..16] into d[2..17]
     *               and return false.
     * @return true iff step 2 left every word of d[2..17] unchanged.
     */
    bool iterate_avx2_20_16(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        // Fold the incoming state into the optional accumulators.
        if (h) {
            for (int i = 0; i < 20; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 20; i++) {
                j[i] &= d[i];
            }
        }
        // Step 1: d[0..19] -> e[0..19].
        // ymm13 is the index vector for every vpermd; ymm14 is loaded here but
        // only used by step 2.
        asm (
        "vmovdqu (%2), %%ymm14 \n\t"
        "vmovdqu 192(%2), %%ymm13 \n\t"
        /* d[0..7]: sum/carry of the left- and right-shifted row (ymm9/ymm10),
           then sum/carry including the row itself (ymm11/ymm12) */
        "vmovdqu (%0), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        /* d[8..15]: same quantities in ymm2/ymm3 and ymm4/ymm5 */
        "vmovdqu 32(%0), %%ymm0 \n\t"
        "vpsrld $1, %%ymm0, %%ymm6 \n\t"
        "vpslld $1, %%ymm0, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm2 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm3 \n\t"
        "vpand %%ymm0, %%ymm2, %%ymm1 \n\t"
        "vpxor %%ymm0, %%ymm2, %%ymm4 \n\t"
        "vpor %%ymm1, %%ymm3, %%ymm5 \n\t"
        /* pull the leading rows of the next chunk in, realign, combine */
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        /* first output chunk -> e[0..7] */
        "vmovdqu %%ymm6, (%1) \n\t"
        /* d[16..19]: 128-bit load, only four rows remain */
        "vmovdqu 64(%0), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        "vpblendd $1, %%ymm9, %%ymm2, %%ymm2 \n\t"
        "vpblendd $1, %%ymm10, %%ymm3, %%ymm3 \n\t"
        "vpblendd $3, %%ymm11, %%ymm4, %%ymm6 \n\t"
        "vpblendd $3, %%ymm12, %%ymm5, %%ymm8 \n\t"
        "vpblendd $1, %%ymm7, %%ymm0, %%ymm0 \n\t"
        "vpermd %%ymm2, %%ymm13, %%ymm2 \n\t"
        "vpermd %%ymm3, %%ymm13, %%ymm3 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm0, %%ymm13, %%ymm0 \n\t"
        "vpxor %%ymm4, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm2, %%ymm2 \n\t"
        "vpor %%ymm6, %%ymm4, %%ymm1 \n\t"
        "vpand %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm2, %%ymm0, %%ymm0 \n\t"
        "vpxor %%ymm5, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm5, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm3, %%ymm8, %%ymm2 \n\t"
        "vpxor %%ymm3, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm0, %%ymm6, %%ymm6 \n\t"
        /* second output chunk -> e[8..15] */
        "vmovdqu %%ymm6, 32(%1) \n\t"
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        /* last output chunk -> e[16..19] (128-bit store) */
        "vmovdqu %%xmm6, 64(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen16)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        // Fold the intermediate state; e[i-1] lines up with row i.
        if (h) {
            for (int i = 1; i < 19; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 19; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            // Single-step mode: publish the intermediate rows and stop.
            for (int i = 2; i < 18; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        // Step 2: e[0..19] -> d[2..17] (merged under the ymm14 mask), with the
        // old-XOR-new change words written back into e.
        asm (
        "vmovdqu (%2), %%ymm14 \n\t"
        "vmovdqu 192(%2), %%ymm13 \n\t"
        "vmovdqu (%1), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        "vmovdqu 32(%1), %%ymm0 \n\t"
        "vpsrld $1, %%ymm0, %%ymm6 \n\t"
        "vpslld $1, %%ymm0, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm2 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm3 \n\t"
        "vpand %%ymm0, %%ymm2, %%ymm1 \n\t"
        "vpxor %%ymm0, %%ymm2, %%ymm4 \n\t"
        "vpor %%ymm1, %%ymm3, %%ymm5 \n\t"
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        /* d[2..9] = (old & ~mask) | (new & mask); e[0..7] = old ^ merged */
        "vpand %%ymm14, %%ymm6, %%ymm6 \n\t"
        "vmovdqu 8(%0), %%ymm8 \n\t"
        "vpandn %%ymm8, %%ymm14, %%ymm1 \n\t"
        "vpor %%ymm6, %%ymm1, %%ymm1 \n\t"
        "vmovdqu %%ymm1, 8(%0) \n\t"
        "vpxor %%ymm1, %%ymm8, %%ymm15 \n\t"
        "vmovdqu %%ymm15, (%1) \n\t"
        /* e[16..19]: 128-bit load of the last intermediate rows */
        "vmovdqu 64(%1), %%xmm7 \n\t"
        "vpsrld $1, %%xmm7, %%xmm6 \n\t"
        "vpslld $1, %%xmm7, %%xmm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        "vpblendd $1, %%ymm9, %%ymm2, %%ymm2 \n\t"
        "vpblendd $1, %%ymm10, %%ymm3, %%ymm3 \n\t"
        "vpblendd $3, %%ymm11, %%ymm4, %%ymm6 \n\t"
        "vpblendd $3, %%ymm12, %%ymm5, %%ymm8 \n\t"
        "vpblendd $1, %%ymm7, %%ymm0, %%ymm0 \n\t"
        "vpermd %%ymm2, %%ymm13, %%ymm2 \n\t"
        "vpermd %%ymm3, %%ymm13, %%ymm3 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm0, %%ymm13, %%ymm0 \n\t"
        "vpxor %%ymm4, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm2, %%ymm2 \n\t"
        "vpor %%ymm6, %%ymm4, %%ymm1 \n\t"
        "vpand %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm2, %%ymm0, %%ymm0 \n\t"
        "vpxor %%ymm5, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm5, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm3, %%ymm8, %%ymm2 \n\t"
        "vpxor %%ymm3, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm0, %%ymm6, %%ymm6 \n\t"
        /* d[10..17] merged the same way; its change vector is OR-ed into ymm15 */
        "vpand %%ymm14, %%ymm6, %%ymm6 \n\t"
        "vmovdqu 40(%0), %%ymm8 \n\t"
        "vpandn %%ymm8, %%ymm14, %%ymm1 \n\t"
        "vpor %%ymm6, %%ymm1, %%ymm1 \n\t"
        "vmovdqu %%ymm1, 40(%0) \n\t"
        "vpxor %%ymm1, %%ymm8, %%ymm8 \n\t"
        "vpor %%ymm8, %%ymm15, %%ymm15 \n\t"
        /* e[16..23] = last change vector, e[8..15] = OR of both */
        "vmovdqu %%ymm8, 64(%1) \n\t"
        "vmovdqu %%ymm15, 32(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen16)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        // Fold the final state of the rows that step 2 rewrote.
        if (h) {
            for (int i = 2; i < 18; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 18; i++) {
                j[i] &= d[i];
            }
        }
        // e[8..15] is the combined change vector. OR it together as 32-bit
        // words: reading it through a uint64_t* would violate strict aliasing
        // and assume e is 8-byte aligned. The result is the same value the
        // 64-bit fold (low half | high half) produced.
        uint32_t anydiff = 0;
        for (int i = 8; i < 16; i++) {
            anydiff |= e[i];
        }
        if (diffs != 0) {
            diffs[0] = anydiff;
            diffs[1] = e[0] | e[1];     // changes in d[2], d[3]
            // The last change vector is a full 8-word store at e[16] covering
            // d[10..17], so its final two rows sit at e[22] and e[23].
            diffs[2] = e[22] | e[23];   // changes in d[16], d[17]
        }
        return (anydiff == 0);
    }

    bool iterate_avx2_16_12(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        if (h) {
            for (int i = 0; i < 16; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 16; i++) {
                j[i] &= d[i];
            }
        }
        asm (
        "vmovdqu (%2), %%ymm14 \n\t"
        "vmovdqu 192(%2), %%ymm13 \n\t"
        "vmovdqu (%0), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        "vmovdqu 32(%0), %%ymm0 \n\t"
        "vpsrld $1, %%ymm0, %%ymm6 \n\t"
        "vpslld $1, %%ymm0, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm2 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm3 \n\t"
        "vpand %%ymm0, %%ymm2, %%ymm1 \n\t"
        "vpxor %%ymm0, %%ymm2, %%ymm4 \n\t"
        "vpor %%ymm1, %%ymm3, %%ymm5 \n\t"
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        "vmovdqu %%ymm6, (%1) \n\t"
        "vpblendd $1, %%ymm9, %%ymm2, %%ymm2 \n\t"
        "vpblendd $1, %%ymm10, %%ymm3, %%ymm3 \n\t"
        "vpblendd $3, %%ymm11, %%ymm4, %%ymm6 \n\t"
        "vpblendd $3, %%ymm12, %%ymm5, %%ymm8 \n\t"
        "vpblendd $1, %%ymm7, %%ymm0, %%ymm0 \n\t"
        "vpermd %%ymm2, %%ymm13, %%ymm2 \n\t"
        "vpermd %%ymm3, %%ymm13, %%ymm3 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm0, %%ymm13, %%ymm0 \n\t"
        "vpxor %%ymm4, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm2, %%ymm2 \n\t"
        "vpor %%ymm6, %%ymm4, %%ymm1 \n\t"
        "vpand %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm2, %%ymm0, %%ymm0 \n\t"
        "vpxor %%ymm5, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm5, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm3, %%ymm8, %%ymm2 \n\t"
        "vpxor %%ymm3, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm0, %%ymm6, %%ymm6 \n\t"
        "vmovdqu %%ymm6, 32(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen12)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 1; i < 15; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 15; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            for (int i = 2; i < 14; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        asm (
        "vmovdqu (%2), %%ymm14 \n\t"
        "vmovdqu 192(%2), %%ymm13 \n\t"
        "vmovdqu (%1), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        "vmovdqu 32(%1), %%ymm0 \n\t"
        "vpsrld $1, %%ymm0, %%ymm6 \n\t"
        "vpslld $1, %%ymm0, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm2 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm3 \n\t"
        "vpand %%ymm0, %%ymm2, %%ymm1 \n\t"
        "vpxor %%ymm0, %%ymm2, %%ymm4 \n\t"
        "vpor %%ymm1, %%ymm3, %%ymm5 \n\t"
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm14, %%ymm6, %%ymm6 \n\t"
        "vmovdqu 8(%0), %%ymm8 \n\t"
        "vpandn %%ymm8, %%ymm14, %%ymm1 \n\t"
        "vpor %%ymm6, %%ymm1, %%ymm1 \n\t"
        "vmovdqu %%ymm1, 8(%0) \n\t"
        "vpxor %%ymm1, %%ymm8, %%ymm15 \n\t"
        "vmovdqu %%ymm15, (%1) \n\t"
        "vpblendd $1, %%ymm9, %%ymm2, %%ymm2 \n\t"
        "vpblendd $1, %%ymm10, %%ymm3, %%ymm3 \n\t"
        "vpblendd $3, %%ymm11, %%ymm4, %%ymm6 \n\t"
        "vpblendd $3, %%ymm12, %%ymm5, %%ymm8 \n\t"
        "vpblendd $1, %%ymm7, %%ymm0, %%ymm0 \n\t"
        "vpermd %%ymm2, %%ymm13, %%ymm2 \n\t"
        "vpermd %%ymm3, %%ymm13, %%ymm3 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm0, %%ymm13, %%ymm0 \n\t"
        "vpxor %%ymm4, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm2, %%ymm2 \n\t"
        "vpor %%ymm6, %%ymm4, %%ymm1 \n\t"
        "vpand %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm2, %%ymm0, %%ymm0 \n\t"
        "vpxor %%ymm5, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm5, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm3, %%ymm8, %%ymm2 \n\t"
        "vpxor %%ymm3, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm0, %%ymm6, %%ymm6 \n\t"
        "vpand %%xmm14, %%xmm6, %%xmm6 \n\t"
        "vmovdqu 40(%0), %%xmm8 \n\t"
        "vpandn %%xmm8, %%xmm14, %%xmm1 \n\t"
        "vpor %%xmm6, %%xmm1, %%xmm1 \n\t"
        "vmovdqu %%xmm1, 40(%0) \n\t"
        "vpxor %%ymm1, %%ymm8, %%ymm8 \n\t"
        "vpor %%ymm8, %%ymm15, %%ymm15 \n\t"
        "vmovdqu %%ymm8, 64(%1) \n\t"
        "vmovdqu %%ymm15, 32(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen12)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        if (h) {
            for (int i = 2; i < 14; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 14; i++) {
                j[i] &= d[i];
            }
        }
        uint64_t* e64 = ((uint64_t*) e);
        uint64_t bigdiff = e64[4] | e64[5] | e64[6] | e64[7];
        if (diffs != 0) {
            diffs[0] = (bigdiff | (bigdiff >> 32));
            diffs[1] = e[0] | e[1];
            diffs[2] = e[18] | e[19];
        }
        return (bigdiff == 0);
    }

    // Advances a 12-row tile (one uint32_t per row) by two generations, or by
    // a single generation when `onegen` is set, using AVX2 inline assembly.
    //
    //   d      in/out  12 rows of cell state. On the two-generation path rows
    //                  d[2..9] are rewritten (only in the columns selected by
    //                  the mask loaded from apg::__sixteen8); on the onegen
    //                  path d[2..9] are overwritten with the generation-1 rows.
    //   e      scratch at least 12 words. After the first asm block e[i-1]
    //                  holds the generation-1 value of row i; after the second
    //                  asm block e[0..7] holds (new ^ old) for rows d[2..9].
    //   h      optional (may be null): OR-accumulates every state visited.
    //   j      optional (may be null): AND-accumulates every state visited.
    //   diffs  optional (may be null): on the two-generation path receives
    //                  diffs[0] = OR of all row change masks,
    //                  diffs[1] = change mask of the first two updated rows,
    //                  diffs[2] = change mask of the last two updated rows.
    //
    // Returns true iff no updated cell changed across the two generations;
    // always returns false on the onegen path.
    bool iterate_avx2_12_8(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        // Fold the generation-0 state into the history accumulators.
        if (h) {
            for (int i = 0; i < 12; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 12; i++) {
                j[i] &= d[i];
            }
        }
        // Generation 0 -> 1: reads d[0..11], writes e[0..11].
        asm (
        // ymm14 = mask (unused in this block), ymm13 = vpermd index table.
        "vmovdqu (%2), %%ymm14 \n\t"
        "vmovdqu 192(%2), %%ymm13 \n\t"
        // Rows d[0..7]: per-row horizontal sums of each cell with its left
        // and right neighbours (ymm9/ymm10 = neighbours only, ymm11/ymm12 =
        // including the centre cell; xor = low bit, and/or = carry).
        "vmovdqu (%0), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        // Rows d[8..11]: same sums; the 128-bit forms zero the upper lanes.
        "vmovdqu 32(%0), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm2 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm3 \n\t"
        "vpand %%ymm0, %%ymm2, %%ymm1 \n\t"
        "vpxor %%ymm0, %%ymm2, %%ymm4 \n\t"
        "vpor %%ymm1, %%ymm3, %%ymm5 \n\t"
        // Bring the neighbouring rows into alignment: blend in the leading
        // dword(s) of the other half, then rotate (vpermq $57 rotates by one
        // qword = two rows; the vpermd index in ymm13 is presumably a
        // one-row rotation -- confirm against apg::__sixteen8).
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        // Combine the three row sums and apply the rule's boolean logic.
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        "vmovdqu %%ymm6, (%1) \n\t"
        // Same again for the second half, with the two halves swapped.
        "vpblendd $1, %%ymm9, %%ymm2, %%ymm2 \n\t"
        "vpblendd $1, %%ymm10, %%ymm3, %%ymm3 \n\t"
        "vpblendd $3, %%ymm11, %%ymm4, %%ymm6 \n\t"
        "vpblendd $3, %%ymm12, %%ymm5, %%ymm8 \n\t"
        "vpblendd $1, %%ymm7, %%ymm0, %%ymm0 \n\t"
        "vpermd %%ymm2, %%ymm13, %%ymm2 \n\t"
        "vpermd %%ymm3, %%ymm13, %%ymm3 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm0, %%ymm13, %%ymm0 \n\t"
        "vpxor %%ymm4, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm2, %%ymm2 \n\t"
        "vpor %%ymm6, %%ymm4, %%ymm1 \n\t"
        "vpand %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm2, %%ymm0, %%ymm0 \n\t"
        "vpxor %%ymm5, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm5, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm3, %%ymm8, %%ymm2 \n\t"
        "vpxor %%ymm3, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm2, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm0, %%ymm6, %%ymm6 \n\t"
        "vmovdqu %%xmm6, 32(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen8)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        // Fold the generation-1 state (row i lives in e[i-1]) into h and j.
        if (h) {
            for (int i = 1; i < 11; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 11; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            // Single-generation request: publish generation 1 and stop.
            for (int i = 2; i < 10; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        // Generation 1 -> 2: reads e[0..11], merges the result into d[2..9]
        // under the mask in ymm14 and leaves (new ^ old) in e[0..7].
        asm (
        "vmovdqu (%2), %%ymm14 \n\t"
        "vmovdqu 192(%2), %%ymm13 \n\t"
        "vmovdqu (%1), %%ymm7 \n\t"
        "vpsrld $1, %%ymm7, %%ymm6 \n\t"
        "vpslld $1, %%ymm7, %%ymm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm9 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm10 \n\t"
        "vpand %%ymm7, %%ymm9, %%ymm1 \n\t"
        "vpxor %%ymm7, %%ymm9, %%ymm11 \n\t"
        "vpor %%ymm1, %%ymm10, %%ymm12 \n\t"
        "vmovdqu 32(%1), %%xmm0 \n\t"
        "vpsrld $1, %%xmm0, %%xmm6 \n\t"
        "vpslld $1, %%xmm0, %%xmm1 \n\t"
        "vpxor %%ymm6, %%ymm1, %%ymm2 \n\t"
        "vpand %%ymm6, %%ymm1, %%ymm3 \n\t"
        "vpand %%ymm0, %%ymm2, %%ymm1 \n\t"
        "vpxor %%ymm0, %%ymm2, %%ymm4 \n\t"
        "vpor %%ymm1, %%ymm3, %%ymm5 \n\t"
        "vpblendd $1, %%ymm2, %%ymm9, %%ymm9 \n\t"
        "vpblendd $1, %%ymm3, %%ymm10, %%ymm10 \n\t"
        "vpblendd $3, %%ymm4, %%ymm11, %%ymm6 \n\t"
        "vpblendd $3, %%ymm5, %%ymm12, %%ymm8 \n\t"
        "vpblendd $1, %%ymm0, %%ymm7, %%ymm7 \n\t"
        "vpermd %%ymm9, %%ymm13, %%ymm9 \n\t"
        "vpermd %%ymm10, %%ymm13, %%ymm10 \n\t"
        "vpermq $57, %%ymm6, %%ymm6 \n\t"
        "vpermq $57, %%ymm8, %%ymm8 \n\t"
        "vpermd %%ymm7, %%ymm13, %%ymm7 \n\t"
        "vpxor %%ymm11, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm6, %%ymm9, %%ymm9 \n\t"
        "vpor %%ymm6, %%ymm11, %%ymm1 \n\t"
        "vpand %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpandn %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm9, %%ymm7, %%ymm7 \n\t"
        "vpxor %%ymm12, %%ymm6, %%ymm1 \n\t"
        "vpor %%ymm12, %%ymm6, %%ymm6 \n\t"
        "vpor %%ymm10, %%ymm8, %%ymm9 \n\t"
        "vpxor %%ymm10, %%ymm8, %%ymm8 \n\t"
        "vpxor %%ymm9, %%ymm6, %%ymm6 \n\t"
        "vpxor %%ymm8, %%ymm1, %%ymm1 \n\t"
        "vpand %%ymm1, %%ymm6, %%ymm6 \n\t"
        "vpand %%ymm7, %%ymm6, %%ymm6 \n\t"
        // new = (computed & mask) | (old & ~mask); store it to d[2..9].
        "vpand %%ymm14, %%ymm6, %%ymm6 \n\t"
        "vmovdqu 8(%0), %%ymm8 \n\t"
        "vpandn %%ymm8, %%ymm14, %%ymm1 \n\t"
        "vpor %%ymm6, %%ymm1, %%ymm1 \n\t"
        "vmovdqu %%ymm1, 8(%0) \n\t"
        // e[0..7] = new ^ old, i.e. the per-row change masks for d[2..9].
        "vpxor %%ymm1, %%ymm8, %%ymm15 \n\t"
        "vmovdqu %%ymm15, (%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen8)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory");

        // Fold the generation-2 state into h and j.
        if (h) {
            for (int i = 2; i < 10; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 10; i++) {
                j[i] &= d[i];
            }
        }
        // The asm above leaves change masks in e[0..7] only (rows d[2..9]);
        // e[8..11] still hold generation-1 rows and nothing beyond e[11] is
        // written by this function, so the summary is built from e[0..7].
        uint32_t bigdiff = 0;
        for (int i = 0; i < 8; i++) {
            bigdiff |= e[i];
        }
        if (diffs != 0) {
            diffs[0] = bigdiff;
            diffs[1] = e[0] | e[1];
            diffs[2] = e[6] | e[7];
        }
        return (bigdiff == 0);
    }

    // Runs up to four AVX2 kernels in sequence on d, accumulating into both
    // h and j. A stage runs when n is at least 7, 5, 3 and 1 respectively,
    // and each later stage is handed every buffer advanced by two more
    // elements. A stage is asked for a single generation (onegen) exactly
    // when n equals its threshold. Returns 8, 6, 4 or 2 as soon as the
    // corresponding stage returns true, otherwise 0.
    int iterate_var_avx2(int n, uint32_t * __restrict__ d, uint32_t * __restrict__ h, uint32_t * __restrict__ j) {
        uint32_t scratch[32];

        if ((n >= 7) && iterate_avx2_32_28(d, scratch, h, j, 0, (n == 7))) {
            return 8;
        }
        if ((n >= 5) && iterate_avx2_28_24(d + 2, scratch + 2, h + 2, j + 2, 0, (n == 5))) {
            return 6;
        }
        if ((n >= 3) && iterate_avx2_24_20(d + 4, scratch + 4, h + 4, j + 4, 0, (n == 3))) {
            return 4;
        }
        if ((n >= 1) && iterate_avx2_20_16(d + 6, scratch + 6, h + 6, j + 6, 0, (n == 1))) {
            return 2;
        }
        return 0;
    }

    // Same staged driver as the (n, d, h, j) overload, but only the
    // OR-accumulator h is supplied; the kernels receive a null j.
    // Returns 8, 6, 4 or 2 as soon as the corresponding stage returns true,
    // otherwise 0.
    int iterate_var_avx2(int n, uint32_t * __restrict__ d, uint32_t * __restrict__ h) {
        uint32_t scratch[32];

        if ((n >= 7) && iterate_avx2_32_28(d, scratch, h, 0, 0, (n == 7))) {
            return 8;
        }
        if ((n >= 5) && iterate_avx2_28_24(d + 2, scratch + 2, h + 2, 0, 0, (n == 5))) {
            return 6;
        }
        if ((n >= 3) && iterate_avx2_24_20(d + 4, scratch + 4, h + 4, 0, 0, (n == 3))) {
            return 4;
        }
        if ((n >= 1) && iterate_avx2_20_16(d + 6, scratch + 6, h + 6, 0, 0, (n == 1))) {
            return 2;
        }
        return 0;
    }

    // Same staged driver as the other iterate_var_avx2 overloads, without any
    // history accumulation: the kernels receive null h, j and diffs.
    // Returns 8, 6, 4 or 2 as soon as the corresponding stage returns true,
    // otherwise 0.
    int iterate_var_avx2(int n, uint32_t * __restrict__ d) {
        uint32_t scratch[32];

        if ((n >= 7) && iterate_avx2_32_28(d, scratch, 0, 0, 0, (n == 7))) {
            return 8;
        }
        if ((n >= 5) && iterate_avx2_28_24(d + 2, scratch + 2, 0, 0, 0, (n == 5))) {
            return 6;
        }
        if ((n >= 3) && iterate_avx2_24_20(d + 4, scratch + 4, 0, 0, 0, (n == 3))) {
            return 4;
        }
        if ((n >= 1) && iterate_avx2_20_16(d + 6, scratch + 6, 0, 0, 0, (n == 1))) {
            return 2;
        }
        return 0;
    }



#ifdef __AVX512F__

    // Advances a 48-row tile (one uint32_t per row) by two generations, or by
    // a single generation when `onegen` is set, using AVX-512 inline assembly.
    //
    //   d      in/out  48 rows. Two-generation path: d[2..45] are rewritten,
    //                  but only in the bit positions selected by the mask
    //                  loaded from the start of apg::__sixteen28. Onegen path:
    //                  d[2..45] are overwritten with the generation-1 rows.
    //   e      scratch at least 48 words. After the first asm block e[i-1]
    //                  holds the generation-1 value of row i. After the second
    //                  asm block e[0..15] holds (new ^ old) for d[2..17],
    //                  e[8..15] is then overwritten with a folded OR of the
    //                  change masks of d[2..33], and e[16..27] holds
    //                  (new ^ old) for d[34..45].
    //   h      optional (may be null): OR-accumulates every state visited.
    //   j      optional (may be null): AND-accumulates every state visited.
    //   diffs  optional (may be null): on the two-generation path receives
    //                  three summary words derived from the change masks.
    //
    // Returns true iff every change mask is zero (rows d[2..45] did not change
    // over the two generations); always returns false on the onegen path.
    bool iterate_avx512_48_28(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        // Fold the generation-0 state into the history accumulators.
        if (h) {
            for (int i = 0; i < 48; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 48; i++) {
                j[i] &= d[i];
            }
        }
        // Generation 0 -> 1: reads d[0..47], writes e[0..47].
        asm (
        // zmm14 = write mask (unused in this block); zmm16 / zmm17 = index
        // tables for vpermi2d (presumably one-row and two-row shifts across a
        // pair of 16-row blocks -- confirm against apg::__sixteen28).
        "vmovdqu64 (%2), %%zmm14 \n\t"
        "vmovdqu64 64(%2), %%zmm16 \n\t"
        "vmovdqu64 128(%2), %%zmm17 \n\t"
        // Block A = d[0..15]: zmm9 = (x<<1)^(x>>1), zmm12 = majority of
        // (x, x<<1, x>>1) (imm 232), zmm11 = x^(x<<1)^(x>>1).
        "vmovdqu64 (%0), %%zmm7 \n\t"
        "vpsrld $1, %%zmm7, %%zmm6 \n\t"
        "vpslld $1, %%zmm7, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm9 \n\t"
        "vmovdqa32 %%zmm7, %%zmm12 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm12 \n\t"
        "vpxord %%zmm7, %%zmm9, %%zmm11 \n\t"
        // Block B = d[16..31]: the same three quantities in zmm2 / zmm5 / zmm4.
        "vmovdqu64 64(%0), %%zmm0 \n\t"
        "vpsrld $1, %%zmm0, %%zmm6 \n\t"
        "vpslld $1, %%zmm0, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm2 \n\t"
        "vmovdqa32 %%zmm0, %%zmm5 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm5 \n\t"
        "vpxord %%zmm0, %%zmm2, %%zmm4 \n\t"
        // Output for block A: gather shifted rows from the (A, B) pair and
        // reduce them with the rule's three-input boolean functions.
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm2, %%zmm9, %%zmm18 \n\t"
        "vpermi2d %%zmm4, %%zmm11, %%zmm6 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm8 \n\t"
        "vpermi2d %%zmm0, %%zmm7, %%zmm20 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm11, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm11 \n\t"
        "vpternlogd $166, %%zmm11, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm12, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm11, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        "vmovdqu64 %%zmm6, (%1) \n\t"
        // Block C = d[32..47] replaces block A in zmm7 / zmm9 / zmm12 / zmm11.
        "vmovdqu64 128(%0), %%zmm7 \n\t"
        "vpsrld $1, %%zmm7, %%zmm6 \n\t"
        "vpslld $1, %%zmm7, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm9 \n\t"
        "vmovdqa32 %%zmm7, %%zmm12 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm12 \n\t"
        "vpxord %%zmm7, %%zmm9, %%zmm11 \n\t"
        // Output for block B, gathered from the (B, C) pair -> e[16..31].
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm9, %%zmm2, %%zmm18 \n\t"
        "vpermi2d %%zmm11, %%zmm4, %%zmm6 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm8 \n\t"
        "vpermi2d %%zmm7, %%zmm0, %%zmm20 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm4, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm4 \n\t"
        "vpternlogd $166, %%zmm4, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm5, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm4, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        "vmovdqu64 %%zmm6, 64(%1) \n\t"
        // Output for block C, gathered from the (C, B) pair -> e[32..47].
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm2, %%zmm9, %%zmm18 \n\t"
        "vpermi2d %%zmm4, %%zmm11, %%zmm6 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm8 \n\t"
        "vpermi2d %%zmm0, %%zmm7, %%zmm20 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm11, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm11 \n\t"
        "vpternlogd $166, %%zmm11, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm12, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm11, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        "vmovdqu64 %%zmm6, 128(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen28)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm16", 
                    "xmm17", "xmm18", "xmm19", "xmm20", "memory");

        // Fold the generation-1 state (row i lives in e[i-1]) into h and j.
        if (h) {
            for (int i = 1; i < 47; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 47; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            // Single-generation request: publish generation 1 and stop.
            for (int i = 2; i < 46; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        // Generation 1 -> 2: reads e[0..47], merges the result into d[2..45]
        // under the mask in zmm14 and leaves change masks in e[0..27].
        asm (
        "vmovdqu64 (%2), %%zmm14 \n\t"
        "vmovdqu64 64(%2), %%zmm16 \n\t"
        "vmovdqu64 128(%2), %%zmm17 \n\t"
        // Block A = e[0..15].
        "vmovdqu64 (%1), %%zmm7 \n\t"
        "vpsrld $1, %%zmm7, %%zmm6 \n\t"
        "vpslld $1, %%zmm7, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm9 \n\t"
        "vmovdqa32 %%zmm7, %%zmm12 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm12 \n\t"
        "vpxord %%zmm7, %%zmm9, %%zmm11 \n\t"
        // Block B = e[16..31].
        "vmovdqu64 64(%1), %%zmm0 \n\t"
        "vpsrld $1, %%zmm0, %%zmm6 \n\t"
        "vpslld $1, %%zmm0, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm2 \n\t"
        "vmovdqa32 %%zmm0, %%zmm5 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm5 \n\t"
        "vpxord %%zmm0, %%zmm2, %%zmm4 \n\t"
        // Output for block A from the (A, B) pair.
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm2, %%zmm9, %%zmm18 \n\t"
        "vpermi2d %%zmm4, %%zmm11, %%zmm6 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm8 \n\t"
        "vpermi2d %%zmm0, %%zmm7, %%zmm20 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm11, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm11 \n\t"
        "vpternlogd $166, %%zmm11, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm12, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm11, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // imm 228 selects (mask ? computed : old); write d[2..17] and keep
        // the change mask (new ^ old) in zmm15 and e[0..15].
        "vmovdqu64 8(%0), %%zmm8 \n\t"
        "vpternlogd $228, %%zmm14, %%zmm8, %%zmm6 \n\t"
        "vmovdqu64 %%zmm6, 8(%0) \n\t"
        "vpxord %%zmm6, %%zmm8, %%zmm15 \n\t"
        "vmovdqu64 %%zmm15, (%1) \n\t"
        // Block C = e[32..47] (still generation-1 data; only e[0..15] has been
        // overwritten so far).
        "vmovdqu64 128(%1), %%zmm7 \n\t"
        "vpsrld $1, %%zmm7, %%zmm6 \n\t"
        "vpslld $1, %%zmm7, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm9 \n\t"
        "vmovdqa32 %%zmm7, %%zmm12 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm12 \n\t"
        "vpxord %%zmm7, %%zmm9, %%zmm11 \n\t"
        // Output for block B from the (B, C) pair.
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm9, %%zmm2, %%zmm18 \n\t"
        "vpermi2d %%zmm11, %%zmm4, %%zmm6 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm8 \n\t"
        "vpermi2d %%zmm7, %%zmm0, %%zmm20 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm4, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm4 \n\t"
        "vpternlogd $166, %%zmm4, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm5, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm4, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // Write d[18..33]; imm 246 is A | (B ^ C), so zmm15 accumulates this
        // block's change mask on top of the first one.
        "vmovdqu64 72(%0), %%zmm8 \n\t"
        "vpternlogd $228, %%zmm14, %%zmm8, %%zmm6 \n\t"
        "vmovdqu64 %%zmm6, 72(%0) \n\t"
        "vpternlogd $246, %%zmm6, %%zmm8, %%zmm15 \n\t"
        // Output for block C from the (C, B) pair.
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm2, %%zmm9, %%zmm18 \n\t"
        "vpermi2d %%zmm4, %%zmm11, %%zmm6 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm8 \n\t"
        "vpermi2d %%zmm0, %%zmm7, %%zmm20 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm11, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm11 \n\t"
        "vpternlogd $166, %%zmm11, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm12, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm11, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // Only 12 rows (d[34..45]) are updated here: they are loaded and
        // stored as a 256-bit plus a 128-bit piece so d[46..47] stay untouched.
        "vmovdqu 136(%0), %%ymm8 \n\t"
        "vmovdqu 168(%0), %%xmm13 \n\t"
        "vshufi32x4 $68, %%zmm13, %%zmm8, %%zmm8 \n\t"
        "vpternlogd $228, %%zmm14, %%zmm8, %%zmm6 \n\t"
        "vshufi32x4 $78, %%zmm6, %%zmm6, %%zmm13 \n\t"
        "vmovdqu %%ymm6, 136(%0) \n\t"
        "vmovdqu %%xmm13, 168(%0) \n\t"
        "vpxord %%zmm6, %%zmm8, %%zmm8 \n\t"
        // Fold the upper half of the accumulated change mask onto the lower
        // half and store it in e[8..15]; store this block's change mask in
        // e[16..27].
        "vshufi32x4 $78, %%zmm15, %%zmm15, %%zmm13 \n\t"
        "vpord %%zmm13, %%zmm15, %%zmm15 \n\t"
        "vmovdqu %%ymm15, 32(%1) \n\t"
        "vshufi32x4 $78, %%zmm8, %%zmm8, %%zmm13 \n\t"
        "vmovdqu %%ymm8, 64(%1) \n\t"
        "vmovdqu %%xmm13, 96(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen28)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm16", 
                    "xmm17", "xmm18", "xmm19", "xmm20", "memory");

        // Fold the generation-2 state into h and j.
        if (h) {
            for (int i = 2; i < 46; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 46; i++) {
                j[i] &= d[i];
            }
        }
        // e64[4..13] aliases e[8..27], which now holds every change mask.
        uint64_t* e64 = ((uint64_t*) e);
        uint64_t bigdiff = e64[4] | e64[5] | e64[6] | e64[7] | e64[8] | e64[9] | e64[10] | e64[11] | e64[12] | e64[13];
        if (diffs != 0) {
            // diffs[0]: OR of all change masks, folded to 32 bits.
            diffs[0] = (bigdiff | (bigdiff >> 32));
            // diffs[1]: change mask of rows d[2] and d[3].
            diffs[1] = e[0] | e[1];
            // diffs[2]: change mask of rows d[44] and d[45].
            diffs[2] = e[26] | e[27];
        }
        return (bigdiff == 0);
    }

    // Advances a 32-row tile (one uint32_t per row) by two generations, or by
    // a single generation when `onegen` is set, using AVX-512 inline assembly.
    //
    //   d      in/out  32 rows. Two-generation path: d[2..29] are rewritten,
    //                  but only in the bit positions selected by the mask
    //                  loaded from the start of apg::__sixteen28. Onegen path:
    //                  d[2..29] are overwritten with the generation-1 rows.
    //   e      scratch at least 32 words. After the first asm block e[i-1]
    //                  holds the generation-1 value of row i. After the second
    //                  asm block e[0..15] holds (new ^ old) for d[2..17],
    //                  e[8..15] is then overwritten with the OR of both halves
    //                  of that mask, and e[16..27] holds (new ^ old) for
    //                  d[18..29].
    //   h      optional (may be null): OR-accumulates every state visited.
    //   j      optional (may be null): AND-accumulates every state visited.
    //   diffs  optional (may be null): on the two-generation path receives
    //                  three summary words derived from the change masks.
    //
    // Returns true iff every change mask is zero (rows d[2..29] did not change
    // over the two generations); always returns false on the onegen path.
    bool iterate_avx512_32_28(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        // Fold the generation-0 state into the history accumulators.
        if (h) {
            for (int i = 0; i < 32; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 32; i++) {
                j[i] &= d[i];
            }
        }
        // Generation 0 -> 1: reads d[0..31], writes e[0..31].
        asm (
        // zmm14 = write mask (unused in this block); zmm16 / zmm17 = index
        // tables for vpermi2d (presumably one-row and two-row shifts across a
        // pair of 16-row blocks -- confirm against apg::__sixteen28).
        "vmovdqu64 (%2), %%zmm14 \n\t"
        "vmovdqu64 64(%2), %%zmm16 \n\t"
        "vmovdqu64 128(%2), %%zmm17 \n\t"
        // Block A = d[0..15]: zmm9 = (x<<1)^(x>>1), zmm12 = majority of
        // (x, x<<1, x>>1) (imm 232), zmm11 = x^(x<<1)^(x>>1).
        "vmovdqu64 (%0), %%zmm7 \n\t"
        "vpsrld $1, %%zmm7, %%zmm6 \n\t"
        "vpslld $1, %%zmm7, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm9 \n\t"
        "vmovdqa32 %%zmm7, %%zmm12 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm12 \n\t"
        "vpxord %%zmm7, %%zmm9, %%zmm11 \n\t"
        // Block B = d[16..31]: the same three quantities in zmm2 / zmm5 / zmm4.
        "vmovdqu64 64(%0), %%zmm0 \n\t"
        "vpsrld $1, %%zmm0, %%zmm6 \n\t"
        "vpslld $1, %%zmm0, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm2 \n\t"
        "vmovdqa32 %%zmm0, %%zmm5 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm5 \n\t"
        "vpxord %%zmm0, %%zmm2, %%zmm4 \n\t"
        // Output for block A, gathered from the (A, B) pair -> e[0..15].
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm2, %%zmm9, %%zmm18 \n\t"
        "vpermi2d %%zmm4, %%zmm11, %%zmm6 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm8 \n\t"
        "vpermi2d %%zmm0, %%zmm7, %%zmm20 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm11, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm11 \n\t"
        "vpternlogd $166, %%zmm11, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm12, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm11, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        "vmovdqu64 %%zmm6, (%1) \n\t"
        // Output for block B, gathered from the (B, A) pair -> e[16..31].
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm9, %%zmm2, %%zmm18 \n\t"
        "vpermi2d %%zmm11, %%zmm4, %%zmm6 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm8 \n\t"
        "vpermi2d %%zmm7, %%zmm0, %%zmm20 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm4, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm4 \n\t"
        "vpternlogd $166, %%zmm4, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm5, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm4, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        "vmovdqu64 %%zmm6, 64(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen28)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm16", 
                    "xmm17", "xmm18", "xmm19", "xmm20", "memory");

        // Fold the generation-1 state (row i lives in e[i-1]) into h and j.
        if (h) {
            for (int i = 1; i < 31; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 31; i++) {
                j[i] &= e[i-1];
            }
        }
        if (onegen) {
            // Single-generation request: publish generation 1 and stop.
            for (int i = 2; i < 30; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        // Generation 1 -> 2: reads e[0..31], merges the result into d[2..29]
        // under the mask in zmm14 and leaves change masks in e[0..27].
        asm (
        "vmovdqu64 (%2), %%zmm14 \n\t"
        "vmovdqu64 64(%2), %%zmm16 \n\t"
        "vmovdqu64 128(%2), %%zmm17 \n\t"
        // Block A = e[0..15].
        "vmovdqu64 (%1), %%zmm7 \n\t"
        "vpsrld $1, %%zmm7, %%zmm6 \n\t"
        "vpslld $1, %%zmm7, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm9 \n\t"
        "vmovdqa32 %%zmm7, %%zmm12 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm12 \n\t"
        "vpxord %%zmm7, %%zmm9, %%zmm11 \n\t"
        // Block B = e[16..31].
        "vmovdqu64 64(%1), %%zmm0 \n\t"
        "vpsrld $1, %%zmm0, %%zmm6 \n\t"
        "vpslld $1, %%zmm0, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm2 \n\t"
        "vmovdqa32 %%zmm0, %%zmm5 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm5 \n\t"
        "vpxord %%zmm0, %%zmm2, %%zmm4 \n\t"
        // Output for block A from the (A, B) pair.
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm2, %%zmm9, %%zmm18 \n\t"
        "vpermi2d %%zmm4, %%zmm11, %%zmm6 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm8 \n\t"
        "vpermi2d %%zmm0, %%zmm7, %%zmm20 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm11, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm11 \n\t"
        "vpternlogd $166, %%zmm11, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm12, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm11, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // imm 228 selects (mask ? computed : old); write d[2..17] and keep
        // the change mask (new ^ old) in zmm15 and e[0..15].
        "vmovdqu64 8(%0), %%zmm8 \n\t"
        "vpternlogd $228, %%zmm14, %%zmm8, %%zmm6 \n\t"
        "vmovdqu64 %%zmm6, 8(%0) \n\t"
        "vpxord %%zmm6, %%zmm8, %%zmm15 \n\t"
        "vmovdqu64 %%zmm15, (%1) \n\t"
        // Output for block B from the (B, A) pair.
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm9, %%zmm2, %%zmm18 \n\t"
        "vpermi2d %%zmm11, %%zmm4, %%zmm6 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm8 \n\t"
        "vpermi2d %%zmm7, %%zmm0, %%zmm20 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm4, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm4 \n\t"
        "vpternlogd $166, %%zmm4, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm5, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm4, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // Only 12 rows (d[18..29]) are updated here: they are loaded and
        // stored as a 256-bit plus a 128-bit piece so d[30..31] stay untouched.
        "vmovdqu 72(%0), %%ymm8 \n\t"
        "vmovdqu 104(%0), %%xmm13 \n\t"
        "vshufi32x4 $68, %%zmm13, %%zmm8, %%zmm8 \n\t"
        "vpternlogd $228, %%zmm14, %%zmm8, %%zmm6 \n\t"
        "vshufi32x4 $78, %%zmm6, %%zmm6, %%zmm13 \n\t"
        "vmovdqu %%ymm6, 72(%0) \n\t"
        "vmovdqu %%xmm13, 104(%0) \n\t"
        "vpxord %%zmm6, %%zmm8, %%zmm8 \n\t"
        // Fold the upper half of the first change mask onto its lower half
        // and store it in e[8..15]; store this block's change mask in
        // e[16..27].
        "vshufi32x4 $78, %%zmm15, %%zmm15, %%zmm13 \n\t"
        "vpord %%zmm13, %%zmm15, %%zmm15 \n\t"
        "vmovdqu %%ymm15, 32(%1) \n\t"
        "vshufi32x4 $78, %%zmm8, %%zmm8, %%zmm13 \n\t"
        "vmovdqu %%ymm8, 64(%1) \n\t"
        "vmovdqu %%xmm13, 96(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen28)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm16", 
                    "xmm17", "xmm18", "xmm19", "xmm20", "memory");

        // Fold the generation-2 state into h and j.
        if (h) {
            for (int i = 2; i < 30; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 30; i++) {
                j[i] &= d[i];
            }
        }
        // e64[4..13] aliases e[8..27], which now holds every change mask.
        uint64_t* e64 = ((uint64_t*) e);
        uint64_t bigdiff = e64[4] | e64[5] | e64[6] | e64[7] | e64[8] | e64[9] | e64[10] | e64[11] | e64[12] | e64[13];
        if (diffs != 0) {
            // diffs[0]: OR of all change masks, folded to 32 bits.
            diffs[0] = (bigdiff | (bigdiff >> 32));
            // diffs[1]: change mask of rows d[2] and d[3].
            diffs[1] = e[0] | e[1];
            // diffs[2]: change mask of rows d[28] and d[29].
            diffs[2] = e[26] | e[27];
        }
        return (bigdiff == 0);
    }

    /*
     * Two-pass AVX-512 kernel over the first 28 words of d.
     *
     * Pass 1 reads d[0..27] and writes 28 result words to e; e[i-1] is then
     * treated as the new value belonging to index i. Pass 2 reads e, computes
     * another step, blends it into d[2..25] under the mask loaded into zmm14,
     * and leaves (new XOR old) difference words in e.
     *
     * d      - state words; d[2..25] are overwritten.
     * e      - scratch/output buffer; needs room for at least 28 words.
     * h      - optional (may be null); OR-accumulates every state seen.
     * j      - optional (may be null); AND-accumulates every state seen.
     * diffs  - optional (may be null); receives three summary words built
     *          from the difference data in e.
     * onegen - when true, copy the pass-1 result into d[2..25] and return
     *          false without running pass 2.
     *
     * Returns true when e[8..23] hold no set difference bit after pass 2.
     *
     * NOTE(review): the namespace suggests the ternary-logic network implements
     * the B3/S23 rule; that is not verified from this block alone.
     */
    bool iterate_avx512_28_24(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        // Fold the incoming state into the optional accumulators.
        if (h) {
            for (int i = 0; i < 28; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 28; i++) {
                j[i] &= d[i];
            }
        }
        // Pass 1: d (%0) -> e (%1).
        asm (
        // Three 64-byte constant vectors from the table passed as %2:
        // zmm14 is used as a blend mask, zmm16/zmm17 as vpermi2d index vectors.
        "vmovdqu64 (%2), %%zmm14 \n\t"
        "vmovdqu64 64(%2), %%zmm16 \n\t"
        "vmovdqu64 128(%2), %%zmm17 \n\t"
        // d[0..15]: zmm6 = word >> 1, zmm1 = word << 1, zmm9 = zmm6 ^ zmm1,
        // zmm12 = bitwise majority of the three (imm 232), zmm11 = XOR of the three.
        "vmovdqu64 (%0), %%zmm7 \n\t"
        "vpsrld $1, %%zmm7, %%zmm6 \n\t"
        "vpslld $1, %%zmm7, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm9 \n\t"
        "vmovdqa32 %%zmm7, %%zmm12 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm12 \n\t"
        "vpxord %%zmm7, %%zmm9, %%zmm11 \n\t"
        // d[16..27]: an 8-word and a 4-word load joined into zmm0 (imm 68
        // concatenates the low 256 bits of each source); same sums into
        // zmm2 / zmm5 / zmm4.
        "vmovdqu 64(%0), %%ymm0 \n\t"
        "vmovdqu 96(%0), %%xmm13 \n\t"
        "vshufi32x4 $68, %%zmm13, %%zmm0, %%zmm0 \n\t"
        "vpsrld $1, %%zmm0, %%zmm6 \n\t"
        "vpslld $1, %%zmm0, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm2 \n\t"
        "vmovdqa32 %%zmm0, %%zmm5 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm5 \n\t"
        "vpxord %%zmm0, %%zmm2, %%zmm4 \n\t"
        // First half: copy the index vectors (vpermi2d overwrites its index
        // operand), then gather words from the two-register tables.
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm2, %%zmm9, %%zmm18 \n\t"
        "vpermi2d %%zmm4, %%zmm11, %%zmm6 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm8 \n\t"
        "vpermi2d %%zmm0, %%zmm7, %%zmm20 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm19 \n\t"
        // Ternary-logic network; the result for this half ends up in zmm6.
        "vpternlogd $150, %%zmm18, %%zmm11, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm11 \n\t"
        "vpternlogd $166, %%zmm11, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm12, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm11, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // e[0..15] = first-half result.
        "vmovdqu64 %%zmm6, (%1) \n\t"
        // Second half: same sequence with the two tables swapped.
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm9, %%zmm2, %%zmm18 \n\t"
        "vpermi2d %%zmm11, %%zmm4, %%zmm6 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm8 \n\t"
        "vpermi2d %%zmm7, %%zmm0, %%zmm20 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm4, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm4 \n\t"
        "vpternlogd $166, %%zmm4, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm5, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm4, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // imm 78 swaps the 256-bit halves, so xmm13 receives the third
        // 128-bit lane of zmm6; e[16..23] and e[24..27] are stored.
        "vshufi32x4 $78, %%zmm6, %%zmm6, %%zmm13 \n\t"
        "vmovdqu %%ymm6, 64(%1) \n\t"
        "vmovdqu %%xmm13, 96(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen24)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm16", 
                    "xmm17", "xmm18", "xmm19", "xmm20", "memory");

        // Fold the pass-1 result (shifted by one index) into the accumulators.
        if (h) {
            for (int i = 1; i < 27; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 27; i++) {
                j[i] &= e[i-1];
            }
        }
        // Single-step mode: commit the pass-1 result and stop here.
        if (onegen) {
            for (int i = 2; i < 26; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        // Pass 2: e (%1) -> blended into d (%0), differences back into e.
        asm (
        "vmovdqu64 (%2), %%zmm14 \n\t"
        "vmovdqu64 64(%2), %%zmm16 \n\t"
        "vmovdqu64 128(%2), %%zmm17 \n\t"
        // Same shift/sum preparation as pass 1, reading e[0..15] and e[16..27].
        "vmovdqu64 (%1), %%zmm7 \n\t"
        "vpsrld $1, %%zmm7, %%zmm6 \n\t"
        "vpslld $1, %%zmm7, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm9 \n\t"
        "vmovdqa32 %%zmm7, %%zmm12 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm12 \n\t"
        "vpxord %%zmm7, %%zmm9, %%zmm11 \n\t"
        "vmovdqu 64(%1), %%ymm0 \n\t"
        "vmovdqu 96(%1), %%xmm13 \n\t"
        "vshufi32x4 $68, %%zmm13, %%zmm0, %%zmm0 \n\t"
        "vpsrld $1, %%zmm0, %%zmm6 \n\t"
        "vpslld $1, %%zmm0, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm2 \n\t"
        "vmovdqa32 %%zmm0, %%zmm5 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm5 \n\t"
        "vpxord %%zmm0, %%zmm2, %%zmm4 \n\t"
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm2, %%zmm9, %%zmm18 \n\t"
        "vpermi2d %%zmm4, %%zmm11, %%zmm6 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm8 \n\t"
        "vpermi2d %%zmm0, %%zmm7, %%zmm20 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm11, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm11 \n\t"
        "vpternlogd $166, %%zmm11, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm12, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm11, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // Old d[2..17] in zmm8; imm 228 keeps the new bit where zmm14 is set
        // and the old bit elsewhere. Write back, then e[0..15] = new ^ old.
        "vmovdqu64 8(%0), %%zmm8 \n\t"
        "vpternlogd $228, %%zmm14, %%zmm8, %%zmm6 \n\t"
        "vmovdqu64 %%zmm6, 8(%0) \n\t"
        "vpxord %%zmm6, %%zmm8, %%zmm15 \n\t"
        "vmovdqu64 %%zmm15, (%1) \n\t"
        // Second half with the tables swapped.
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm9, %%zmm2, %%zmm18 \n\t"
        "vpermi2d %%zmm11, %%zmm4, %%zmm6 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm8 \n\t"
        "vpermi2d %%zmm7, %%zmm0, %%zmm20 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm4, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm4 \n\t"
        "vpternlogd $166, %%zmm4, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm5, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm4, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // Old d[18..25]; blend, write back, zmm8 = new ^ old.
        "vmovdqu 72(%0), %%ymm8 \n\t"
        "vpternlogd $228, %%zmm14, %%zmm8, %%zmm6 \n\t"
        "vmovdqu %%ymm6, 72(%0) \n\t"
        "vpxord %%zmm6, %%zmm8, %%zmm8 \n\t"
        // OR the two 256-bit halves of the first difference vector together
        // and store that at e[8..15]; second-half differences go to e[16..23].
        "vshufi32x4 $78, %%zmm15, %%zmm15, %%zmm13 \n\t"
        "vpord %%zmm13, %%zmm15, %%zmm15 \n\t"
        "vmovdqu %%ymm15, 32(%1) \n\t"
        "vmovdqu %%ymm8, 64(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen24)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm16", 
                    "xmm17", "xmm18", "xmm19", "xmm20", "memory");

        // Fold the updated words of d into the accumulators.
        if (h) {
            for (int i = 2; i < 26; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 26; i++) {
                j[i] &= d[i];
            }
        }
        // NOTE(review): e is uint32_t storage read through a uint64_t pointer;
        // this relies on the compiler tolerating the type pun and on e being
        // acceptable for 8-byte access.
        uint64_t* e64 = ((uint64_t*) e);
        // OR of e[8..23]; e[8..15] already carry the folded first-half differences.
        uint64_t bigdiff = e64[4] | e64[5] | e64[6] | e64[7] | e64[8] | e64[9] | e64[10] | e64[11];
        if (diffs != 0) {
            // diffs[0]: all difference bits collapsed into one 32-bit word.
            diffs[0] = (bigdiff | (bigdiff >> 32));
            // diffs[1] / diffs[2]: differences of the first two and last two updated words.
            diffs[1] = e[0] | e[1];
            diffs[2] = e[22] | e[23];
        }
        return (bigdiff == 0);
    }

    /*
     * Two-pass AVX-512 kernel over the first 24 words of d.
     *
     * Pass 1 reads d[0..23] and writes 24 result words to e; e[i-1] is then
     * treated as the new value belonging to index i. Pass 2 reads e, computes
     * another step, blends it into d[2..21] under the mask loaded into zmm14,
     * and leaves (new XOR old) difference words in e.
     *
     * d      - state words; d[2..21] are overwritten.
     * e      - scratch/output buffer; needs room for at least 24 words.
     * h      - optional (may be null); OR-accumulates every state seen.
     * j      - optional (may be null); AND-accumulates every state seen.
     * diffs  - optional (may be null); receives three summary words built
     *          from the difference data in e.
     * onegen - when true, copy the pass-1 result into d[2..21] and return
     *          false without running pass 2.
     *
     * Returns true when e[8..19] hold no set difference bit after pass 2.
     */
    bool iterate_avx512_24_20(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        // Fold the incoming state into the optional accumulators.
        if (h) {
            for (int i = 0; i < 24; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 24; i++) {
                j[i] &= d[i];
            }
        }
        // Pass 1: d (%0) -> e (%1).
        asm (
        // Constants from the table passed as %2: zmm14 is used as a blend
        // mask, zmm16/zmm17 as vpermi2d index vectors.
        "vmovdqu64 (%2), %%zmm14 \n\t"
        "vmovdqu64 64(%2), %%zmm16 \n\t"
        "vmovdqu64 128(%2), %%zmm17 \n\t"
        // d[0..15]: zmm9 = (>>1) ^ (<<1), zmm12 = majority(word, <<1, >>1)
        // (imm 232), zmm11 = XOR of all three.
        "vmovdqu64 (%0), %%zmm7 \n\t"
        "vpsrld $1, %%zmm7, %%zmm6 \n\t"
        "vpslld $1, %%zmm7, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm9 \n\t"
        "vmovdqa32 %%zmm7, %%zmm12 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm12 \n\t"
        "vpxord %%zmm7, %%zmm9, %%zmm11 \n\t"
        // d[16..23] via a 256-bit load (upper half of zmm0 is zeroed); same
        // sums into zmm2 / zmm5 / zmm4.
        "vmovdqu 64(%0), %%ymm0 \n\t"
        "vpsrld $1, %%zmm0, %%zmm6 \n\t"
        "vpslld $1, %%zmm0, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm2 \n\t"
        "vmovdqa32 %%zmm0, %%zmm5 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm5 \n\t"
        "vpxord %%zmm0, %%zmm2, %%zmm4 \n\t"
        // First half: copy index vectors, gather across the register pair.
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm2, %%zmm9, %%zmm18 \n\t"
        "vpermi2d %%zmm4, %%zmm11, %%zmm6 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm8 \n\t"
        "vpermi2d %%zmm0, %%zmm7, %%zmm20 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm19 \n\t"
        // Ternary-logic network; the result for this half ends up in zmm6.
        "vpternlogd $150, %%zmm18, %%zmm11, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm11 \n\t"
        "vpternlogd $166, %%zmm11, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm12, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm11, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // e[0..15] = first-half result.
        "vmovdqu64 %%zmm6, (%1) \n\t"
        // Second half: same sequence with the two tables swapped.
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm9, %%zmm2, %%zmm18 \n\t"
        "vpermi2d %%zmm11, %%zmm4, %%zmm6 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm8 \n\t"
        "vpermi2d %%zmm7, %%zmm0, %%zmm20 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm4, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm4 \n\t"
        "vpternlogd $166, %%zmm4, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm5, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm4, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // e[16..23] = second-half result.
        "vmovdqu %%ymm6, 64(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen20)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm16", 
                    "xmm17", "xmm18", "xmm19", "xmm20", "memory");

        // Fold the pass-1 result (shifted by one index) into the accumulators.
        if (h) {
            for (int i = 1; i < 23; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 23; i++) {
                j[i] &= e[i-1];
            }
        }
        // Single-step mode: commit the pass-1 result and stop here.
        if (onegen) {
            for (int i = 2; i < 22; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        // Pass 2: e (%1) -> blended into d (%0), differences back into e.
        asm (
        "vmovdqu64 (%2), %%zmm14 \n\t"
        "vmovdqu64 64(%2), %%zmm16 \n\t"
        "vmovdqu64 128(%2), %%zmm17 \n\t"
        // Same shift/sum preparation as pass 1, reading e[0..15] and e[16..23].
        "vmovdqu64 (%1), %%zmm7 \n\t"
        "vpsrld $1, %%zmm7, %%zmm6 \n\t"
        "vpslld $1, %%zmm7, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm9 \n\t"
        "vmovdqa32 %%zmm7, %%zmm12 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm12 \n\t"
        "vpxord %%zmm7, %%zmm9, %%zmm11 \n\t"
        "vmovdqu 64(%1), %%ymm0 \n\t"
        "vpsrld $1, %%zmm0, %%zmm6 \n\t"
        "vpslld $1, %%zmm0, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm2 \n\t"
        "vmovdqa32 %%zmm0, %%zmm5 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm5 \n\t"
        "vpxord %%zmm0, %%zmm2, %%zmm4 \n\t"
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm2, %%zmm9, %%zmm18 \n\t"
        "vpermi2d %%zmm4, %%zmm11, %%zmm6 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm8 \n\t"
        "vpermi2d %%zmm0, %%zmm7, %%zmm20 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm11, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm11 \n\t"
        "vpternlogd $166, %%zmm11, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm12, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm11, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // Old d[2..17] in zmm8; imm 228 keeps the new bit where zmm14 is set
        // and the old bit elsewhere. Write back, then e[0..15] = new ^ old.
        "vmovdqu64 8(%0), %%zmm8 \n\t"
        "vpternlogd $228, %%zmm14, %%zmm8, %%zmm6 \n\t"
        "vmovdqu64 %%zmm6, 8(%0) \n\t"
        "vpxord %%zmm6, %%zmm8, %%zmm15 \n\t"
        "vmovdqu64 %%zmm15, (%1) \n\t"
        // Second half with the tables swapped.
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm9, %%zmm2, %%zmm18 \n\t"
        "vpermi2d %%zmm11, %%zmm4, %%zmm6 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm8 \n\t"
        "vpermi2d %%zmm7, %%zmm0, %%zmm20 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm4, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm4 \n\t"
        "vpternlogd $166, %%zmm4, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm5, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm4, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // Old d[18..21]; blend, write back, zmm8 = new ^ old.
        "vmovdqu 72(%0), %%xmm8 \n\t"
        "vpternlogd $228, %%zmm14, %%zmm8, %%zmm6 \n\t"
        "vmovdqu %%xmm6, 72(%0) \n\t"
        "vpxord %%zmm6, %%zmm8, %%zmm8 \n\t"
        // OR the two 256-bit halves of the first difference vector (imm 78
        // swaps them) and store that at e[8..15]; second-half differences go
        // to e[16..19].
        "vshufi32x4 $78, %%zmm15, %%zmm15, %%zmm13 \n\t"
        "vpord %%zmm13, %%zmm15, %%zmm15 \n\t"
        "vmovdqu %%ymm15, 32(%1) \n\t"
        "vmovdqu %%xmm8, 64(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen20)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm16", 
                    "xmm17", "xmm18", "xmm19", "xmm20", "memory");

        // Fold the updated words of d into the accumulators.
        if (h) {
            for (int i = 2; i < 22; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 22; i++) {
                j[i] &= d[i];
            }
        }
        // NOTE(review): e is uint32_t storage read through a uint64_t pointer;
        // this relies on the compiler tolerating the type pun and on e being
        // acceptable for 8-byte access.
        uint64_t* e64 = ((uint64_t*) e);
        // OR of e[8..19]; e[8..15] already carry the folded first-half differences.
        uint64_t bigdiff = e64[4] | e64[5] | e64[6] | e64[7] | e64[8] | e64[9];
        if (diffs != 0) {
            // diffs[0]: all difference bits collapsed into one 32-bit word.
            diffs[0] = (bigdiff | (bigdiff >> 32));
            // diffs[1] / diffs[2]: differences of the first two and last two updated words.
            diffs[1] = e[0] | e[1];
            diffs[2] = e[18] | e[19];
        }
        return (bigdiff == 0);
    }

    /*
     * Two-pass AVX-512 kernel over the first 20 words of d.
     *
     * Pass 1 reads d[0..19] and writes 20 result words to e; e[i-1] is then
     * treated as the new value belonging to index i. Pass 2 reads e, computes
     * another step for one 16-word vector, blends it into d[2..17] under the
     * mask loaded into zmm14, and leaves (new XOR old) difference words in
     * e[0..15].
     *
     * d      - state words; d[2..17] are overwritten.
     * e      - scratch/output buffer; needs room for at least 20 words.
     * h      - optional (may be null); OR-accumulates every state seen.
     * j      - optional (may be null); AND-accumulates every state seen.
     * diffs  - optional (may be null); receives three summary words built
     *          from the difference data in e.
     * onegen - when true, copy the pass-1 result into d[2..17] and return
     *          false without running pass 2.
     *
     * Returns true when e[0..15] hold no set difference bit after pass 2.
     */
    bool iterate_avx512_20_16(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        // Fold the incoming state into the optional accumulators.
        if (h) {
            for (int i = 0; i < 20; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 20; i++) {
                j[i] &= d[i];
            }
        }
        // Pass 1: d (%0) -> e (%1).
        asm (
        // Constants from the table passed as %2: zmm14 is used as a blend
        // mask, zmm16/zmm17 as vpermi2d index vectors.
        "vmovdqu64 (%2), %%zmm14 \n\t"
        "vmovdqu64 64(%2), %%zmm16 \n\t"
        "vmovdqu64 128(%2), %%zmm17 \n\t"
        // d[0..15]: zmm9 = (>>1) ^ (<<1), zmm12 = majority(word, <<1, >>1)
        // (imm 232), zmm11 = XOR of all three.
        "vmovdqu64 (%0), %%zmm7 \n\t"
        "vpsrld $1, %%zmm7, %%zmm6 \n\t"
        "vpslld $1, %%zmm7, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm9 \n\t"
        "vmovdqa32 %%zmm7, %%zmm12 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm12 \n\t"
        "vpxord %%zmm7, %%zmm9, %%zmm11 \n\t"
        // d[16..19] via a 128-bit load (the rest of zmm0 is zeroed); same
        // sums into zmm2 / zmm5 / zmm4.
        "vmovdqu 64(%0), %%xmm0 \n\t"
        "vpsrld $1, %%zmm0, %%zmm6 \n\t"
        "vpslld $1, %%zmm0, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm2 \n\t"
        "vmovdqa32 %%zmm0, %%zmm5 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm5 \n\t"
        "vpxord %%zmm0, %%zmm2, %%zmm4 \n\t"
        // First half: copy index vectors, gather across the register pair.
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm2, %%zmm9, %%zmm18 \n\t"
        "vpermi2d %%zmm4, %%zmm11, %%zmm6 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm8 \n\t"
        "vpermi2d %%zmm0, %%zmm7, %%zmm20 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm19 \n\t"
        // Ternary-logic network; the result for this half ends up in zmm6.
        "vpternlogd $150, %%zmm18, %%zmm11, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm11 \n\t"
        "vpternlogd $166, %%zmm11, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm12, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm11, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // e[0..15] = first-half result.
        "vmovdqu64 %%zmm6, (%1) \n\t"
        // Second half: same sequence with the two tables swapped.
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm9, %%zmm2, %%zmm18 \n\t"
        "vpermi2d %%zmm11, %%zmm4, %%zmm6 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm8 \n\t"
        "vpermi2d %%zmm7, %%zmm0, %%zmm20 \n\t"
        "vpermi2d %%zmm12, %%zmm5, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm4, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm4 \n\t"
        "vpternlogd $166, %%zmm4, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm5, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm4, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // e[16..19] = second-half result.
        "vmovdqu %%xmm6, 64(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen16)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm16", 
                    "xmm17", "xmm18", "xmm19", "xmm20", "memory");

        // Fold the pass-1 result (shifted by one index) into the accumulators.
        if (h) {
            for (int i = 1; i < 19; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 19; i++) {
                j[i] &= e[i-1];
            }
        }
        // Single-step mode: commit the pass-1 result and stop here.
        if (onegen) {
            for (int i = 2; i < 18; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        // Pass 2: e (%1) -> blended into d (%0), differences back into e.
        // Only the first 16-word vector is produced in this pass.
        asm (
        "vmovdqu64 (%2), %%zmm14 \n\t"
        "vmovdqu64 64(%2), %%zmm16 \n\t"
        "vmovdqu64 128(%2), %%zmm17 \n\t"
        // Same shift/sum preparation as pass 1, reading e[0..15] and e[16..19].
        "vmovdqu64 (%1), %%zmm7 \n\t"
        "vpsrld $1, %%zmm7, %%zmm6 \n\t"
        "vpslld $1, %%zmm7, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm9 \n\t"
        "vmovdqa32 %%zmm7, %%zmm12 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm12 \n\t"
        "vpxord %%zmm7, %%zmm9, %%zmm11 \n\t"
        "vmovdqu 64(%1), %%xmm0 \n\t"
        "vpsrld $1, %%zmm0, %%zmm6 \n\t"
        "vpslld $1, %%zmm0, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm2 \n\t"
        "vmovdqa32 %%zmm0, %%zmm5 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm5 \n\t"
        "vpxord %%zmm0, %%zmm2, %%zmm4 \n\t"
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm2, %%zmm9, %%zmm18 \n\t"
        "vpermi2d %%zmm4, %%zmm11, %%zmm6 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm8 \n\t"
        "vpermi2d %%zmm0, %%zmm7, %%zmm20 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm11, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm11 \n\t"
        "vpternlogd $166, %%zmm11, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm12, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm11, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // Old d[2..17] in zmm8; imm 228 keeps the new bit where zmm14 is set
        // and the old bit elsewhere. Write back, then e[0..15] = new ^ old.
        "vmovdqu64 8(%0), %%zmm8 \n\t"
        "vpternlogd $228, %%zmm14, %%zmm8, %%zmm6 \n\t"
        "vmovdqu64 %%zmm6, 8(%0) \n\t"
        "vpxord %%zmm6, %%zmm8, %%zmm15 \n\t"
        "vmovdqu64 %%zmm15, (%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen16)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm16", 
                    "xmm17", "xmm18", "xmm19", "xmm20", "memory");

        // Fold the updated words of d into the accumulators.
        if (h) {
            for (int i = 2; i < 18; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 18; i++) {
                j[i] &= d[i];
            }
        }
        // NOTE(review): e is uint32_t storage read through a uint64_t pointer;
        // this relies on the compiler tolerating the type pun and on e being
        // acceptable for 8-byte access.
        uint64_t* e64 = ((uint64_t*) e);
        // OR of all sixteen difference words e[0..15].
        uint64_t bigdiff = e64[0] | e64[1] | e64[2] | e64[3] | e64[4] | e64[5] | e64[6] | e64[7];
        if (diffs != 0) {
            // diffs[0]: all difference bits collapsed into one 32-bit word.
            diffs[0] = (bigdiff | (bigdiff >> 32));
            // diffs[1] / diffs[2]: differences of the first two and last two updated words.
            diffs[1] = e[0] | e[1];
            diffs[2] = e[14] | e[15];
        }
        return (bigdiff == 0);
    }

    /*
     * Two-pass AVX-512 kernel over the first 16 words of d.
     *
     * Pass 1 reads d[0..15] and writes 16 result words to e; e[i-1] is then
     * treated as the new value belonging to index i. Pass 2 reads e, computes
     * another step, blends it into d[2..13] under the mask loaded into zmm14,
     * and leaves (new XOR old) difference words in e.
     *
     * d      - state words; d[2..13] are overwritten.
     * e      - scratch/output buffer; needs room for at least 16 words.
     * h      - optional (may be null); OR-accumulates every state seen.
     * j      - optional (may be null); AND-accumulates every state seen.
     * diffs  - optional (may be null); receives three summary words built
     *          from the difference data in e.
     * onegen - when true, copy the pass-1 result into d[2..13] and return
     *          false without running pass 2.
     *
     * Returns true when e[0..11] hold no set difference bit after pass 2.
     *
     * NOTE(review): both asm blocks pass zmm0/zmm2/zmm4/zmm5 to vpermi2d as the
     * second table without writing them first. Presumably the index vectors in
     * apg::__sixteen12 only select from the first table - confirm against the
     * table definition.
     */
    bool iterate_avx512_16_12(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        // Fold the incoming state into the optional accumulators.
        if (h) {
            for (int i = 0; i < 16; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 16; i++) {
                j[i] &= d[i];
            }
        }
        // Pass 1: d (%0) -> e (%1).
        asm (
        // Constants from the table passed as %2: zmm14 is used as a blend
        // mask, zmm16/zmm17 as vpermi2d index vectors.
        "vmovdqu64 (%2), %%zmm14 \n\t"
        "vmovdqu64 64(%2), %%zmm16 \n\t"
        "vmovdqu64 128(%2), %%zmm17 \n\t"
        // d[0..15]: zmm9 = (>>1) ^ (<<1), zmm12 = majority(word, <<1, >>1)
        // (imm 232), zmm11 = XOR of all three.
        "vmovdqu64 (%0), %%zmm7 \n\t"
        "vpsrld $1, %%zmm7, %%zmm6 \n\t"
        "vpslld $1, %%zmm7, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm9 \n\t"
        "vmovdqa32 %%zmm7, %%zmm12 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm12 \n\t"
        "vpxord %%zmm7, %%zmm9, %%zmm11 \n\t"
        // Copy index vectors (vpermi2d overwrites its index operand), gather.
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm2, %%zmm9, %%zmm18 \n\t"
        "vpermi2d %%zmm4, %%zmm11, %%zmm6 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm8 \n\t"
        "vpermi2d %%zmm0, %%zmm7, %%zmm20 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm19 \n\t"
        // Ternary-logic network; the result ends up in zmm6.
        "vpternlogd $150, %%zmm18, %%zmm11, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm11 \n\t"
        "vpternlogd $166, %%zmm11, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm12, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm11, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // e[0..15] = result.
        "vmovdqu64 %%zmm6, (%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen12)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm16", 
                    "xmm17", "xmm18", "xmm19", "xmm20", "memory");

        // Fold the pass-1 result (shifted by one index) into the accumulators.
        if (h) {
            for (int i = 1; i < 15; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 15; i++) {
                j[i] &= e[i-1];
            }
        }
        // Single-step mode: commit the pass-1 result and stop here.
        if (onegen) {
            for (int i = 2; i < 14; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        // Pass 2: e (%1) -> blended into d (%0), differences back into e.
        asm (
        "vmovdqu64 (%2), %%zmm14 \n\t"
        "vmovdqu64 64(%2), %%zmm16 \n\t"
        "vmovdqu64 128(%2), %%zmm17 \n\t"
        // Same shift/sum preparation as pass 1, reading e[0..15].
        "vmovdqu64 (%1), %%zmm7 \n\t"
        "vpsrld $1, %%zmm7, %%zmm6 \n\t"
        "vpslld $1, %%zmm7, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm9 \n\t"
        "vmovdqa32 %%zmm7, %%zmm12 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm12 \n\t"
        "vpxord %%zmm7, %%zmm9, %%zmm11 \n\t"
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm2, %%zmm9, %%zmm18 \n\t"
        "vpermi2d %%zmm4, %%zmm11, %%zmm6 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm8 \n\t"
        "vpermi2d %%zmm0, %%zmm7, %%zmm20 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm11, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm11 \n\t"
        "vpternlogd $166, %%zmm11, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm12, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm11, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // Old d[2..9] and d[10..13] joined into zmm8 (imm 68 concatenates the
        // low 256 bits of each source).
        "vmovdqu 8(%0), %%ymm8 \n\t"
        "vmovdqu 40(%0), %%xmm13 \n\t"
        "vshufi32x4 $68, %%zmm13, %%zmm8, %%zmm8 \n\t"
        // imm 228 keeps the new bit where zmm14 is set, the old bit elsewhere.
        "vpternlogd $228, %%zmm14, %%zmm8, %%zmm6 \n\t"
        // imm 78 swaps the 256-bit halves so xmm13 holds the third 128-bit
        // lane; write back d[2..9] and d[10..13].
        "vshufi32x4 $78, %%zmm6, %%zmm6, %%zmm13 \n\t"
        "vmovdqu %%ymm6, 8(%0) \n\t"
        "vmovdqu %%xmm13, 40(%0) \n\t"
        // e[0..15] = new ^ old.
        "vpxord %%zmm6, %%zmm8, %%zmm15 \n\t"
        "vmovdqu64 %%zmm15, (%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen12)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm16", 
                    "xmm17", "xmm18", "xmm19", "xmm20", "memory");

        // Fold the updated words of d into the accumulators.
        if (h) {
            for (int i = 2; i < 14; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 14; i++) {
                j[i] &= d[i];
            }
        }
        // NOTE(review): e is uint32_t storage read through a uint64_t pointer;
        // this relies on the compiler tolerating the type pun and on e being
        // acceptable for 8-byte access.
        uint64_t* e64 = ((uint64_t*) e);
        // OR of the twelve difference words e[0..11].
        uint64_t bigdiff = e64[0] | e64[1] | e64[2] | e64[3] | e64[4] | e64[5];
        if (diffs != 0) {
            // diffs[0]: all difference bits collapsed into one 32-bit word.
            diffs[0] = (bigdiff | (bigdiff >> 32));
            // diffs[1] / diffs[2]: differences of the first two and last two updated words.
            diffs[1] = e[0] | e[1];
            diffs[2] = e[10] | e[11];
        }
        return (bigdiff == 0);
    }

    /*
     * Two-pass AVX-512 kernel over the first 12 words of d.
     *
     * Pass 1 reads d[0..11] and writes 12 result words to e; e[i-1] is then
     * treated as the new value belonging to index i. Pass 2 reads e, computes
     * another step, blends it into d[2..9] under the mask loaded into zmm14,
     * and leaves (new XOR old) difference words in e.
     *
     * d      - state words; d[2..9] are overwritten.
     * e      - scratch/output buffer; pass 2 stores a full 64-byte vector, so
     *          it needs room for at least 16 words.
     * h      - optional (may be null); OR-accumulates every state seen.
     * j      - optional (may be null); AND-accumulates every state seen.
     * diffs  - optional (may be null); receives three summary words built
     *          from the difference data in e.
     * onegen - when true, copy the pass-1 result into d[2..9] and return
     *          false without running pass 2.
     *
     * Returns true when e[0..7] hold no set difference bit after pass 2.
     *
     * NOTE(review): both asm blocks pass zmm0/zmm2/zmm4/zmm5 to vpermi2d as the
     * second table without writing them first. Presumably the index vectors in
     * apg::__sixteen8 only select from the first table - confirm against the
     * table definition.
     */
    bool iterate_avx512_12_8(uint32_t * __restrict__ d, uint32_t * __restrict__ e, uint32_t * __restrict__ h, uint32_t * __restrict__ j, uint32_t * __restrict__ diffs, bool onegen) {
        // Fold the incoming state into the optional accumulators.
        if (h) {
            for (int i = 0; i < 12; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 0; i < 12; i++) {
                j[i] &= d[i];
            }
        }
        // Pass 1: d (%0) -> e (%1).
        asm (
        // Constants from the table passed as %2: zmm14 is used as a blend
        // mask, zmm16/zmm17 as vpermi2d index vectors.
        "vmovdqu64 (%2), %%zmm14 \n\t"
        "vmovdqu64 64(%2), %%zmm16 \n\t"
        "vmovdqu64 128(%2), %%zmm17 \n\t"
        // d[0..7] and d[8..11] joined into zmm7 (imm 68 concatenates the low
        // 256 bits of each source).
        "vmovdqu (%0), %%ymm7 \n\t"
        "vmovdqu 32(%0), %%xmm13 \n\t"
        "vshufi32x4 $68, %%zmm13, %%zmm7, %%zmm7 \n\t"
        // zmm9 = (>>1) ^ (<<1), zmm12 = majority(word, <<1, >>1) (imm 232),
        // zmm11 = XOR of all three.
        "vpsrld $1, %%zmm7, %%zmm6 \n\t"
        "vpslld $1, %%zmm7, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm9 \n\t"
        "vmovdqa32 %%zmm7, %%zmm12 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm12 \n\t"
        "vpxord %%zmm7, %%zmm9, %%zmm11 \n\t"
        // Copy index vectors (vpermi2d overwrites its index operand), gather.
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm2, %%zmm9, %%zmm18 \n\t"
        "vpermi2d %%zmm4, %%zmm11, %%zmm6 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm8 \n\t"
        "vpermi2d %%zmm0, %%zmm7, %%zmm20 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm19 \n\t"
        // Ternary-logic network; the result ends up in zmm6.
        "vpternlogd $150, %%zmm18, %%zmm11, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm11 \n\t"
        "vpternlogd $166, %%zmm11, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm12, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm11, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // imm 78 swaps the 256-bit halves so xmm13 holds the third 128-bit
        // lane; store e[0..7] and e[8..11].
        "vshufi32x4 $78, %%zmm6, %%zmm6, %%zmm13 \n\t"
        "vmovdqu %%ymm6, (%1) \n\t"
        "vmovdqu %%xmm13, 32(%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen8)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm16", 
                    "xmm17", "xmm18", "xmm19", "xmm20", "memory");

        // Fold the pass-1 result (shifted by one index) into the accumulators.
        if (h) {
            for (int i = 1; i < 11; i++) {
                h[i] |= e[i-1];
            }
        }
        if (j) {
            for (int i = 1; i < 11; i++) {
                j[i] &= e[i-1];
            }
        }
        // Single-step mode: commit the pass-1 result and stop here.
        if (onegen) {
            for (int i = 2; i < 10; i++) {
                d[i] = e[i-1];
            }
            return false;
        }
        // Pass 2: e (%1) -> blended into d (%0), differences back into e.
        asm (
        "vmovdqu64 (%2), %%zmm14 \n\t"
        "vmovdqu64 64(%2), %%zmm16 \n\t"
        "vmovdqu64 128(%2), %%zmm17 \n\t"
        // Same load/shift/sum preparation as pass 1, reading e[0..11].
        "vmovdqu (%1), %%ymm7 \n\t"
        "vmovdqu 32(%1), %%xmm13 \n\t"
        "vshufi32x4 $68, %%zmm13, %%zmm7, %%zmm7 \n\t"
        "vpsrld $1, %%zmm7, %%zmm6 \n\t"
        "vpslld $1, %%zmm7, %%zmm1 \n\t"
        "vpxord %%zmm6, %%zmm1, %%zmm9 \n\t"
        "vmovdqa32 %%zmm7, %%zmm12 \n\t"
        "vpternlogd $232, %%zmm6, %%zmm1, %%zmm12 \n\t"
        "vpxord %%zmm7, %%zmm9, %%zmm11 \n\t"
        "vmovdqa64 %%zmm16, %%zmm18 \n\t"
        "vmovdqa64 %%zmm17, %%zmm6 \n\t"
        "vmovdqa64 %%zmm17, %%zmm8 \n\t"
        "vmovdqa64 %%zmm16, %%zmm20 \n\t"
        "vmovdqa64 %%zmm16, %%zmm19 \n\t"
        "vpermi2d %%zmm2, %%zmm9, %%zmm18 \n\t"
        "vpermi2d %%zmm4, %%zmm11, %%zmm6 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm8 \n\t"
        "vpermi2d %%zmm0, %%zmm7, %%zmm20 \n\t"
        "vpermi2d %%zmm5, %%zmm12, %%zmm19 \n\t"
        "vpternlogd $150, %%zmm18, %%zmm11, %%zmm6 \n\t"
        "vpternlogd $178, %%zmm18, %%zmm6, %%zmm11 \n\t"
        "vpternlogd $166, %%zmm11, %%zmm19, %%zmm18 \n\t"
        "vpternlogd $22, %%zmm18, %%zmm12, %%zmm8 \n\t"
        "vpternlogd $162, %%zmm8, %%zmm11, %%zmm18 \n\t"
        "vpternlogd $168, %%zmm18, %%zmm20, %%zmm6 \n\t"
        // Old d[2..9] in zmm8; imm 228 keeps the new bit where zmm14 is set
        // and the old bit elsewhere. Write back d[2..9].
        "vmovdqu 8(%0), %%ymm8 \n\t"
        "vpternlogd $228, %%zmm14, %%zmm8, %%zmm6 \n\t"
        "vmovdqu %%ymm6, 8(%0) \n\t"
        // new ^ old, stored as a full 64-byte vector to e[0..15].
        "vpxord %%zmm6, %%zmm8, %%zmm15 \n\t"
        "vmovdqu64 %%zmm15, (%1) \n\t"
                : /* no output operands */ 
                : "r" (d), "r" (e), "r" (apg::__sixteen8)
                : "ebx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", 
                    "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
                    "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm16", 
                    "xmm17", "xmm18", "xmm19", "xmm20", "memory");

        // Fold the updated words of d into the accumulators.
        if (h) {
            for (int i = 2; i < 10; i++) {
                h[i] |= d[i];
            }
        }
        if (j) {
            for (int i = 2; i < 10; i++) {
                j[i] &= d[i];
            }
        }
        // NOTE(review): e is uint32_t storage read through a uint64_t pointer;
        // this relies on the compiler tolerating the type pun and on e being
        // acceptable for 8-byte access.
        uint64_t* e64 = ((uint64_t*) e);
        // OR of the eight difference words e[0..7].
        uint64_t bigdiff = e64[0] | e64[1] | e64[2] | e64[3];
        if (diffs != 0) {
            // diffs[0]: all difference bits collapsed into one 32-bit word.
            diffs[0] = (bigdiff | (bigdiff >> 32));
            // diffs[1] / diffs[2]: differences of the first two and last two updated words.
            diffs[1] = e[0] | e[1];
            diffs[2] = e[6] | e[7];
        }
        return (bigdiff == 0);
    }

    /*
     * Chain the fixed-size AVX-512 kernels from the 32-word stage down to the
     * 20-word stage, updating both the h and j accumulators.
     *
     * A stage is entered when n is at least 7, 5, 3 or 1 respectively, and is
     * run in single-step mode when n equals that threshold exactly. Each
     * stage works on pointers advanced by two words relative to the previous
     * stage. As soon as a stage reports true, the matching code (8, 6, 4, 2)
     * is returned; otherwise the result is 0.
     */
    int iterate_var_avx512(int n, uint32_t * __restrict__ d, uint32_t * __restrict__ h, uint32_t * __restrict__ j) {
        uint32_t scratch[32];   // per-call work buffer shared by all stages

        if ((n >= 7) && iterate_avx512_32_28(d, scratch, h, j, 0, (n == 7))) {
            return 8;
        }
        if ((n >= 5) && iterate_avx512_28_24(d + 2, scratch + 2, h + 2, j + 2, 0, (n == 5))) {
            return 6;
        }
        if ((n >= 3) && iterate_avx512_24_20(d + 4, scratch + 4, h + 4, j + 4, 0, (n == 3))) {
            return 4;
        }
        if ((n >= 1) && iterate_avx512_20_16(d + 6, scratch + 6, h + 6, j + 6, 0, (n == 1))) {
            return 2;
        }
        return 0;
    }

    /*
     * Chain the fixed-size AVX-512 kernels from the 32-word stage down to the
     * 20-word stage, updating only the h accumulator (the j slot is passed
     * as null).
     *
     * A stage is entered when n is at least 7, 5, 3 or 1 respectively, and is
     * run in single-step mode when n equals that threshold exactly. Each
     * stage works on pointers advanced by two words relative to the previous
     * stage. As soon as a stage reports true, the matching code (8, 6, 4, 2)
     * is returned; otherwise the result is 0.
     */
    int iterate_var_avx512(int n, uint32_t * __restrict__ d, uint32_t * __restrict__ h) {
        uint32_t scratch[32];   // per-call work buffer shared by all stages

        if ((n >= 7) && iterate_avx512_32_28(d, scratch, h, 0, 0, (n == 7))) {
            return 8;
        }
        if ((n >= 5) && iterate_avx512_28_24(d + 2, scratch + 2, h + 2, 0, 0, (n == 5))) {
            return 6;
        }
        if ((n >= 3) && iterate_avx512_24_20(d + 4, scratch + 4, h + 4, 0, 0, (n == 3))) {
            return 4;
        }
        if ((n >= 1) && iterate_avx512_20_16(d + 6, scratch + 6, h + 6, 0, 0, (n == 1))) {
            return 2;
        }
        return 0;
    }

    /*
     * Chain the fixed-size AVX-512 kernels from the 32-word stage down to the
     * 20-word stage without any accumulators (h and j are passed as null).
     *
     * A stage is entered when n is at least 7, 5, 3 or 1 respectively, and is
     * run in single-step mode when n equals that threshold exactly. Each
     * stage works on pointers advanced by two words relative to the previous
     * stage. As soon as a stage reports true, the matching code (8, 6, 4, 2)
     * is returned; otherwise the result is 0.
     */
    int iterate_var_avx512(int n, uint32_t * __restrict__ d) {
        uint32_t scratch[32];   // per-call work buffer shared by all stages

        if ((n >= 7) && iterate_avx512_32_28(d, scratch, 0, 0, 0, (n == 7))) {
            return 8;
        }
        if ((n >= 5) && iterate_avx512_28_24(d + 2, scratch + 2, 0, 0, 0, (n == 5))) {
            return 6;
        }
        if ((n >= 3) && iterate_avx512_24_20(d + 4, scratch + 4, 0, 0, 0, (n == 3))) {
            return 4;
        }
        if ((n >= 1) && iterate_avx512_20_16(d + 6, scratch + 6, 0, 0, 0, (n == 1))) {
            return 2;
        }
        return 0;
    }

#include "../leaf_iterators_avx512.h"
#else
#include "../leaf_iterators.h"
#endif

}
