M4RIE  0.20120415
 All Data Structures Files Functions Variables Macros Groups Pages
mzed.h
Go to the documentation of this file.
1 
26 #ifndef M4RIE_MZED_H
27 #define M4RIE_MZED_H
28 
29 /******************************************************************************
30 *
31 * M4RIE: Linear Algebra over GF(2^e)
32 *
33 * Copyright (C) 2010,2011 Martin Albrecht <martinralbrecht@googlemail.com>
34 *
35 * Distributed under the terms of the GNU General Public License (GEL)
36 * version 2 or higher.
37 *
38 * This code is distributed in the hope that it will be useful,
39 * but WITHOUT ANY WARRANTY; without even the implied warranty of
40 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
41 * General Public License for more details.
42 *
43 * The full text of the GPL is available at:
44 *
45 * http://www.gnu.org/licenses/
46 ******************************************************************************/
47 
48 #include <m4ri/m4ri.h>
49 #include "gf2e.h"
50 #include "m4ri_functions.h"
51 
58 typedef struct {
59  mzd_t *x;
60  const gf2e *finite_field;
61  rci_t nrows;
62  rci_t ncols;
63  wi_t w;
64 } mzed_t;
65 
66 
79 mzed_t *mzed_init(const gf2e *ff, const rci_t m, const rci_t n);
80 
89 void mzed_free(mzed_t *A);
90 
91 
110 static inline mzed_t *mzed_concat(mzed_t *C, const mzed_t *A, const mzed_t *B) {
111  if(C==NULL)
112  C = mzed_init(A->finite_field, A->nrows, A->ncols + B->ncols);
113  mzd_concat(C->x, A->x, B->x);
114  return C;
115 }
116 
134 static inline mzed_t *mzed_stack(mzed_t *C, const mzed_t *A, const mzed_t *B) {
135  if(C==NULL)
136  C = mzed_init(A->finite_field, A->nrows + B->nrows, A->ncols);
137  mzd_stack(C->x, A->x, B->x);
138  return C;
139 }
140 
141 
156 static inline mzed_t *mzed_submatrix(mzed_t *S, const mzed_t *M, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc) {
157  if(S==NULL)
158  S = mzed_init(M->finite_field, highr - lowr, highc - lowc);
159 
160  mzd_submatrix(S->x, M->x, lowr, lowc*M->w, highr, highc*M->w);
161  return S;
162 }
163 
186 static inline mzed_t *mzed_init_window(const mzed_t *A, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc) {
187  mzed_t *B = (mzed_t *)m4ri_mm_malloc(sizeof(mzed_t));
188  B->finite_field = A->finite_field;
190  B->nrows = highr - lowr;
191  B->ncols = highc - lowc;
192  B->x = mzd_init_window(A->x, lowr, B->w*lowc, highr, B->w*highc);
193  return B;
194 }
195 
204 static inline void mzed_free_window(mzed_t *A) {
205  mzd_free_window(A->x);
206  m4ri_mm_free(A);
207 }
208 
222 mzed_t *mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B);
223 
234 mzed_t *_mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B);
235 
246 #define mzed_sub mzed_add
247 
258 #define _mzed_sub _mzed_add
259 
270 mzed_t *mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B);
271 
282 mzed_t *mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B);
283 
294 mzed_t *_mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B);
295 
306 mzed_t *_mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B);
307 
308 
322 mzed_t *mzed_addmul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);
323 
337 mzed_t *mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);
338 
349 mzed_t *_mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);
350 
361 mzed_t *mzed_mul_scalar(mzed_t *C, const word a, const mzed_t *B);
362 
373 mzed_t *_mzed_mul_init(mzed_t *C, const mzed_t *A, const mzed_t *B, int clear);
374 
385 void mzed_randomize(mzed_t *A);
386 
396 mzed_t *mzed_copy(mzed_t *B, const mzed_t *A);
397 
410 void mzed_set_ui(mzed_t *A, word value);
411 
412 
423 static inline word mzed_read_elem(const mzed_t *A, const rci_t row, const rci_t col) {
424  return __mzd_read_bits(A->x, row, A->w*col, A->w);
425 }
426 
438 static inline void mzed_add_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem) {
439  __mzd_xor_bits(A->x, row, A->w*col, A->w, elem);
440 }
441 
453 static inline void mzed_write_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem) {
454  __mzd_clear_bits(A->x, row, A->w*col, A->w);
455  __mzd_xor_bits(A->x, row, A->w*col, A->w, elem);
456 }
457 
471 static inline int mzed_cmp(mzed_t *A, mzed_t *B) {
472  return mzd_cmp(A->x,B->x);
473 }
474 
475 
483 static inline int mzed_is_zero(const mzed_t *A) {
484  return mzd_is_zero(A->x);
485 }
486 
500 void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, word *X, rci_t start_col);
501 
514 static inline void mzed_add_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, rci_t start_col) {
515  assert(A->ncols == B->ncols && A->finite_field == B->finite_field);
516  assert(A->x->offset == B->x->offset);
517  assert(start_col < A->ncols);
518 
519  const rci_t start = A->x->offset + A->w*start_col;
520  const wi_t startblock = start/m4ri_radix;
521  const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix));
522  const word bitmask_end = __M4RI_LEFT_BITMASK((A->x->offset + A->x->ncols) % m4ri_radix);
523 
524  word *_a = A->x->rows[ar];
525  const word *_b = B->x->rows[br];
526  wi_t j;
527 
528  if (A->x->width - startblock > 1) {
529  _a[startblock] ^= _b[startblock] & bitmask_begin;
530  for(j=startblock+1; j<A->x->width-1; j++)
531  _a[j] ^= _b[j];
532  _a[j] ^= _b[j] & bitmask_end;
533  } else {
534  _a[startblock] ^= _b[startblock] & (bitmask_begin & bitmask_end);
535  }
536 }
537 
549 static inline void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const word *X) {
550  assert(start_col < A->ncols);
551 
552  const rci_t start = A->x->offset + A->w*start_col;
553  const wi_t startblock = start/m4ri_radix;
554  word *_a = A->x->rows[r];
555  const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix));
556  const word bitmask_end = __M4RI_LEFT_BITMASK((A->x->offset + A->x->ncols) % m4ri_radix);
557  register word __a = _a[startblock]>>(start%m4ri_radix);
558  register word __t = 0;
559  int j;
560 
561  if(A->w == 2) {
562  switch( (start/2) % 32 ) {
563  case 0: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<0; __a >>= 2;
564  case 1: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<2; __a >>= 2;
565  case 2: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<4; __a >>= 2;
566  case 3: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<6; __a >>= 2;
567  case 4: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<8; __a >>= 2;
568  case 5: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<10; __a >>= 2;
569  case 6: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<12; __a >>= 2;
570  case 7: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<14; __a >>= 2;
571  case 8: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<16; __a >>= 2;
572  case 9: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<18; __a >>= 2;
573  case 10: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<20; __a >>= 2;
574  case 11: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<22; __a >>= 2;
575  case 12: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<24; __a >>= 2;
576  case 13: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<26; __a >>= 2;
577  case 14: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<28; __a >>= 2;
578  case 15: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<30; __a >>= 2;
579  case 16: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<32; __a >>= 2;
580  case 17: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<34; __a >>= 2;
581  case 18: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<36; __a >>= 2;
582  case 19: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<38; __a >>= 2;
583  case 20: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<40; __a >>= 2;
584  case 21: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<42; __a >>= 2;
585  case 22: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<44; __a >>= 2;
586  case 23: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<46; __a >>= 2;
587  case 24: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<48; __a >>= 2;
588  case 25: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<50; __a >>= 2;
589  case 26: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<52; __a >>= 2;
590  case 27: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<54; __a >>= 2;
591  case 28: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<56; __a >>= 2;
592  case 29: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<58; __a >>= 2;
593  case 30: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<60; __a >>= 2;
594  case 31: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<62; break;
595  default: m4ri_die("impossible");
596  }
597  if(A->x->width-startblock == 1) {
598  _a[startblock] &= ~(bitmask_begin & bitmask_end);
599  _a[startblock] ^= __t & bitmask_begin & bitmask_end;
600  return;
601  } else {
602  _a[startblock] &= ~bitmask_begin;
603  _a[startblock] ^= __t & bitmask_begin;
604  }
605 
606  for(j=startblock+1; j<A->x->width -1; j++) {
607  __a = _a[j], __t = 0;
608  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<0; __a >>= 2;
609  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<2; __a >>= 2;
610  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<4; __a >>= 2;
611  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<6; __a >>= 2;
612  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<8; __a >>= 2;
613  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<10; __a >>= 2;
614  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<12; __a >>= 2;
615  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<14; __a >>= 2;
616  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<16; __a >>= 2;
617  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<18; __a >>= 2;
618  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<20; __a >>= 2;
619  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<22; __a >>= 2;
620  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<24; __a >>= 2;
621  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<26; __a >>= 2;
622  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<28; __a >>= 2;
623  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<30; __a >>= 2;
624  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<32; __a >>= 2;
625  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<34; __a >>= 2;
626  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<36; __a >>= 2;
627  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<38; __a >>= 2;
628  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<40; __a >>= 2;
629  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<42; __a >>= 2;
630  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<44; __a >>= 2;
631  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<46; __a >>= 2;
632  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<48; __a >>= 2;
633  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<50; __a >>= 2;
634  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<52; __a >>= 2;
635  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<54; __a >>= 2;
636  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<56; __a >>= 2;
637  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<58; __a >>= 2;
638  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<60; __a >>= 2;
639  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<62;
640  _a[j] = __t;
641  }
642 
643  __t = _a[j] & ~bitmask_end;
644  switch((A->x->offset+A->x->ncols) % m4ri_radix) {
645  case 0: __t ^= ((word)X[(int)((_a[j] & 0xC000000000000000ULL)>>62)])<<62;
646  case 62: __t ^= ((word)X[(int)((_a[j] & 0x3000000000000000ULL)>>60)])<<60;
647  case 60: __t ^= ((word)X[(int)((_a[j] & 0x0C00000000000000ULL)>>58)])<<58;
648  case 58: __t ^= ((word)X[(int)((_a[j] & 0x0300000000000000ULL)>>56)])<<56;
649  case 56: __t ^= ((word)X[(int)((_a[j] & 0x00C0000000000000ULL)>>54)])<<54;
650  case 54: __t ^= ((word)X[(int)((_a[j] & 0x0030000000000000ULL)>>52)])<<52;
651  case 52: __t ^= ((word)X[(int)((_a[j] & 0x000C000000000000ULL)>>50)])<<50;
652  case 50: __t ^= ((word)X[(int)((_a[j] & 0x0003000000000000ULL)>>48)])<<48;
653  case 48: __t ^= ((word)X[(int)((_a[j] & 0x0000C00000000000ULL)>>46)])<<46;
654  case 46: __t ^= ((word)X[(int)((_a[j] & 0x0000300000000000ULL)>>44)])<<44;
655  case 44: __t ^= ((word)X[(int)((_a[j] & 0x00000C0000000000ULL)>>42)])<<42;
656  case 42: __t ^= ((word)X[(int)((_a[j] & 0x0000030000000000ULL)>>40)])<<40;
657  case 40: __t ^= ((word)X[(int)((_a[j] & 0x000000C000000000ULL)>>38)])<<38;
658  case 38: __t ^= ((word)X[(int)((_a[j] & 0x0000003000000000ULL)>>36)])<<36;
659  case 36: __t ^= ((word)X[(int)((_a[j] & 0x0000000C00000000ULL)>>34)])<<34;
660  case 34: __t ^= ((word)X[(int)((_a[j] & 0x0000000300000000ULL)>>32)])<<32;
661  case 32: __t ^= ((word)X[(int)((_a[j] & 0x00000000C0000000ULL)>>30)])<<30;
662  case 30: __t ^= ((word)X[(int)((_a[j] & 0x0000000030000000ULL)>>28)])<<28;
663  case 28: __t ^= ((word)X[(int)((_a[j] & 0x000000000C000000ULL)>>26)])<<26;
664  case 26: __t ^= ((word)X[(int)((_a[j] & 0x0000000003000000ULL)>>24)])<<24;
665  case 24: __t ^= ((word)X[(int)((_a[j] & 0x0000000000C00000ULL)>>22)])<<22;
666  case 22: __t ^= ((word)X[(int)((_a[j] & 0x0000000000300000ULL)>>20)])<<20;
667  case 20: __t ^= ((word)X[(int)((_a[j] & 0x00000000000C0000ULL)>>18)])<<18;
668  case 18: __t ^= ((word)X[(int)((_a[j] & 0x0000000000030000ULL)>>16)])<<16;
669  case 16: __t ^= ((word)X[(int)((_a[j] & 0x000000000000C000ULL)>>14)])<<14;
670  case 14: __t ^= ((word)X[(int)((_a[j] & 0x0000000000003000ULL)>>12)])<<12;
671  case 12: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000C00ULL)>>10)])<<10;
672  case 10: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000300ULL)>> 8)])<< 8;
673  case 8: __t ^= ((word)X[(int)((_a[j] & 0x00000000000000C0ULL)>> 6)])<< 6;
674  case 6: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000030ULL)>> 4)])<< 4;
675  case 4: __t ^= ((word)X[(int)((_a[j] & 0x000000000000000CULL)>> 2)])<< 2;
676  case 2: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000003ULL)>> 0)])<< 0;
677  };
678  _a[j] = __t;
679 
680  } else if(A->w == 4) {
681  switch( (start/4)%16 ) {
682  case 0: __t ^= (X[((__a)& 0x000000000000000FULL)])<<0; __a >>= 4;
683  case 1: __t ^= (X[((__a)& 0x000000000000000FULL)])<<4; __a >>= 4;
684  case 2: __t ^= (X[((__a)& 0x000000000000000FULL)])<<8; __a >>= 4;
685  case 3: __t ^= (X[((__a)& 0x000000000000000FULL)])<<12; __a >>= 4;
686  case 4: __t ^= (X[((__a)& 0x000000000000000FULL)])<<16; __a >>= 4;
687  case 5: __t ^= (X[((__a)& 0x000000000000000FULL)])<<20; __a >>= 4;
688  case 6: __t ^= (X[((__a)& 0x000000000000000FULL)])<<24; __a >>= 4;
689  case 7: __t ^= (X[((__a)& 0x000000000000000FULL)])<<28; __a >>= 4;
690  case 8: __t ^= (X[((__a)& 0x000000000000000FULL)])<<32; __a >>= 4;
691  case 9: __t ^= (X[((__a)& 0x000000000000000FULL)])<<36; __a >>= 4;
692  case 10: __t ^= (X[((__a)& 0x000000000000000FULL)])<<40; __a >>= 4;
693  case 11: __t ^= (X[((__a)& 0x000000000000000FULL)])<<44; __a >>= 4;
694  case 12: __t ^= (X[((__a)& 0x000000000000000FULL)])<<48; __a >>= 4;
695  case 13: __t ^= (X[((__a)& 0x000000000000000FULL)])<<52; __a >>= 4;
696  case 14: __t ^= (X[((__a)& 0x000000000000000FULL)])<<56; __a >>= 4;
697  case 15: __t ^= (X[((__a)& 0x000000000000000FULL)])<<60; break;
698  default: m4ri_die("impossible");
699  }
700  if(A->x->width-startblock == 1) {
701  _a[startblock] &= ~(bitmask_begin & bitmask_end);
702  _a[startblock] ^= __t & bitmask_begin & bitmask_end;
703  return;
704  } else {
705  _a[startblock] &= ~bitmask_begin;
706  _a[startblock] ^= __t & bitmask_begin;
707  }
708 
709  for(j=startblock+1; j<A->x->width -1; j++) {
710  __a = _a[j], __t = 0;
711  __t ^= (X[((__a)& 0x000000000000000FULL)])<<0; __a >>= 4;
712  __t ^= (X[((__a)& 0x000000000000000FULL)])<<4; __a >>= 4;
713  __t ^= (X[((__a)& 0x000000000000000FULL)])<<8; __a >>= 4;
714  __t ^= (X[((__a)& 0x000000000000000FULL)])<<12; __a >>= 4;
715  __t ^= (X[((__a)& 0x000000000000000FULL)])<<16; __a >>= 4;
716  __t ^= (X[((__a)& 0x000000000000000FULL)])<<20; __a >>= 4;
717  __t ^= (X[((__a)& 0x000000000000000FULL)])<<24; __a >>= 4;
718  __t ^= (X[((__a)& 0x000000000000000FULL)])<<28; __a >>= 4;
719  __t ^= (X[((__a)& 0x000000000000000FULL)])<<32; __a >>= 4;
720  __t ^= (X[((__a)& 0x000000000000000FULL)])<<36; __a >>= 4;
721  __t ^= (X[((__a)& 0x000000000000000FULL)])<<40; __a >>= 4;
722  __t ^= (X[((__a)& 0x000000000000000FULL)])<<44; __a >>= 4;
723  __t ^= (X[((__a)& 0x000000000000000FULL)])<<48; __a >>= 4;
724  __t ^= (X[((__a)& 0x000000000000000FULL)])<<52; __a >>= 4;
725  __t ^= (X[((__a)& 0x000000000000000FULL)])<<56; __a >>= 4;
726  __t ^= (X[((__a)& 0x000000000000000FULL)])<<60;
727  _a[j] = __t;
728  }
729 
730  __t = _a[j] & ~bitmask_end;
731  switch( (A->x->offset + A->x->ncols) % m4ri_radix) {
732  case 0: __t ^= ((word)X[(int)((_a[j] & 0xF000000000000000ULL)>>60)])<<60;
733  case 60: __t ^= ((word)X[(int)((_a[j] & 0x0F00000000000000ULL)>>56)])<<56;
734  case 56: __t ^= ((word)X[(int)((_a[j] & 0x00F0000000000000ULL)>>52)])<<52;
735  case 52: __t ^= ((word)X[(int)((_a[j] & 0x000F000000000000ULL)>>48)])<<48;
736  case 48: __t ^= ((word)X[(int)((_a[j] & 0x0000F00000000000ULL)>>44)])<<44;
737  case 44: __t ^= ((word)X[(int)((_a[j] & 0x00000F0000000000ULL)>>40)])<<40;
738  case 40: __t ^= ((word)X[(int)((_a[j] & 0x000000F000000000ULL)>>36)])<<36;
739  case 36: __t ^= ((word)X[(int)((_a[j] & 0x0000000F00000000ULL)>>32)])<<32;
740  case 32: __t ^= ((word)X[(int)((_a[j] & 0x00000000F0000000ULL)>>28)])<<28;
741  case 28: __t ^= ((word)X[(int)((_a[j] & 0x000000000F000000ULL)>>24)])<<24;
742  case 24: __t ^= ((word)X[(int)((_a[j] & 0x0000000000F00000ULL)>>20)])<<20;
743  case 20: __t ^= ((word)X[(int)((_a[j] & 0x00000000000F0000ULL)>>16)])<<16;
744  case 16: __t ^= ((word)X[(int)((_a[j] & 0x000000000000F000ULL)>>12)])<<12;
745  case 12: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000F00ULL)>> 8)])<< 8;
746  case 8: __t ^= ((word)X[(int)((_a[j] & 0x00000000000000F0ULL)>> 4)])<< 4;
747  case 4: __t ^= ((word)X[(int)((_a[j] & 0x000000000000000FULL)>> 0)])<< 0;
748  };
749  _a[j] = __t;
750 
751  } else if (A->w == 8) {
752 
753  register word __a0 = _a[startblock]>>(start%m4ri_radix);
754  register word __a1;
755  register word __t0 = 0;
756  register word __t1;
757 
758  switch( (start/8) %8 ) {
759  case 0: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<0; __a0 >>= 8;
760  case 1: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<8; __a0 >>= 8;
761  case 2: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<16; __a0 >>= 8;
762  case 3: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<24; __a0 >>= 8;
763  case 4: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<32; __a0 >>= 8;
764  case 5: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<40; __a0 >>= 8;
765  case 6: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<48; __a0 >>= 8;
766  case 7: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<56; break;
767  default: m4ri_die("impossible");
768  }
769  if(A->x->width-startblock == 1) {
770  _a[startblock] &= ~(bitmask_begin & bitmask_end);
771  _a[startblock] ^= __t0 & bitmask_begin & bitmask_end;
772  return;
773  } else {
774  _a[startblock] &= ~bitmask_begin;
775  _a[startblock] ^= __t0 & bitmask_begin;
776  }
777 
778  for(j=startblock+1; j+2 < A->x->width; j+=2) {
779  __a0 = _a[j], __t0 = 0;
780  __a1 = _a[j+1], __t1 = 0;
781  __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<0; __a0 >>= 8;
782  __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<0; __a1 >>= 8;
783  __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<8; __a0 >>= 8;
784  __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<8; __a1 >>= 8;
785  __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<16; __a0 >>= 8;
786  __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<16; __a1 >>= 8;
787  __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<24; __a0 >>= 8;
788  __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<24; __a1 >>= 8;
789  __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<32; __a0 >>= 8;
790  __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<32; __a1 >>= 8;
791  __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<40; __a0 >>= 8;
792  __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<40; __a1 >>= 8;
793  __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<48; __a0 >>= 8;
794  __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<48; __a1 >>= 8;
795  __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<56; __a0 >>= 8;
796  __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<56;
797  _a[j] = __t0;
798  _a[j+1] = __t1;
799  }
800 
801  for(; j < A->x->width-1; j++) {
802  __a0 = _a[j], __t0 = 0;
803  __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<0; __a0 >>= 8;
804  __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<8; __a0 >>= 8;
805  __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<16; __a0 >>= 8;
806  __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<24; __a0 >>= 8;
807  __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<32; __a0 >>= 8;
808  __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<40; __a0 >>= 8;
809  __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<48; __a0 >>= 8;
810  __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<56;
811  _a[j] = __t0;
812  }
813 
814  __t = _a[j] & ~bitmask_end;
815  switch( (A->x->offset + A->x->ncols) % m4ri_radix ) {
816  case 0: __t ^= ((word)X[(int)((_a[j] & 0xFF00000000000000ULL)>>56)])<<56;
817  case 56: __t ^= ((word)X[(int)((_a[j] & 0x00FF000000000000ULL)>>48)])<<48;
818  case 48: __t ^= ((word)X[(int)((_a[j] & 0x0000FF0000000000ULL)>>40)])<<40;
819  case 40: __t ^= ((word)X[(int)((_a[j] & 0x000000FF00000000ULL)>>32)])<<32;
820  case 32: __t ^= ((word)X[(int)((_a[j] & 0x00000000FF000000ULL)>>24)])<<24;
821  case 24: __t ^= ((word)X[(int)((_a[j] & 0x0000000000FF0000ULL)>>16)])<<16;
822  case 16: __t ^= ((word)X[(int)((_a[j] & 0x000000000000FF00ULL)>> 8)])<< 8;
823  case 8: __t ^= ((word)X[(int)((_a[j] & 0x00000000000000FFULL)>> 0)])<< 0;
824  };
825  _a[j] = __t;
826 
827  } else if (A->w == 16) {
828  switch( (start/16) %4 ) {
829  case 0: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
830  case 1: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
831  case 2: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
832  case 3: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48; break;
833  default: m4ri_die("impossible");
834  }
835  if(A->x->width-startblock == 1) {
836  _a[startblock] &= ~(bitmask_begin & bitmask_end);
837  _a[startblock] ^= __t & bitmask_begin & bitmask_end;
838  return;
839  } else {
840  _a[startblock] &= ~bitmask_begin;
841  _a[startblock] ^= __t & bitmask_begin;
842  }
843 
844  for(j=startblock+1; j+4<A->x->width; j+=4) {
845  __a = _a[j], __t = 0;
846  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
847  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
848  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
849  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
850  _a[j] = __t;
851 
852  __a = _a[j+1], __t = 0;
853  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
854  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
855  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
856  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
857  _a[j+1] = __t;
858 
859 
860  __a = _a[j+2], __t = 0;
861  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
862  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
863  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
864  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
865  _a[j+2] = __t;
866 
867  __a = _a[j+3], __t = 0;
868  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
869  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
870  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
871  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
872  _a[j+3] = __t;
873  }
874  for( ; j<A->x->width-1; j++) {
875  __a = _a[j], __t = 0;
876  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16;
877  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
878  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
879  __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
880  _a[j] = __t;
881  }
882 
883  __t = _a[j] & ~bitmask_end;
884  switch( (A->x->offset + A->x->ncols) % m4ri_radix) {
885  case 0: __t ^= ((word)X[(int)((_a[j] & 0xFFFF000000000000ULL)>>48)])<<48;
886  case 48: __t ^= ((word)X[(int)((_a[j] & 0x0000FFFF00000000ULL)>>32)])<<32;
887  case 32: __t ^= ((word)X[(int)((_a[j] & 0x00000000FFFF0000ULL)>>16)])<<16;
888  case 16: __t ^= ((word)X[(int)((_a[j] & 0x000000000000FFFFULL)>> 0)])<< 0;
889  };
890  _a[j] = __t;
891 
892  } else {
893  for(rci_t j=start_col; j<A->ncols; j++) {
894  mzed_write_elem(A, r, j, X[mzed_read_elem(A, r, j)]);
895  }
896  }
897 }
898 
909 static inline void mzed_row_swap(mzed_t *M, const rci_t rowa, const rci_t rowb) {
910  mzd_row_swap(M->x, rowa, rowb);
911 }
912 
927 static inline void mzed_copy_row(mzed_t* B, rci_t i, const mzed_t* A, rci_t j) {
928  mzd_copy_row(B->x, i, A->x, j);
929 }
930 
941 static inline void mzed_col_swap(mzed_t *M, const rci_t cola, const rci_t colb) {
942  for(rci_t i=0; i<M->w; i++)
943  mzd_col_swap(M->x,M->w*cola+i, M->w*colb+i);
944 }
945 
958 static inline void mzed_col_swap_in_rows(mzed_t *A, const rci_t cola, const rci_t colb, const rci_t start_row, rci_t stop_row) {
959  for(unsigned int e=0; e < A->finite_field->degree; e++) {
960  mzd_col_swap_in_rows(A->x, A->w*cola+e, A->w*colb+e, start_row, stop_row);
961  };
962 }
963 
977 static inline void mzed_row_add(mzed_t *M, const rci_t sourcerow, const rci_t destrow) {
978  mzd_row_add(M->x, sourcerow, destrow);
979 }
980 
991 static inline rci_t mzed_first_zero_row(mzed_t *A) {
992  return mzd_first_zero_row(A->x);
993 }
994 
995 
1006 static inline void mzed_row_clear_offset(mzed_t *M, const rci_t row, const rci_t coloffset) {
1007  mzd_row_clear_offset(M->x, row, coloffset*M->w);
1008 }
1009 
1023 rci_t mzed_echelonize_naive(mzed_t *A, int full);
1024 
1033 void mzed_print(const mzed_t *M);
1034 
1035 #endif //M4RIE_MATRIX_H