1use super::Block;
2
3const MOD: u64 = 0b10000111; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
7cpufeatures::new!(target_feature_pclmulqdq, "pclmulqdq");
8
9impl Block {
10 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
14 #[inline]
15 pub fn clmul(&self, rhs: &Self) -> (Self, Self) {
16 if target_feature_pclmulqdq::get() {
17 unsafe {
19 let (low, high) = clmul::clmul128(self.into(), rhs.into());
20 (low.into(), high.into())
21 }
22 } else {
23 let (low, high) = scalar::clmul128(self.into(), rhs.into());
24 (low.into(), high.into())
25 }
26 }
27
28 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
32 #[inline]
33 pub fn clmul(&self, rhs: &Self) -> (Self, Self) {
34 let (low, high) = scalar::clmul128(self.into(), rhs.into());
35 (low.into(), high.into())
36 }
37
38 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
42 #[inline]
43 pub fn gf_mul(&self, rhs: &Self) -> Self {
44 if target_feature_pclmulqdq::get() {
45 unsafe { clmul::gf128_mul(self.into(), rhs.into()).into() }
47 } else {
48 scalar::gf128_mul(self.into(), rhs.into()).into()
49 }
50 }
51
52 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
56 #[inline]
57 pub fn gf_mul(&self, rhs: &Self) -> Self {
58 scalar::gf128_mul(self.into(), rhs.into()).into()
59 }
60
61 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
63 #[inline]
64 pub fn gf_reduce(low: &Self, high: &Self) -> Self {
65 if target_feature_pclmulqdq::get() {
66 unsafe { clmul::gf128_reduce(low.into(), high.into()).into() }
68 } else {
69 scalar::gf128_reduce(low.into(), high.into()).into()
70 }
71 }
72
73 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
75 #[inline]
76 pub fn gf_reduce(low: &Self, high: &Self) -> Self {
77 scalar::gf128_reduce(low.into(), high.into()).into()
78 }
79
80 #[inline]
81 pub fn gf_pow(&self, mut exp: u64) -> Block {
82 let mut s = Block::ONE;
83 let mut pow2 = *self;
84
85 while exp != 0 {
86 if exp & 1 != 0 {
87 s = s.gf_mul(&pow2);
88 }
89 pow2 = pow2.gf_mul(&pow2);
90 exp >>= 1;
91 }
92 s
93 }
94}
95
96#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
97mod clmul {
98 #[cfg(target_arch = "x86")]
99 use std::arch::x86::*;
100 #[cfg(target_arch = "x86_64")]
101 use std::arch::x86_64::*;
102
103 use super::MOD;
104
105 #[target_feature(enable = "pclmulqdq")]
106 #[inline]
107 pub unsafe fn gf128_mul(a: __m128i, b: __m128i) -> __m128i {
108 unsafe {
109 let (low, high) = clmul128(a, b);
110 gf128_reduce(low, high)
111 }
112 }
113
114 #[target_feature(enable = "pclmulqdq")]
118 #[inline]
119 pub unsafe fn clmul128(a: __m128i, b: __m128i) -> (__m128i, __m128i) {
120 #[allow(unused_unsafe)]
125 unsafe {
126 let ab_low = _mm_clmulepi64_si128::<0x00>(a, b);
129 let ab_high = _mm_clmulepi64_si128::<0x11>(a, b);
130 let ab_lohi1 = _mm_clmulepi64_si128::<0x01>(a, b);
131 let ab_lohi2 = _mm_clmulepi64_si128::<0x10>(a, b);
132 let ab_mid = _mm_xor_si128(ab_lohi1, ab_lohi2);
133 let low = _mm_xor_si128(ab_low, _mm_slli_si128::<8>(ab_mid));
134 let high = _mm_xor_si128(ab_high, _mm_srli_si128::<8>(ab_mid));
135 (low, high)
136 }
137 }
138
139 #[target_feature(enable = "pclmulqdq")]
140 #[inline]
141 pub unsafe fn gf128_reduce(mut low: __m128i, mut high: __m128i) -> __m128i {
142 unsafe {
145 let modulus = [MOD, 0];
146 let modulus = _mm_loadu_si64(modulus.as_ptr().cast());
147
148 let tmp = _mm_clmulepi64_si128::<0x01>(high, modulus);
149 let tmp_shifted = _mm_slli_si128::<8>(tmp);
150 low = _mm_xor_si128(low, tmp_shifted);
151 high = _mm_xor_si128(high, tmp_shifted);
152
153 let tmp = _mm_clmulepi64_si128::<0x01>(tmp, modulus);
155 low = _mm_xor_si128(low, tmp);
156
157 let tmp = _mm_clmulepi64_si128::<0x00>(high, modulus);
158 _mm_xor_si128(low, tmp)
159 }
160 }
161
162 #[cfg(all(test, target_feature = "pclmulqdq"))]
163 mod test {
164 use std::{arch::x86_64::__m128i, mem::transmute};
165
166 use crate::block::gf128::clmul::{clmul128, gf128_mul, gf128_reduce};
167
168 #[test]
169 fn test_gf128_mul_zero() {
170 unsafe {
171 let a = transmute(0x19831239123916248127031273012381_u128);
172 let b = transmute(0_u128);
173 let exp = 0_u128;
174 let mul = transmute(gf128_mul(a, b));
175 assert_eq!(exp, mul);
176 }
177 }
178
179 #[test]
180 fn test_gf128_mul_onw() {
181 unsafe {
182 let a = transmute(0x19831239123916248127031273012381_u128);
183 let b = transmute(0x1_u128);
184 let exp = 0x19831239123916248127031273012381_u128;
185 let mul = transmute(gf128_mul(a, b));
186 assert_eq!(exp, mul);
187 }
188 }
189
190 #[test]
191 fn test_gf128_mul() {
192 unsafe {
193 let a = transmute(0x19831239123916248127031273012381_u128);
194 let b = transmute(0xabcdef0123456789abcdef0123456789_u128);
195 let exp = 0x63a033d0ed643e85153c50f4268a7d9_u128;
196 let mul = transmute(gf128_mul(a, b));
197 assert_eq!(exp, mul);
198 }
199 }
200
201 #[test]
202 fn test_clmul128() {
203 unsafe {
204 let a: __m128i = transmute(0x19831239123916248127031273012381_u128);
205 let b: __m128i = transmute(0xabcdef0123456789abcdef0123456789_u128);
206 let (low, high) = clmul128(a, b);
207 let [low, high] = transmute([low, high]);
208 let exp_low: u128 = 0xa5de9b50e6db7b5147e92b99ee261809;
209 let exp_high: u128 = 0xf1d6d37d58114afed2addfedd7c77f7;
210 assert_eq!(exp_low, low);
211 assert_eq!(exp_high, high);
212 }
213 }
214
215 #[test]
216 fn test_gf128_reduce() {
217 unsafe {
218 let low: __m128i = transmute(0x0123456789abcdef0123456789abcdef_u128);
220 let high: __m128i = transmute(0xabcdef0123456789abcdef0123456789_u128);
221 let exp = 0xb4b548f1c3c23f86b4b548f1c3c21572_u128;
222 let res: u128 = transmute(gf128_reduce(low, high));
223
224 println!("res: {res:b}");
225 println!("exp: {exp:b}");
226 assert_eq!(exp, res);
227 }
228 }
229 }
230
231 #[cfg(all(test, feature = "nightly", target_feature = "pclmulqdq"))]
232 mod benches {
233 extern crate test;
234
235 use std::mem::transmute;
236
237 use criterion::black_box;
238 use rand::{Rng, rng};
239 use test::Bencher;
240
241 #[bench]
242 fn bench_gf128_mul(b: &mut Bencher) {
243 let [low, high] = unsafe { transmute(rng().random::<[u128; 2]>()) };
244 b.iter(|| black_box(unsafe { super::gf128_mul(black_box(low), black_box(high)) }));
245 }
246
247 #[bench]
248 fn bench_gf128_reduce(b: &mut Bencher) {
249 let [low, high] = unsafe { transmute(rng().random::<[u128; 2]>()) };
250 b.iter(|| black_box(unsafe { super::gf128_reduce(black_box(low), black_box(high)) }));
251 }
252 }
253}
254
255#[allow(dead_code)]
258mod scalar {
259 #[inline]
260 pub fn gf128_mul(a: u128, b: u128) -> u128 {
261 let (low, high) = clmul128(a, b);
262 gf128_reduce(low, high)
263 }
264
265 #[inline]
269 pub fn clmul128(a: u128, b: u128) -> (u128, u128) {
270 let (a_low, a_high) = (a as u64, (a >> 64) as u64);
271 let (b_low, b_high) = (b as u64, (b >> 64) as u64);
272
273 let ab_low = clmul64(a_low, b_low);
275 let ab_high = clmul64(a_high, b_high);
276 let ab_mid = clmul64(a_low ^ a_high, b_low ^ b_high) ^ ab_low ^ ab_high;
277 let low = ab_low ^ (ab_mid << 64);
278 let high = ab_high ^ (ab_mid >> 64);
279 (low, high)
280 }
281
282 #[inline]
293 fn clmul64(x: u64, y: u64) -> u128 {
294 let x0 = (x & 0x1111_1111_1111_1111) as u128;
295 let x1 = (x & 0x2222_2222_2222_2222) as u128;
296 let x2 = (x & 0x4444_4444_4444_4444) as u128;
297 let x3 = (x & 0x8888_8888_8888_8888) as u128;
298 let y0 = (y & 0x1111_1111_1111_1111) as u128;
299 let y1 = (y & 0x2222_2222_2222_2222) as u128;
300 let y2 = (y & 0x4444_4444_4444_4444) as u128;
301 let y3 = (y & 0x8888_8888_8888_8888) as u128;
302
303 let mut z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1);
304 let mut z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2);
305 let mut z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3);
306 let mut z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0);
307
308 z0 &= 0x1111_1111_1111_1111_1111_1111_1111_1111;
309 z1 &= 0x2222_2222_2222_2222_2222_2222_2222_2222;
310 z2 &= 0x4444_4444_4444_4444_4444_4444_4444_4444;
311 z3 &= 0x8888_8888_8888_8888_8888_8888_8888_8888;
312
313 z0 | z1 | z2 | z3
314 }
315
316 #[inline]
324 pub fn gf128_reduce(low: u128, high: u128) -> u128 {
325 #[inline]
329 fn shift_u128(x: u128, shift: u32) -> (u128, u128) {
330 let overflow = x >> (128 - shift);
332 let lower = x << shift;
333 (overflow, lower)
334 }
335
336 let (ov7, lo7) = shift_u128(high, 7);
342 let (ov2, lo2) = shift_u128(high, 2);
343 let (ov1, lo1) = shift_u128(high, 1);
344 let lo0 = high; let combined_low = lo7 ^ lo2 ^ lo1 ^ lo0;
348 let combined_overflow = ov7 ^ ov2 ^ ov1;
350
351 let reduced_overflow = (combined_overflow << 7)
355 ^ (combined_overflow << 2)
356 ^ (combined_overflow << 1)
357 ^ combined_overflow;
358
359 let poly_contrib = combined_low ^ reduced_overflow;
362
363 low ^ poly_contrib
365 }
366
367 #[cfg(test)]
368 mod tests {
369 use super::{clmul128, gf128_mul, gf128_reduce};
370
371 #[test]
372 fn test_gf128_mul_zero() {
373 let a = 0x19831239123916248127031273012381;
374 let b = 0;
375 let exp = 0;
376 let mul = gf128_mul(a, b);
377 assert_eq!(exp, mul);
378 }
379
380 #[test]
381 fn test_gf128_mul_one() {
382 let a = 0x19831239123916248127031273012381;
383 let b = 1;
384 let exp = 0x19831239123916248127031273012381;
385 let mul = gf128_mul(a, b);
386 assert_eq!(exp, mul);
387 }
388
389 #[test]
390 fn test_gf128_mul() {
391 let a = 0x19831239123916248127031273012381;
392 let b = 0xabcdef0123456789abcdef0123456789;
393 let exp = 0x63a033d0ed643e85153c50f4268a7d9;
394 let mul = gf128_mul(a, b);
395 assert_eq!(exp, mul);
396 }
397
398 #[test]
399 fn test_gf128_reduce_zero() {
400 assert_eq!(gf128_reduce(0, 0), 0);
401 }
402
403 #[test]
404 fn test_gf128_reduce_low_only() {
405 assert_eq!(gf128_reduce(1, 0), 1);
406 assert_eq!(gf128_reduce(0x87, 0), 0x87); assert_eq!(gf128_reduce(0xFFFFFFFFFFFFFFFF, 0), 0xFFFFFFFFFFFFFFFF);
408 }
409
410 #[test]
411 fn test_gf128_reduce_high_only() {
412 assert_eq!(gf128_reduce(0, 1), 0x87);
414 assert_eq!(gf128_reduce(0, 2), 0x87 << 1);
415 assert_eq!(gf128_reduce(0, 3), (0x87 << 1) ^ 0x87);
416
417 assert_eq!(gf128_reduce(0, 1 << 63), 0x87 << 63);
418 }
419
420 #[test]
421 fn test_gf128_reduce_overflow() {
422 let high = u128::MAX; let low = u128::MAX; assert_eq!(gf128_reduce(low, high), 0xffffffffffffffffffffffffffffc071);
425 }
426
427 #[test]
428 fn tests_gf128_reduce() {
429 let low = 0x0123456789abcdef0123456789abcdef;
431 let high = 0xabcdef0123456789abcdef0123456789;
432 let exp = 0xb4b548f1c3c23f86b4b548f1c3c21572;
433 let res = gf128_reduce(low, high);
434
435 println!("res: {res:b}");
436 println!("exp: {exp:b}");
437 assert_eq!(exp, res);
438 }
439
440 #[test]
441 fn test_clmul128() {
442 let a = 0x19831239123916248127031273012381;
443 let b = 0xabcdef0123456789abcdef0123456789;
444 let (low, high) = clmul128(a, b);
445 let exp_low = 0xa5de9b50e6db7b5147e92b99ee261809;
446 let exp_high = 0xf1d6d37d58114afed2addfedd7c77f7;
447 assert_eq!(exp_low, low);
448 assert_eq!(exp_high, high);
449 }
450 }
451
452 #[cfg(all(test, feature = "nightly"))]
453 mod benches {
454 extern crate test;
455
456 use criterion::black_box;
457 use rand::{Rng, rng};
458 use test::Bencher;
459
460 #[bench]
461 fn bench_gf128_mul(b: &mut Bencher) {
462 let [low, high] = rng().random::<[u128; 2]>();
463 b.iter(|| black_box(super::gf128_mul(black_box(low), black_box(high))));
464 }
465
466 #[bench]
467 fn bench_gf128_reduce(b: &mut Bencher) {
468 let [low, high] = rng().random::<[u128; 2]>();
469 b.iter(|| black_box(super::gf128_reduce(black_box(low), black_box(high))));
470 }
471 }
472}
473
474#[cfg(all(test, not(miri), target_feature = "pclmulqdq"))]
477mod scalar_simd_tests {
478 use std::mem::transmute;
479
480 use rand::{Rng, rng};
481
482 use super::{clmul, scalar};
483
484 #[test]
485 fn test_clmul128() {
486 for _ in 0..1000 {
487 let (a, b) = rng().random::<(u128, u128)>();
488 unsafe {
489 let clmul_res = clmul::clmul128(transmute(a), transmute(b));
490 let scalar_res = scalar::clmul128(a, b);
491 assert_eq!(scalar_res.0, transmute(clmul_res.0));
492 }
493 }
494 }
495
496 #[test]
497 fn test_gf128_reduce() {
498 for _ in 0..1000 {
499 let (a, b) = rng().random::<(u128, u128)>();
500 unsafe {
501 let clmul_res = clmul::gf128_reduce(transmute(a), transmute(b));
502 let scalar_res = scalar::gf128_reduce(a, b);
503 assert_eq!(scalar_res, transmute(clmul_res));
504 }
505 }
506 }
507
508 #[test]
509 fn test_gf128_mul() {
510 for _ in 0..1000 {
511 let (a, b) = rng().random::<(u128, u128)>();
512 unsafe {
513 let clmul_res = clmul::gf128_mul(transmute(a), transmute(b));
514 let scalar_res = scalar::gf128_mul(a, b);
515 assert_eq!(scalar_res, transmute(clmul_res));
516 }
517 }
518 }
519}
520
521#[cfg(test)]
522mod tests {
523 use crate::Block;
524
525 #[test]
526 fn test_gf_pow() {
527 let b: Block = 24646523424323_u128.into();
528 assert_eq!(Block::ONE, b.gf_pow(0));
529 assert_eq!(b, b.gf_pow(1));
530 assert_eq!(b.gf_mul(&b), b.gf_pow(2));
531 assert_eq!(b.gf_mul(&b.gf_mul(&b)), b.gf_pow(3));
532 }
533}