HPCombi
High Performance Combinatorics in C++ using vector instructions v1.0.2
Loading...
Searching...
No Matches
epu8.hpp
Go to the documentation of this file.
1//****************************************************************************//
2// Copyright (C) 2016-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
3// //
4// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
5// //
6// HP-Combi is free software: you can redistribute it and/or modify it //
7// under the terms of the GNU General Public License as published by the //
8// Free Software Foundation, either version 3 of the License, or //
9// (at your option) any later version. //
10// //
11// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
14// for more details. //
15// //
16// You should have received a copy of the GNU General Public License along //
17// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
18//****************************************************************************//
19
25
26#ifndef HPCOMBI_EPU8_HPP_
27#define HPCOMBI_EPU8_HPP_
28
29#include <array> // for array
30#include <cstddef> // for size_t
31#include <cstdint> // for uint8_t, uint64_t, int8_t
32#include <ostream> // for ostream
33#include <string> // for string
34
35#include "builder.hpp" // for TPUBuild
36#include "debug.hpp" // for HPCOMBI_ASSERT
37#include "vect_generic.hpp" // for VectGeneric
38
39#if defined(__GNUC__) && !defined(__clang__)
40#pragma GCC diagnostic push
41#pragma GCC diagnostic ignored "-Wswitch-default"
42#pragma GCC diagnostic ignored "-Wpacked"
43#endif
44#include "simde/x86/sse4.1.h" // for simde_mm_max_epu8, simde...
45#include "simde/x86/sse4.2.h" // for ???
46#if defined(__GNUC__) && !defined(__clang__)
47#pragma GCC diagnostic pop
48#endif
49
50namespace HPCombi {
51
53inline constexpr uint8_t
54operator"" _u8(unsigned long long arg) noexcept { // NOLINT
55 return static_cast<uint8_t>(arg);
56}
57
66using epu8 = uint8_t __attribute__((vector_size(16)));
67
68static_assert(alignof(epu8) == 16,
69 "epu8 type is not properly aligned by the compiler !");
70
74constexpr TPUBuild<epu8> Epu8{};
75
77inline bool is_all_zero(epu8 a) noexcept { return simde_mm_testz_si128(a, a); }
79inline bool is_all_one(epu8 a) noexcept {
80 return simde_mm_testc_si128(a, Epu8(0xFF));
81}
82
84inline bool equal(epu8 a, epu8 b) noexcept {
85 return is_all_zero(simde_mm_xor_si128(a, b));
86}
87
88inline bool not_equal(epu8 a, epu8 b) noexcept { return !equal(a, b); }
89
92inline epu8 permuted_ref(epu8 a, epu8 b) noexcept;
93
96inline epu8 permuted(epu8 a, epu8 b) noexcept {
97 return simde_mm_shuffle_epi8(a, b);
98}
99
103inline epu8 shifted_right(epu8 a) noexcept {
104 return simde_mm_bslli_si128(a, 1);
105}
106
110inline epu8 shifted_left(epu8 a) noexcept { return simde_mm_bsrli_si128(a, 1); }
111
113inline epu8 reverted(epu8 a) noexcept { return permuted(a, Epu8.rev()); }
114
116inline epu8 min(epu8 a, epu8 b) noexcept { return simde_mm_min_epu8(a, b); }
118inline epu8 max(epu8 a, epu8 b) noexcept { return simde_mm_max_epu8(a, b); }
119
121inline bool is_sorted(epu8 a) noexcept;
127inline epu8 sorted(epu8 a) noexcept;
132inline epu8 sorted8(epu8 a) noexcept;
138inline epu8 revsorted(epu8 a) noexcept;
143inline epu8 revsorted8(epu8 a) noexcept;
144
149inline epu8 sort_perm(epu8 &a) noexcept;
154inline epu8 sort8_perm(epu8 &a) noexcept;
155
163inline void merge(epu8 &a, epu8 &b) noexcept;
164
165#ifdef SIMDE_X86_SSE4_2_NATIVE
170inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) noexcept;
171#endif
172
177inline epu8 permutation_of_ref(epu8 a, epu8 b) noexcept;
178
188inline epu8 permutation_of(epu8 a, epu8 b) noexcept;
189
191constexpr uint64_t prime = 0x9e3779b97f4a7bb9;
192
200inline epu8 random_epu8(uint16_t bnd);
201
209inline epu8 remove_dups(epu8 a, uint8_t repl = 0) noexcept;
210
216
217inline uint8_t horiz_sum_ref(epu8) noexcept;
218
225
226inline uint8_t horiz_sum_gen(epu8) noexcept;
227
233inline uint8_t horiz_sum4(epu8) noexcept;
234
240inline uint8_t horiz_sum3(epu8) noexcept;
241
253inline uint8_t horiz_sum(epu8 v) noexcept { return horiz_sum3(v); }
254
260inline epu8 partial_sums_ref(epu8) noexcept;
261
268inline epu8 partial_sums_gen(epu8) noexcept;
269
275inline epu8 partial_sums_round(epu8) noexcept;
276
287inline epu8 partial_sums(epu8 v) noexcept { return partial_sums_round(v); }
288
294inline uint8_t horiz_max_ref(epu8) noexcept;
295
302inline uint8_t horiz_max_gen(epu8) noexcept;
303
309inline uint8_t horiz_max4(epu8) noexcept;
310
316inline uint8_t horiz_max3(epu8) noexcept;
317
328inline uint8_t horiz_max(epu8 v) noexcept { return horiz_max4(v); }
329
335inline epu8 partial_max_ref(epu8) noexcept;
336
343inline epu8 partial_max_gen(epu8) noexcept;
344
350inline epu8 partial_max_round(epu8) noexcept;
351
362inline epu8 partial_max(epu8 v) noexcept { return partial_max_round(v); }
363
369inline uint8_t horiz_min_ref(epu8) noexcept;
370
377inline uint8_t horiz_min_gen(epu8) noexcept;
378
384inline uint8_t horiz_min4(epu8) noexcept;
385
391inline uint8_t horiz_min3(epu8) noexcept;
392
403inline uint8_t horiz_min(epu8 v) noexcept { return horiz_min4(v); }
404
410inline epu8 partial_min_ref(epu8) noexcept;
411
418inline epu8 partial_min_gen(epu8) noexcept;
419
425inline epu8 partial_min_round(epu8) noexcept;
426
437inline epu8 partial_min(epu8 v) noexcept { return partial_min_round(v); }
438
444inline epu8 eval16_ref(epu8 v) noexcept;
445
451inline epu8 eval16_arr(epu8 v) noexcept;
452
458inline epu8 eval16_cycle(epu8 v) noexcept;
459
465inline epu8 eval16_popcount(epu8 v) noexcept;
466
481inline epu8 eval16(epu8 v) noexcept { return eval16_cycle(v); }
482
488inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound = 16) noexcept;
489
490#ifdef SIMDE_X86_SSE4_2_NATIVE
496inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16) noexcept;
497#endif
498
504inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound = 16) noexcept;
505
524inline uint64_t first_diff(epu8 a, epu8 b, size_t bound = 16) noexcept {
525 return first_diff_mask(a, b, bound);
526}
527
533inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound = 16) noexcept;
534
535#ifdef SIMDE_X86_SSE4_2_NATIVE
541inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16) noexcept;
542#endif
543
549inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound = 16) noexcept;
550
569inline uint64_t last_diff(epu8 a, epu8 b, size_t bound = 16) noexcept {
570 return last_diff_mask(a, b, bound);
571}
572
574inline bool less(epu8 a, epu8 b) noexcept;
580inline int8_t less_partial(epu8 a, epu8 b, int k) noexcept;
581
585inline uint64_t first_zero(epu8 v, int bnd) noexcept;
589inline uint64_t last_zero(epu8 v, int bnd) noexcept;
593inline uint64_t first_non_zero(epu8 v, int bnd) noexcept;
597inline uint64_t last_non_zero(epu8 v, int bnd) noexcept;
598
601inline epu8 popcount16(epu8 v) noexcept;
602
618inline bool is_partial_transformation(epu8 v, const size_t k = 16) noexcept;
619
635inline bool is_transformation(epu8 v, const size_t k = 16) noexcept;
636
653inline bool is_partial_permutation(epu8 v, const size_t k = 16) noexcept;
654
655#ifdef SIMDE_X86_SSE4_2_NATIVE
660inline bool is_permutation_cpmestri(epu8 v, const size_t k = 16) noexcept;
661#endif
666inline bool is_permutation_sort(epu8 v, const size_t k = 16) noexcept;
667
672inline bool is_permutation_eval(epu8 v, const size_t k = 16) noexcept;
673
690inline bool is_permutation(epu8 v, const size_t k = 16) noexcept;
691
692} // namespace HPCombi
693
694namespace std {
695
696inline std::ostream &operator<<(std::ostream &stream, HPCombi::epu8 const &a);
697
698inline std::string to_string(HPCombi::epu8 const &a);
699
706} // namespace std
707
708#include "epu8_impl.hpp"
709
710#endif // HPCOMBI_EPU8_HPP_
uint8_t __attribute__((vector_size(16))) epu8
epu8 stands for Extended Packed Unsigned, grouped by 8 bits; this is the low level type chosen by Int...
Definition epu8.hpp:66
HPCombi::TPUBuild and casts from HPCombi::TPU.
defines the macro HPCOMBI_ASSERT
implementation of epu8.hpp ; this file should not be included directly.
std::ostream & operator<<(std::ostream &out, const std::vector< T > &v)
Definition image.cpp:35
Definition bmat8.hpp:42
uint8_t horiz_min4(epu8) noexcept
Same interface as horiz_min but with a different implementation.
Definition epu8_impl.hpp:419
epu8 max(epu8 a, epu8 b) noexcept
Vector max between two HPCombi::epu8 0.
Definition epu8.hpp:118
uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound=16) noexcept
Same interface as last_diff but with a different implementation.
Definition epu8_impl.hpp:93
uint64_t first_non_zero(epu8 v, int bnd) noexcept
return the index of the first non zero entry or 16 if there are none Only index smaller than bound ar...
Definition epu8_impl.hpp:127
uint8_t horiz_min_ref(epu8) noexcept
Same interface as horiz_min but with a different implementation.
Definition epu8_impl.hpp:410
epu8 eval16_arr(epu8 v) noexcept
Same interface as eval16 but with a different implementation.
Definition epu8_impl.hpp:453
epu8 permuted(epu8 a, epu8 b) noexcept
Same as permuted_ref but with an optimized implementation using intrinsics.
Definition epu8.hpp:96
epu8 sort8_perm(epu8 &a) noexcept
Sort this and return the sorting permutation.
Definition epu8_impl.hpp:220
epu8 shifted_right(epu8 a) noexcept
Left shifted of a HPCombi::epu8 inserting a 0.
Definition epu8.hpp:103
uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound=16) noexcept
Same interface as first_diff but with a different implementation.
Definition epu8_impl.hpp:78
uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound=16) noexcept
Same interface as first_diff but with a different implementation.
Definition epu8_impl.hpp:89
epu8 partial_sums_ref(epu8) noexcept
Same interface as partial_sums but with a different implementation.
Definition epu8_impl.hpp:358
epu8 partial_sums(epu8 v) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8.hpp:287
uint8_t horiz_min(epu8 v) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8.hpp:403
epu8 remove_dups(epu8 a, uint8_t repl=0) noexcept
Remove duplicates in a sorted HPCombi::epu8.
Definition epu8_impl.hpp:261
epu8 revsorted8(epu8 a) noexcept
Return a HPCombi::epu8 with both halves reverse sorted.
Definition epu8_impl.hpp:213
epu8 permutation_of(epu8 a, epu8 b) noexcept
Find if a vector is a permutation of another one.
Definition epu8_impl.hpp:304
bool is_permutation(epu8 v, const size_t k=16) noexcept
Definition epu8_impl.hpp:531
int8_t less_partial(epu8 a, epu8 b, int k) noexcept
Partial lexicographic comparison between two HPCombi::epu8.
Definition epu8_impl.hpp:114
uint8_t horiz_max4(epu8) noexcept
Same interface as horiz_max but with a different implementation.
Definition epu8_impl.hpp:384
uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound=16) noexcept
Same interface as last_diff but with a different implementation.
Definition epu8_impl.hpp:106
uint8_t horiz_max3(epu8) noexcept
Same interface as horiz_max but with a different implementation.
Definition epu8_impl.hpp:385
bool is_all_one(epu8 a) noexcept
Test whether all the entries of a HPCombi::epu8 are one.
Definition epu8.hpp:79
constexpr uint64_t prime
A prime number good for hashing.
Definition epu8.hpp:191
uint8_t horiz_min_gen(epu8) noexcept
Same interface as horiz_min but with a different implementation.
Definition epu8_impl.hpp:416
bool is_partial_permutation(epu8 v, const size_t k=16) noexcept
Test for partial permutations.
Definition epu8_impl.hpp:500
bool is_permutation_sort(epu8 v, const size_t k=16) noexcept
Same interface as is_permutation but with a different implementation.
Definition epu8_impl.hpp:522
uint64_t last_zero(epu8 v, int bnd) noexcept
return the index of the last zero entry or 16 if there are none Only index smaller than bound are tak...
Definition epu8_impl.hpp:124
void merge(epu8 &a, epu8 &b) noexcept
Merge two sorted epu8.
Definition epu8_impl.hpp:241
uint8_t horiz_sum4(epu8) noexcept
Same interface as horiz_sum but with a different implementation.
Definition epu8_impl.hpp:349
epu8 popcount16(epu8 v) noexcept
a vector popcount function
Definition epu8_impl.hpp:481
epu8 partial_sums_round(epu8) noexcept
Same interface as partial_sums but with a different implementation.
Definition epu8_impl.hpp:369
uint8_t horiz_sum3(epu8) noexcept
Same interface as horiz_sum but with a different implementation.
Definition epu8_impl.hpp:350
uint8_t horiz_sum_ref(epu8) noexcept
Same interface as horiz_sum but with a different implementation.
Definition epu8_impl.hpp:340
epu8 partial_sums_gen(epu8) noexcept
Same interface as partial_sums but with a different implementation.
Definition epu8_impl.hpp:365
bool equal(epu8 a, epu8 b) noexcept
Equality of HPCombi::epu8.
Definition epu8.hpp:84
epu8 permutation_of_ref(epu8 a, epu8 b) noexcept
Same interface as permutation_of but with a different implementation.
Definition epu8_impl.hpp:295
epu8 min(epu8 a, epu8 b) noexcept
Vector min between two HPCombi::epu8 0.
Definition epu8.hpp:116
epu8 partial_max_ref(epu8) noexcept
Same interface as partial_max but with a different implementation.
Definition epu8_impl.hpp:393
epu8 sorted8(epu8 a) noexcept
Return a HPCombi::epu8 with both halves sorted.
Definition epu8_impl.hpp:207
epu8 partial_max_round(epu8) noexcept
Same interface as partial_max but with a different implementation.
Definition epu8_impl.hpp:404
epu8 eval16(epu8 v) noexcept
Evaluation of a HPCombi::epu8: count how many times each int of 0..15 appears in the input.
Definition epu8.hpp:481
uint8_t horiz_sum(epu8 v) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8.hpp:253
uint8_t horiz_max(epu8 v) noexcept
Horizontal sum of a HPCombi::epu8.
Definition epu8.hpp:328
epu8 reverted(epu8 a) noexcept
Reverting a HPCombi::epu8.
Definition epu8.hpp:113
epu8 eval16_cycle(epu8 v) noexcept
Same interface as eval16 but with a different implementation.
Definition epu8_impl.hpp:464
epu8 eval16_ref(epu8 v) noexcept
Same interface as eval16 but with a different implementation.
Definition epu8_impl.hpp:445
epu8 partial_max_gen(epu8) noexcept
Same interface as partial_max but with a different implementation.
Definition epu8_impl.hpp:400
bool less(epu8 a, epu8 b) noexcept
Lexicographic comparison between two HPCombi::epu8.
Definition epu8_impl.hpp:110
uint8_t horiz_max_ref(epu8) noexcept
Same interface as horiz_max but with a different implementation.
Definition epu8_impl.hpp:375
epu8 sorted(epu8 a) noexcept
Return a sorted HPCombi::epu8.
Definition epu8_impl.hpp:204
constexpr TPUBuild< epu8 > Epu8
Factory object acting as a class constructor for type HPCombi::epu8.
Definition epu8.hpp:74
bool is_permutation_eval(epu8 v, const size_t k=16) noexcept
Same interface as is_permutation but with a different implementation.
Definition epu8_impl.hpp:526
uint64_t first_zero(epu8 v, int bnd) noexcept
return the index of the first zero entry or 16 if there are none Only index smaller than bound are ta...
Definition epu8_impl.hpp:121
bool is_all_zero(epu8 a) noexcept
Test whether all the entries of a HPCombi::epu8 are zero.
Definition epu8.hpp:77
epu8 random_epu8(uint16_t bnd)
A random HPCombi::epu8.
Definition epu8_impl.hpp:249
epu8 partial_min(epu8 v) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8.hpp:437
epu8 revsorted(epu8 a) noexcept
Return a reverse sorted HPCombi::epu8.
Definition epu8_impl.hpp:210
epu8 partial_min_round(epu8) noexcept
Same interface as partial_min but with a different implementation.
Definition epu8_impl.hpp:439
bool is_sorted(epu8 a) noexcept
Testing if a HPCombi::epu8 is sorted.
Definition epu8_impl.hpp:201
uint64_t last_non_zero(epu8 v, int bnd) noexcept
return the index of the last non zero entry or 16 if there are none Only index smaller than bound are...
Definition epu8_impl.hpp:130
uint64_t last_diff(epu8 a, epu8 b, size_t bound=16) noexcept
The last difference between two HPCombi::epu8.
Definition epu8.hpp:569
epu8 eval16_popcount(epu8 v) noexcept
Same interface as eval16 but with a different implementation.
Definition epu8_impl.hpp:472
epu8 sort_perm(epu8 &a) noexcept
Sort this and return the sorting permutation.
Definition epu8_impl.hpp:217
epu8 partial_min_gen(epu8) noexcept
Same interface as partial_min but with a different implementation.
Definition epu8_impl.hpp:435
epu8 partial_min_ref(epu8) noexcept
Same interface as partial_min but with a different implementation.
Definition epu8_impl.hpp:428
uint8_t horiz_max_gen(epu8) noexcept
Same interface as horiz_max but with a different implementation.
Definition epu8_impl.hpp:381
uint8_t __attribute__((vector_size(16))) epu8
epu8 stands for Extended Packed Unsigned, grouped by 8 bits; this is the low level type chosen by Int...
Definition epu8.hpp:66
epu8 shifted_left(epu8 a) noexcept
Right shifted of a HPCombi::epu8 inserting a 0.
Definition epu8.hpp:110
bool is_transformation(epu8 v, const size_t k=16) noexcept
Test for transformation.
Definition epu8_impl.hpp:494
uint64_t first_diff(epu8 a, epu8 b, size_t bound=16) noexcept
The first difference between two HPCombi::epu8.
Definition epu8.hpp:524
bool is_partial_transformation(epu8 v, const size_t k=16) noexcept
Test for partial transformation.
Definition epu8_impl.hpp:486
epu8 permuted_ref(epu8 a, epu8 b) noexcept
Apply a permutation b on the vector a: for i=0..16 {result[i] = a[b[i]}.
Definition epu8_impl.hpp:60
epu8 partial_max(epu8 v) noexcept
Horizontal partial sum of a HPCombi::epu8.
Definition epu8.hpp:362
uint8_t horiz_sum_gen(epu8) noexcept
Same interface as horiz_sum but with a different implementation.
Definition epu8_impl.hpp:346
uint8_t horiz_min3(epu8) noexcept
Same interface as horiz_min but with a different implementation.
Definition epu8_impl.hpp:420
bool not_equal(epu8 a, epu8 b) noexcept
Non equality of HPCombi::epu8.
Definition epu8.hpp:88
Definition bmat8.hpp:367
Given a transformation from 0..15 → 0..15, build at compile-time the array representing the transform...
Definition builder.hpp:49
HPCombi::VectGeneric.