1
#ifndef _MINISKETCH_H_
2
#define _MINISKETCH_H_ 1
3

            
4
#include <stdint.h>
5
#include <stdlib.h>
6

            
7
#ifdef _MSC_VER
8
#  include <compat.h>
9
#else
10
#  include <unistd.h>
11
#endif
12

            
13
#ifndef MINISKETCH_API
14
# if defined(_WIN32)
15
#  ifdef MINISKETCH_BUILD
16
#   define MINISKETCH_API __declspec(dllexport)
17
#  else
18
#   define MINISKETCH_API
19
#  endif
20
# elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(MINISKETCH_BUILD)
21
#  define MINISKETCH_API __attribute__ ((visibility ("default")))
22
# else
23
#  define MINISKETCH_API
24
# endif
25
#endif
26

            
27
#ifdef __cplusplus
28
#  if __cplusplus >= 201103L
29
#    include <memory>
30
#    include <vector>
31
#    include <cassert>
32
#    if __cplusplus >= 201703L
33
#      include <optional>
34
#    endif // __cplusplus >= 201703L
35
#  endif // __cplusplus >= 201103L
36
extern "C" {
37
#endif // __cplusplus
38

            
39
/** Opaque type for decoded sketches. */
40
typedef struct minisketch minisketch;
41

            
42
/** Determine whether support for elements of `bits` bits was compiled in. */
43
MINISKETCH_API int minisketch_bits_supported(uint32_t bits);
44

            
45
/** Determine the maximum number of implementations available.
46
 *
47
 * Multiple implementations may be available for a given element size, with
48
 * different performance characteristics on different hardware.
49
 *
50
 * Each implementation is identified by a number from 0 to the output of this
51
 * function call, inclusive. Note that not every combination of implementation
52
 * and element size may exist (see further).
53
*/
54
MINISKETCH_API uint32_t minisketch_implementation_max(void);
55

            
56
/** Determine if the a combination of bits and implementation number is available.
57
 *
58
 * Returns 1 if it is, 0 otherwise.
59
 */
60
MINISKETCH_API int minisketch_implementation_supported(uint32_t bits, uint32_t implementation);
61

            
62
/** Construct a sketch for a given element size, implementation and capacity.
63
 *
64
 * If the combination of `bits` and `implementation` is unavailable, or when
65
 * OOM occurs, NULL is returned. If minisketch_implementation_supported
66
 * returns 1 for the specified bits and implementation, this will always succeed
67
 * (except when allocation fails).
68
 *
69
 * If the result is not NULL, it must be destroyed using minisketch_destroy.
70
 */
71
MINISKETCH_API minisketch* minisketch_create(uint32_t bits, uint32_t implementation, size_t capacity);
72

            
73
/** Get the element size of a sketch in bits. */
74
MINISKETCH_API uint32_t minisketch_bits(const minisketch* sketch);
75

            
76
/** Get the capacity of a sketch. */
77
MINISKETCH_API size_t minisketch_capacity(const minisketch* sketch);
78

            
79
/** Get the implementation of a sketch. */
80
MINISKETCH_API uint32_t minisketch_implementation(const minisketch* sketch);
81

            
82
/** Set the seed for randomizing algorithm choices to a fixed value.
83
 *
84
 * By default, sketches are initialized with a random seed. This is important
85
 * to avoid scenarios where an attacker could force worst-case behavior.
86
 *
87
 * This function initializes the seed to a user-provided value (any 64-bit
88
 * integer is acceptable, regardless of field size).
89
 *
90
 * When seed is -1, a fixed internal value with predictable behavior is
91
 * used. It is only intended for testing.
92
 */
93
MINISKETCH_API void minisketch_set_seed(minisketch* sketch, uint64_t seed);
94

            
95
/** Clone a sketch.
96
 *
97
 * The result must be destroyed using minisketch_destroy.
98
 */
99
MINISKETCH_API minisketch* minisketch_clone(const minisketch* sketch);
100

            
101
/** Destroy a sketch.
102
 *
103
 * The pointer that was passed in may not be used anymore afterwards.
104
 */
105
MINISKETCH_API void minisketch_destroy(minisketch* sketch);
106

            
107
/** Compute the size in bytes for serializing a given sketch. */
108
MINISKETCH_API size_t minisketch_serialized_size(const minisketch* sketch);
109

            
110
/** Serialize a sketch to bytes. */
111
MINISKETCH_API void minisketch_serialize(const minisketch* sketch, unsigned char* output);
112

            
113
/** Deserialize a sketch from bytes. */
114
MINISKETCH_API void minisketch_deserialize(minisketch* sketch, const unsigned char* input);
115

            
116
/** Add an element to a sketch.
117
 * 
118
 * If the element to be added is too large for the sketch, the most significant
119
 * bits of the element are dropped. More precisely, if the element size of
120
 * `sketch` is b bits, then this function adds the unsigned integer represented
121
 * by the b least significant bits of `element` to `sketch`.
122
 * 
123
 * If the element to be added is 0 (after potentially dropping the most significant
124
 * bits), then this function is a no-op. Sketches cannot contain an element with
125
 * the value 0.
126
 *
127
 * Note that adding the same element a second time removes it again.
128
 */
129
MINISKETCH_API void minisketch_add_uint64(minisketch* sketch, uint64_t element);
130

            
131
/** Merge the elements of another sketch into this sketch.
132
 *
133
 * After merging, `sketch` will contain every element that existed in one but not
134
 * both of the input sketches. It can be seen as an exclusive or operation on
135
 * the set elements.  If the capacity of `other_sketch` is lower than `sketch`'s,
136
 * merging reduces the capacity of `sketch` to that of `other_sketch`.
137
 *
138
 * This function returns the capacity of `sketch` after merging has been performed
139
 * (where this capacity is at least 1), or 0 to indicate that merging has failed because
140
 * the two input sketches differ in their element size or implementation. If 0 is
141
 * returned, `sketch` (and its capacity) have not been modified.
142
 *
143
 * It is also possible to perform this operation directly on the serializations
144
 * of two sketches with the same element size and capacity by performing a bitwise XOR
145
 * of the serializations.
146
 */
147
MINISKETCH_API size_t minisketch_merge(minisketch* sketch, const minisketch* other_sketch);
148

            
149
/** Decode a sketch.
150
 *
151
 * `output` is a pointer to an array of `max_element` uint64_t's, which will be
152
 * filled with the elements in this sketch.
153
 *
154
 * The return value is the number of decoded elements, or -1 if decoding failed.
155
 */
156
MINISKETCH_API ssize_t minisketch_decode(const minisketch* sketch, size_t max_elements, uint64_t* output);
157

            
158
/** Compute the capacity needed to achieve a certain rate of false positives.
159
 *
160
 * A sketch with capacity c and no more than c elements can always be decoded
161
 * correctly. However, if it has more than c elements, or contains just random
162
 * bytes, it is possible that it will still decode, but the result will be
163
 * nonsense. This can be counteracted by increasing the capacity slightly.
164
 *
165
 * Given a field size bits, an intended number of elements that can be decoded
166
 * max_elements, and a false positive probability of 1 in 2**fpbits, this
167
 * function computes the necessary capacity. It is only guaranteed to be
168
 * accurate up to fpbits=256.
169
 */
170
MINISKETCH_API size_t minisketch_compute_capacity(uint32_t bits, size_t max_elements, uint32_t fpbits);
171

            
172
/** Compute what max_elements can be decoded for a certain rate of false positives.
173
 *
174
 * This is the inverse operation of minisketch_compute_capacity. It determines,
175
 * given a field size bits, a capacity of a sketch, and an acceptable false
176
 * positive probability of 1 in 2**fpbits, what the maximum allowed
177
 * max_elements value is. If no value of max_elements would give the desired
178
 * false positive probability, 0 is returned.
179
 *
180
 * Note that this is not an exact inverse of minisketch_compute_capacity. For
181
 * example, with bits=32, fpbits=16, and max_elements=8,
182
 * minisketch_compute_capacity will return 9, as capacity 8 would only have a
183
 * false positive chance of 1 in 2^15.3. Increasing the capacity to 9 however
184
 * decreases the fp chance to 1 in 2^47.3, enough for max_elements=9 (with fp
185
 * chance of 1 in 2^18.5). Therefore, minisketch_compute_max_elements with
186
 * capacity=9 will return 9.
187
 */
188
MINISKETCH_API size_t minisketch_compute_max_elements(uint32_t bits, size_t capacity, uint32_t fpbits);
189

            
190
#ifdef __cplusplus
191
}
192

            
193
#if __cplusplus >= 201103L
194
/** Simple RAII C++11 wrapper around the minisketch API. */
195
class Minisketch
196
{
197
    struct Deleter
198
    {
199
        void operator()(minisketch* ptr) const
200
        {
201
            minisketch_destroy(ptr);
202
        }
203
    };
204

            
205
    std::unique_ptr<minisketch, Deleter> m_minisketch;
206

            
207
public:
208
    /** Check whether the library supports fields of the given size. */
209
    static bool BitsSupported(uint32_t bits) noexcept { return minisketch_bits_supported(bits); }
210

            
211
    /** Get the highest supported implementation number. */
212
    static uint32_t MaxImplementation() noexcept { return minisketch_implementation_max(); }
213

            
214
    /** Check whether the library supports fields with a given size and implementation number.
215
     *  If a particular field size `bits` is supported, implementation 0 is always supported for it.
216
     *  Higher implementation numbers may or may not be available as well, up to MaxImplementation().
217
     */
218
    static bool ImplementationSupported(uint32_t bits, uint32_t implementation) noexcept { return minisketch_implementation_supported(bits, implementation); }
219

            
220
    /** Given field size and a maximum number of decodable elements n, compute what capacity c to
221
     *  use so that sketches with more elements than n have a chance no higher than 2^-fpbits of
222
     *  being decoded incorrectly (and will instead fail when decoding for up to n elements).
223
     *
224
     *  See minisketch_compute_capacity for more details. */
225
    static size_t ComputeCapacity(uint32_t bits, size_t max_elements, uint32_t fpbits) noexcept { return minisketch_compute_capacity(bits, max_elements, fpbits); }
226

            
227
    /** Reverse operation of ComputeCapacity. See minisketch_compute_max_elements. */
228
    static size_t ComputeMaxElements(uint32_t bits, size_t capacity, uint32_t fpbits) noexcept { return minisketch_compute_max_elements(bits, capacity, fpbits); }
229

            
230
    /** Construct a clone of the specified sketch. */
231
    Minisketch(const Minisketch& sketch) noexcept
232
    {
233
        if (sketch.m_minisketch) {
234
            m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_clone(sketch.m_minisketch.get()));
235
        }
236
    }
237

            
238
    /** Make this Minisketch a clone of the specified one. */
239
    Minisketch& operator=(const Minisketch& sketch) noexcept
240
    {
241
        if (sketch.m_minisketch) {
242
            m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_clone(sketch.m_minisketch.get()));
243
        }
244
        return *this;
245
    }
246

            
247
    /** Check whether this Minisketch object is valid. */
248
    explicit operator bool() const noexcept { return bool{m_minisketch}; }
249

            
250
    /** Construct an (invalid) Minisketch object. */
251
    Minisketch() noexcept = default;
252

            
253
    /** Move constructor. */
254
    Minisketch(Minisketch&&) noexcept = default;
255

            
256
    /** Move assignment. */
257
    Minisketch& operator=(Minisketch&&) noexcept = default;
258

            
259
    /** Construct a Minisketch object with the specified parameters.
260
     *
261
     * If bits is not BitsSupported(), or the combination of bits and capacity is not
262
     * ImplementationSupported(), or OOM occurs internally, an invalid Minisketch
263
     * object will be constructed. Use operator bool() to check that this isn't the
264
     * case before performing any other operations. */
265
    Minisketch(uint32_t bits, uint32_t implementation, size_t capacity) noexcept
266
    {
267
        m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_create(bits, implementation, capacity));
268
    }
269

            
270
    /** Create a Minisketch object sufficiently large for the specified number of elements at given fpbits.
271
     *  It may construct an invalid object, which you may need to check for. */
272
    static Minisketch CreateFP(uint32_t bits, uint32_t implementation, size_t max_elements, uint32_t fpbits) noexcept
273
    {
274
        return Minisketch(bits, implementation, ComputeCapacity(bits, max_elements, fpbits));
275
    }
276

            
277
    /** Return the field size for a (valid) Minisketch object. */
278
    uint32_t GetBits() const noexcept { return minisketch_bits(m_minisketch.get()); }
279

            
280
    /** Return the capacity for a (valid) Minisketch object. */
281
    size_t GetCapacity() const noexcept { return minisketch_capacity(m_minisketch.get()); }
282

            
283
    /** Return the implementation number for a (valid) Minisketch object. */
284
    uint32_t GetImplementation() const noexcept { return minisketch_implementation(m_minisketch.get()); }
285

            
286
    /** Set the seed for a (valid) Minisketch object. See minisketch_set_seed(). */
287
    Minisketch& SetSeed(uint64_t seed) noexcept
288
    {
289
        minisketch_set_seed(m_minisketch.get(), seed);
290
        return *this;
291
    }
292

            
293
    /** Add (or remove, if already present) an element to a (valid) Minisketch object.
294
     *  See minisketch_add_uint64(). */
295
    Minisketch& Add(uint64_t element) noexcept
296
    {
297
        minisketch_add_uint64(m_minisketch.get(), element);
298
        return *this;
299
    }
300

            
301
    /** Merge sketch into *this; both have to be valid Minisketch objects.
302
     *  See minisketch_merge for details. */
303
    Minisketch& Merge(const Minisketch& sketch) noexcept
304
    {
305
        minisketch_merge(m_minisketch.get(), sketch.m_minisketch.get());
306
        return *this;
307
    }
308

            
309
    /** Decode this (valid) Minisketch object into the result vector, up to as many elements as the
310
     *  vector's size permits. */
311
    bool Decode(std::vector<uint64_t>& result) const
312
    {
313
        ssize_t ret = minisketch_decode(m_minisketch.get(), result.size(), result.data());
314
        if (ret == -1) return false;
315
        result.resize(ret);
316
        return true;
317
    }
318

            
319
    /** Get the serialized size in bytes for this (valid) Minisketch object.. */
320
    size_t GetSerializedSize() const noexcept { return minisketch_serialized_size(m_minisketch.get()); }
321

            
322
    /** Serialize this (valid) Minisketch object as a byte vector. */
323
    std::vector<unsigned char> Serialize() const
324
    {
325
        std::vector<unsigned char> result(GetSerializedSize());
326
        minisketch_serialize(m_minisketch.get(), result.data());
327
        return result;
328
    }
329

            
330
    /** Deserialize into this (valid) Minisketch from an object containing its bytes (which has data()
331
     *  and size() members). */
332
    template<typename T>
333
    Minisketch& Deserialize(
334
        const T& obj,
335
        typename std::enable_if<
336
            std::is_convertible<typename std::remove_pointer<decltype(obj.data())>::type (*)[], const unsigned char (*)[]>::value &&
337
            std::is_convertible<decltype(obj.size()), std::size_t>::value,
338
            std::nullptr_t
339
        >::type = nullptr) noexcept
340
    {
341
        assert(GetSerializedSize() == obj.size());
342
        minisketch_deserialize(m_minisketch.get(), obj.data());
343
        return *this;
344
    }
345

            
346
#if __cplusplus >= 201703L
347
    /** C++17 only: like Decode(), but up to a specified number of elements into an optional vector. */
348
    std::optional<std::vector<uint64_t>> Decode(size_t max_elements) const
349
    {
350
        std::vector<uint64_t> result(max_elements);
351
        ssize_t ret = minisketch_decode(m_minisketch.get(), max_elements, result.data());
352
        if (ret == -1) return {};
353
        result.resize(ret);
354
        return result;
355
    }
356

            
357
    /** C++17 only: similar to Decode(), but with specified false positive probability. */
358
    std::optional<std::vector<uint64_t>> DecodeFP(uint32_t fpbits) const
359
    {
360
        return Decode(ComputeMaxElements(GetBits(), GetCapacity(), fpbits));
361
    }
362
#endif // __cplusplus >= 201703L
363
};
364
#endif // __cplusplus >= 201103L
365
#endif // __cplusplus
366

            
367
#endif  // _MINISKETCH_H_