This is documentation for an old release of NumPy (version 1.18). Read this page in the documentation of the latest stable release (version 2.2).
Extending¶
The BitGenerators have been designed to be extendable using standard tools for
high-performance Python – numba and Cython. The Generator
object can also
be used with user-provided BitGenerators as long as these export a small set of
required functions.
Numba¶
Numba can be used with either CTypes or CFFI. The current iteration of the BitGenerators all export a small set of functions through both interfaces.
This example shows how numba can be used to produce gaussian samples using
a pure Python implementation which is then compiled. The random numbers are
provided by ctypes.next_double
.
import numpy as np
import numba as nb
from numpy.random import PCG64
from timeit import timeit
bit_gen = PCG64()
next_d = bit_gen.cffi.next_double
state_addr = bit_gen.cffi.state_address
def normals(n, state):
out = np.empty(n)
for i in range((n + 1) // 2):
x1 = 2.0 * next_d(state) - 1.0
x2 = 2.0 * next_d(state) - 1.0
r2 = x1 * x1 + x2 * x2
while r2 >= 1.0 or r2 == 0.0:
x1 = 2.0 * next_d(state) - 1.0
x2 = 2.0 * next_d(state) - 1.0
r2 = x1 * x1 + x2 * x2
f = np.sqrt(-2.0 * np.log(r2) / r2)
out[2 * i] = f * x1
if 2 * i + 1 < n:
out[2 * i + 1] = f * x2
return out
# Compile using Numba
normalsj = nb.jit(normals, nopython=True)
# Must use state address not state with numba
n = 10000
def numbacall():
return normalsj(n, state_addr)
rg = np.random.Generator(PCG64())
def numpycall():
return rg.normal(size=n)
# Check that the functions work
r1 = numbacall()
r2 = numpycall()
assert r1.shape == (n,)
assert r1.shape == r2.shape
t1 = timeit(numbacall, number=1000)
print('{:.2f} secs for {} PCG64 (Numba/PCG64) gaussian randoms'.format(t1, n))
t2 = timeit(numpycall, number=1000)
print('{:.2f} secs for {} PCG64 (NumPy/PCG64) gaussian randoms'.format(t2, n))
Both CTypes and CFFI allow the more complicated distributions to be used
directly in Numba after compiling the file distributions.c into a DLL
or
so
. An example showing the use of a more complicated distribution is in
the examples section below.
Cython¶
Cython can be used to unpack the PyCapsule
provided by a BitGenerator.
This example uses PCG64
and the example from above. The usual caveats
for writing high-performance code using Cython – removing bounds checks and
wrap around, providing array alignment information – still apply.
#!/usr/bin/env python
#cython: language_level=3
"""
This file shows how the to use a BitGenerator to create a distribution.
"""
import numpy as np
cimport numpy as np
cimport cython
from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer
from libc.stdint cimport uint16_t, uint64_t
from numpy.random cimport bitgen_t
from numpy.random import PCG64
@cython.boundscheck(False)
@cython.wraparound(False)
def uniforms(Py_ssize_t n):
"""
Create an array of `n` uniformly distributed doubles.
A 'real' distribution would want to process the values into
some non-uniform distribution
"""
cdef Py_ssize_t i
cdef bitgen_t *rng
cdef const char *capsule_name = "BitGenerator"
cdef double[::1] random_values
x = PCG64()
capsule = x.capsule
# Optional check that the capsule if from a BitGenerator
if not PyCapsule_IsValid(capsule, capsule_name):
raise ValueError("Invalid pointer to anon_func_state")
# Cast the pointer
rng = <bitgen_t *> PyCapsule_GetPointer(capsule, capsule_name)
random_values = np.empty(n, dtype='float64')
with x.lock, nogil:
for i in range(n):
# Call the function
random_values[i] = rng.next_double(rng.state)
randoms = np.asarray(random_values)
return randoms
The BitGenerator can also be directly accessed using the members of the basic RNG structure.
@cython.boundscheck(False)
@cython.wraparound(False)
def uint10_uniforms(Py_ssize_t n):
"""Uniform 10 bit integers stored as 16-bit unsigned integers"""
cdef Py_ssize_t i
cdef bitgen_t *rng
cdef const char *capsule_name = "BitGenerator"
cdef uint16_t[::1] random_values
cdef int bits_remaining
cdef int width = 10
cdef uint64_t buff, mask = 0x3FF
x = PCG64()
capsule = x.capsule
if not PyCapsule_IsValid(capsule, capsule_name):
raise ValueError("Invalid pointer to anon_func_state")
rng = <bitgen_t *> PyCapsule_GetPointer(capsule, capsule_name)
random_values = np.empty(n, dtype='uint16')
# Best practice is to release GIL and acquire the lock
bits_remaining = 0
with x.lock, nogil:
for i in range(n):
if bits_remaining < width:
buff = rng.next_uint64(rng.state)
random_values[i] = buff & mask
buff >>= width
randoms = np.asarray(random_values)
return randoms
See Extending numpy.random via Cython for a complete working example including a minimal setup and cython files.
CFFI¶
CFFI can be used to directly access the functions in
include/numpy/random/distributions.h
. Some “massaging” of the header
file is required:
"""
Use cffi to access any of the underlying C functions from distributions.h
"""
import os
import numpy as np
import cffi
from .parse import parse_distributions_h
ffi = cffi.FFI()
inc_dir = os.path.join(np.get_include(), 'numpy')
# Basic numpy types
ffi.cdef('''
typedef intptr_t npy_intp;
typedef unsigned char npy_bool;
''')
parse_distributions_h(ffi, inc_dir)
Once the header is parsed by ffi.cdef
, the functions can be accessed
directly from the _generator
shared object, using the BitGenerator.cffi
interface.
# Compare the distributions.h random_standard_normal_fill to
# Generator.standard_random
bit_gen = np.random.PCG64()
rng = np.random.Generator(bit_gen)
state = bit_gen.state
interface = rng.bit_generator.cffi
n = 100
vals_cffi = ffi.new('double[%d]' % n)
lib.random_standard_normal_fill(interface.bit_generator, n, vals_cffi)
# reset the state
bit_gen.state = state
vals = rng.standard_normal(n)
for i in range(n):
assert vals[i] == vals_cffi[i]
New Basic RNGs¶
Generator
can be used with other user-provided BitGenerators. The simplest
way to write a new BitGenerator is to examine the pyx file of one of the
existing BitGenerators. The key structure that must be provided is the
capsule
which contains a PyCapsule
to a struct pointer of type
bitgen_t
,
typedef struct bitgen {
void *state;
uint64_t (*next_uint64)(void *st);
uint32_t (*next_uint32)(void *st);
double (*next_double)(void *st);
uint64_t (*next_raw)(void *st);
} bitgen_t;
which provides 5 pointers. The first is an opaque pointer to the data structure
used by the BitGenerators. The next three are function pointers which return
the next 64- and 32-bit unsigned integers, the next random double and the next
raw value. This final function is used for testing and so can be set to
the next 64-bit unsigned integer function if not needed. Functions inside
Generator
use this structure as in
bitgen_state->next_uint64(bitgen_state->state)