Skip to content

NumPy Random Module

Why random matters in analytics

Random values help with:

  • Simulation and Monte Carlo experiments
  • Creating synthetic datasets for testing
  • Sampling
  • Bootstrapping

Modern NumPy recommends creating a Generator with np.random.default_rng() rather than calling the legacy np.random.* functions:

rng
import numpy as np
 
rng = np.random.default_rng(42)
print(rng.integers(1, 10, size=5))
rng
import numpy as np
 
rng = np.random.default_rng(42)
print(rng.integers(1, 10, size=5))

Seeding (reproducibility)

Passing a fixed seed to default_rng() makes the generator produce the same sequence of values on every run.

seed
import numpy as np
 
rng = np.random.default_rng(123)
print(rng.normal(size=3))
seed
import numpy as np
 
rng = np.random.default_rng(123)
print(rng.normal(size=3))

Random integers

integers
import numpy as np
 
rng = np.random.default_rng(7)
arr = rng.integers(low=0, high=100, size=10)
print(arr)
integers
import numpy as np
 
rng = np.random.default_rng(7)
arr = rng.integers(low=0, high=100, size=10)
print(arr)

Random floats

random
import numpy as np
 
rng = np.random.default_rng(7)
arr = rng.random(5)  # uniform in [0, 1)
print(arr)
random
import numpy as np
 
rng = np.random.default_rng(7)
arr = rng.random(5)  # uniform in [0, 1)
print(arr)

Normal distribution

normal
import numpy as np
 
rng = np.random.default_rng(7)
arr = rng.normal(loc=0, scale=1, size=5)
print(arr)
normal
import numpy as np
 
rng = np.random.default_rng(7)
arr = rng.normal(loc=0, scale=1, size=5)
print(arr)

Choice (sampling)

choice
import numpy as np
 
rng = np.random.default_rng(7)
categories = np.array(["A", "B", "C"])
print(rng.choice(categories, size=10, replace=True))
choice
import numpy as np
 
rng = np.random.default_rng(7)
categories = np.array(["A", "B", "C"])
print(rng.choice(categories, size=10, replace=True))

Weighted sampling (the probabilities passed as p must sum to 1):

choice-weights
import numpy as np
 
rng = np.random.default_rng(7)
values = np.array(["low", "medium", "high"])
probs = [0.6, 0.3, 0.1]
print(rng.choice(values, size=10, p=probs))
choice-weights
import numpy as np
 
rng = np.random.default_rng(7)
values = np.array(["low", "medium", "high"])
probs = [0.6, 0.3, 0.1]
print(rng.choice(values, size=10, p=probs))

Shuffle

shuffle
import numpy as np
 
rng = np.random.default_rng(7)
arr = np.arange(10)
rng.shuffle(arr)
print(arr)
shuffle
import numpy as np
 
rng = np.random.default_rng(7)
arr = np.arange(10)
rng.shuffle(arr)
print(arr)

Synthetic dataset example

synthetic
import numpy as np
 
rng = np.random.default_rng(0)
 
n = 100
age = rng.integers(18, 60, size=n)
income = rng.normal(loc=60000, scale=15000, size=n)
 
X = np.column_stack([age, income])
print(X.shape)
synthetic
import numpy as np
 
rng = np.random.default_rng(0)
 
n = 100
age = rng.integers(18, 60, size=n)
income = rng.normal(loc=60000, scale=15000, size=n)
 
X = np.column_stack([age, income])
print(X.shape)

Next

Continue to: Linear Algebra with NumPy to learn dot products, matrix multiplication, and solving systems.

🧪 Try It Yourself

Exercise 1 – Create a NumPy Array

Exercise 2 – Array Shape and Reshape

Exercise 3 – Array Arithmetic

If this helped you, consider buying me a coffee ☕

Buy me a coffee

Was this page helpful?

Let us know how we did