dmitry-kabanov · May 10, 2021 11:35
diff --git a/HW1_2.5.3.ipynb b/HW1_2.5.3.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 252,
   "id": "1285d895-5b46-403e-8533-643e9fd28251",
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "#import p2\n",
    "#import randomfields as rf\n",
    "\n",
    "from scipy import optimize\n",
    "from scipy import sparse\n",
    "from scipy import stats\n",
    "from scipy.sparse.linalg import spsolve"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f304198e-d363-4de7-834d-f3b7a89bd425",
   "metadata": {},
   "source": [
    "---\n",
    "# Homework 1, Problem 2.5.3 (importance sampling for rare events)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f3ee67ee-a084-48f8-a032-f5c05d100b0c",
   "metadata": {},
   "source": [
    "$$\n",
    "P(Q < K) = P(F(Y1, \\dots, Y_N) < K) = \\mathbb E \\left[ \\mathbb I (Y_1, \\dots, Y_N) \\right]\n",
    "$$\n",
    "where $\\mathbb I$ is the indicator function that $Q<K$ is true.\n",
    "\n",
    "First, we need to find the new distribution for the normal variables $Y_1, \\dots, Y_N$.\n",
    "Because coefficients of the Karhunen--Loeve expansion decay, we will change the distribution only for the first several coefficients, for example, two:\n",
    "\n",
    "$$\n",
    "\\mu_1, \\mu_2 = \\arg \\max_{y_1, y_2} \\mathbb I (y_1, y_2, y_3, \\dots) \\prod_i \\rho (y_i; 1^2)\n",
    "$$\n",
    "where $rho_i$ is the normal distribution for the $i$th variable."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "965838ac-d6e1-4152-85f5-1d4a84951746",
   "metadata": {},
   "source": [
    "## Parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 230,
   "id": "08ac143c-5df4-4960-86e5-1c9cc8eaa3a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "N = 40\n",
    "I = 100\n",
    "nu = np.Inf\n",
    "\n",
    "x, h = np.linspace(0, 1, num=I+1, retstep=True)\n",
    "\n",
    "K = -100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "id": "1c6b1278-c605-493b-8b3c-90ab7d174385",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Random number generator.\n",
    "rng = np.random.default_rng()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "83383d99-2d4d-4675-9569-1fb722f9343b",
   "metadata": {},
   "source": [
    "# Auxiliary Functions"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "07f87917-939c-448c-b3e7-2c520474ccd9",
   "metadata": {},
   "source": [
    "## Function to compute joint distribution of independent Gaussian RVs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 180,
   "id": "b859645b-d2de-4d76-9db4-66dd902c68b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "def gaussian_independent_pdf(Y, mean=0):\n",
    "    coeff = 1.0 / np.sqrt(2 * np.pi)**len(Y)\n",
    "    exp_expr = -np.sum(np.square(Y - mean))/2\n",
    "    \n",
    "    #coeff=1\n",
    "    return coeff * np.exp(exp_expr)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4a80f83b-d4d0-4e34-af55-af713fdecf7d",
   "metadata": {},
   "source": [
    "## Generator of Karhunen--Loeve expansion of the random lognormal field"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 235,
   "id": "13ec6279-f2f6-44a3-8afc-9de767f28bdd",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_karhunen_loeve_expansion(N, nu, x, Y=None):\n",
    "    I = len(x)\n",
    "\n",
    "    if N > I:\n",
    "        raise ValueError(\n",
    "            \"Truncation parameter `N` must be not greater than the grid size\"\n",
    "        )\n",
    "\n",
    "    x12 = (x[0:-1] + x[1:]) / 2.0\n",
    "    C = get_matern_cov_matrix(x12, nu=nu)\n",
    "    # It is very important here to use function `eigh` that works\n",
    "    # with real symmetric matrices.\n",
    "    # Otherwise, eigenvalues and eigenvectors can get numerically\n",
    "    # imaginary parts, which leads to incorrect decomposition.\n",
    "    eigvals, eigvecs = np.linalg.eigh(C)\n",
    "    \n",
    "    # We need to sort eigvals and eigenvectors in descending order.\n",
    "    eigvals = eigvals[::-1]\n",
    "    eigvecs = eigvecs[:, ::-1]\n",
    "\n",
    "    if Y is None:\n",
    "        rng = np.random.default_rng()\n",
    "        Y = rng.standard_normal(N)\n",
    "    else:\n",
    "        assert N == len(Y)\n",
    "\n",
    "    kappa = np.zeros(len(x12))\n",
    "\n",
    "    # Note that we need to take eigenvalues and eigenvectors\n",
    "    # \"from the end\" as they are in the ascending order.\n",
    "    for n in range(0, N):\n",
    "        kappa += np.sqrt(eigvals[n]) * Y[n] * eigvecs[:, n]\n",
    "\n",
    "    return kappa\n",
    "\n",
    "\n",
    "def get_matern_cov_matrix(x, nu=np.Inf, sigma_sq=2, rho=0.1):\n",
    "    r\"\"\"Compute Matern covariance matrix.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    x : ndarray (N, )\n",
    "        Input array with spatial points.\n",
    "    nu : float (one of [0.5, 1.5, 2.5, np.Inf])\n",
    "        Order of the Matern function.\n",
    "    sigma_sq :\n",
    "        Parameter :math:`\\sigma^2`.\n",
    "    rho :\n",
    "        Parameter :math:`\\rho`.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    C : ndarray (N, N)\n",
    "        Covariance matrix evaluated at all points from `x`.\n",
    "\n",
    "    \"\"\"\n",
    "    msg_nu = \"Only four Matern covariance functions are implemented\"\n",
    "    if nu not in [0.5, 1.5, 2.5, np.Inf]:\n",
    "        raise ValueError(msg_nu)\n",
    "\n",
    "    sigma_sq, rho = sigma_sq, rho\n",
    "\n",
    "    # Matrix of distances between all points in `x`.\n",
    "    # Because `x` is 1D, distances are just absolute values of difference.\n",
    "    D = np.abs(np.subtract.outer(x, x))\n",
    "\n",
    "    if nu == 0.5:\n",
    "        return sigma_sq * np.exp(-D / rho)\n",
    "    elif nu == 1.5:\n",
    "        z = np.sqrt(3) * D / rho\n",
    "        return sigma_sq * (1 + z) * np.exp(-z)\n",
    "    elif nu == 2.5:\n",
    "        a = np.sqrt(5) * D / rho\n",
    "        b = np.sqrt(3) * D**2 / rho\n",
    "        return sigma_sq * (1 + a + b) * np.exp(-a)\n",
    "    elif nu == np.Inf:\n",
    "        return sigma_sq * np.exp(-D**2 / (2*rho**2))\n",
    "    else:\n",
    "        # Should not get here actually in any case.\n",
    "        raise ValueError(msg_nu)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2fbffe15-7fbd-485d-967f-3963c2110610",
   "metadata": {},
   "source": [
    "## Function to solve boundary-value problem for a given random field"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 250,
   "id": "eedd3354-a0c8-4fb9-9a67-fe154697c021",
   "metadata": {},
   "outputs": [],
   "source": [
    "def solve_bvp(a, return_grid=False):\n",
    "    r\"\"\"Solve boundary-value problem for given coefficients `a`.\n",
    "\n",
    "    Boundary-value problem is\n",
    "    \\begin{align}\n",
    "        -(a u')' = 4\\pi \\cos(2\\pi x),\n",
    "        u(0) = u(1) = 0.\n",
    "    \\begin{align}\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    a : ndarray of shape (I,)\n",
    "        Diffusion coefficient evaluated at the center points of the grid.\n",
    "    return_grid : bool (optional, default False)\n",
    "        If True, return also the grid and grid step size.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    u : ndarray of shape (I+1, )\n",
    "        Solution evaluated at the center points of the grid.\n",
    "    x : ndarray of shape (I+1, )\n",
    "        Uniform grid 0 = x_0 < x_1 < \\dots < x_I = 1\n",
    "    h : float\n",
    "        Grid step size.\n",
    "    \"\"\"\n",
    "    # Grid size.\n",
    "    I = len(a)\n",
    "    # Build grid with points x0, x1, ..., x_I and step size 1/I.\n",
    "    x, h = np.linspace(0, 1, num=I+1, retstep=True)\n",
    "\n",
    "    # 1. Build coefficient matrix in sparse format.\n",
    "    e1 = -a[0:-1] / h**2\n",
    "    e2 = (a[0:-1] + a[1:]) / h**2\n",
    "    e3 = -a[1:] / h**2\n",
    "    A = sparse.spdiags([e1, e2, e3], [-1, 0, 1], I-1, I-1)\n",
    "\n",
    "    # 2. Build right-hand side (source term).\n",
    "    F = 4*np.pi**2 * np.cos(2*np.pi*x[1:-1])\n",
    "\n",
    "    # 3. Solve the tridiagonal system of linear equations.\n",
    "    # It would be nicer, if matrix A and RHS vector F include\n",
    "    # the boundary conditions explicitly, but here we just\n",
    "    # explicitly set u_0 and u_I to zero as required by BCs.\n",
    "    u = np.zeros(I+1)\n",
    "    A_csc = A.asformat(\"csc\")\n",
    "    u[1:-1] = spsolve(A_csc, F)\n",
    "\n",
    "    if return_grid:\n",
    "        return u, x, h\n",
    "    else:\n",
    "        return u"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ea5c7cf4-63ac-46e7-9e4a-6af3c7ccace3",
   "metadata": {},
   "source": [
    "# Look for shift of distribution of gaussian RVs"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cb1caa67-bb91-41be-8935-89a07b393f8e",
   "metadata": {},
   "source": [
    "Objective function for finding shifts:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 238,
   "id": "19024fef-c4f3-4409-927d-0894fc2d7b0c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def objective_fn(y, N, nu, x, h, K):\n",
    "    y_full = np.zeros(N)\n",
    "    y_full[:len(y)] = y\n",
    "    \n",
    "    kappa = rf.compute_karhunen_loeve_expansion(N, nu, x, y_full)\n",
    "    a = np.exp(kappa)\n",
    "    u = p2.solve_bvp(a)\n",
    "    Q = h * np.sum(u)\n",
    "    \n",
    "    prod_rho = gaussian_independent_pdf(y_full, mean=0)\n",
    "    \n",
    "    result = -float(Q < K) * prod_rho\n",
    "    #print(\n",
    "    #    \"Q = {:7.2e}, prod_rho = {:7.2e}, result = {:7.2e}\".format(\n",
    "    #        Q, prod_rho, result\n",
    "    #    )\n",
    "    #)\n",
    "    return result"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d1143509-9086-49a9-a500-d426689182be",
   "metadata": {},
   "source": [
    "We compute values of the objective function by random sampling of the inputs:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 239,
   "id": "110e2487-16e1-442b-a08f-316fc5135b49",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Minimal value of the objective function: -1.67e-22\n",
      "Initial guess: [ 3.17704955 -4.08460683]\n"
     ]
    }
   ],
   "source": [
    "initial_guess_list = []\n",
    "objective_fun_values = []\n",
    "\n",
    "for __ in range(100):\n",
    "    y0 = rng.uniform(low=-5, high=5, size=2)\n",
    "    initial_guess_list.append(y0)\n",
    "    f = objective_fn(y0, N, nu, x, h, K)\n",
    "    objective_fun_values.append(f)\n",
    "    \n",
    "i_min = np.argmin(objective_fun_values)\n",
    "initial_guess = initial_guess_list[i_min]\n",
    "\n",
    "print(\"Minimal value of the objective function: {:.2e}\".format(objective_fun_values[i_min]))\n",
    "print(\"Initial guess: {}\".format(initial_guess))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 242,
   "id": "64ea566a-ff00-485a-bd6e-da01f4f145b7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: -0.000000\n",
      "         Iterations: 68\n",
      "         Function evaluations: 131\n",
      "***\n",
      "New means for Gaussian RVs: [ 3.63806705 -3.21070776]\n"
     ]
    }
   ],
   "source": [
    "result = optimize.fmin(objective_fn, initial_guess, args=(N, nu, x, h, K))\n",
    "print(\"***\")\n",
    "print(\"New means for Gaussian RVs: {}\".format(result))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "25d66175-9b06-4809-a513-848028132e89",
   "metadata": {},
   "source": [
    "The results of the optimization will be the new means of the random variables $Y_1, Y_2, \\dots$. All other random variables remain standard normal RVs."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9fa8261d-b3b4-436e-b0be-542d474da261",
   "metadata": {},
   "source": [
    "## \"True\" estimate with huge number of Monte Carlo samples"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4122368a-12b8-4152-9db4-f8967552d488",
   "metadata": {},
   "source": [
    "This should be run to estimate \"true\" value by using huge number of MC samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "454a1660-3ce1-49c5-8e8f-e402a1679a52",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Takes a lot of time\n",
    "Y_mean = np.zeros(N)\n",
    "est, err = simulate(10**7, Y_mean)\n",
    "\n",
    "print(\"Overkill crude MC estimate = {:.4f}\".format(est))\n",
    "print(\"Overkill crude MC confidence interval = ({:.4f}, {:.4f})\".format(est - err, est + err))\n",
    "print(\"Overkill crude MC error = {:.4f}\".format(err))\n",
    "print(\"Overkill crude MC relative error = {:.4f}\".format(err / est))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a10b416c-a852-45a6-be42-a7a67eaa7207",
   "metadata": {
    "tags": []
   },
   "source": [
    "# Importance Sampling Monte Carlo estimator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 243,
   "id": "20184f6b-5720-4f94-a805-cbabbd7a0faf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of Monte Carlo samples\n",
    "M = 10**4"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a1cb9851-1124-4fb2-a8ad-ca2a56b4e5ec",
   "metadata": {},
   "source": [
    "Compute estimate of the probability that the quantity of interest is less than `K` using `M` samples. If `Y_mean` is all zeros, than this corresponds to crude Monte Carlo."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 248,
   "id": "b296533d-27b9-4e83-9ea4-bc802b3ed096",
   "metadata": {},
   "outputs": [],
   "source": [
    "def simulate(M, Y_mean):\n",
    "    \"\"\"Compute Importance Sampling Monte Carlo and the corresponding normal CI error.\"\"\"\n",
    "    Q_sample = np.empty(M)\n",
    "\n",
    "    for m in range(M):\n",
    "        Y = Y_mean + rng.standard_normal(N)\n",
    "        kappa = compute_karhunen_loeve_expansion(N, nu, x, Y)\n",
    "        a = np.exp(kappa)\n",
    "        u = solve_bvp(a)\n",
    "        Q = h * np.sum(u)\n",
    "        likelihood_ratio = gaussian_independent_pdf(Y) / gaussian_independent_pdf(Y, mean=Y_mean)\n",
    "        Q_sample[m] = float(Q < K) * likelihood_ratio\n",
    "\n",
    "    est = Q_sample.mean()\n",
    "    se = Q_sample.std(ddof=1)\n",
    "    err = 1.96*se/np.sqrt(M)\n",
    "\n",
    "    return est, err"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4a6beb25-4216-4521-80d0-f8513350a5ae",
   "metadata": {},
   "source": [
    "### Crude MC estimator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 253,
   "id": "d563265b-307d-4e8e-90e8-9575db6b044d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Crude MC estimate = 0.0146\n",
      "Crude MC confidence interval = (0.0122, 0.0170)\n",
      "Crude MC error = 0.0024\n",
      "Crude MC relative error = 0.1610\n"
     ]
    }
   ],
   "source": [
    "Y_mean = np.zeros(N)\n",
    "est, err = simulate(M, Y_mean)\n",
    "\n",
    "print(\"Crude MC estimate = {:.4f}\".format(est))\n",
    "print(\"Crude MC confidence interval = ({:.4f}, {:.4f})\".format(est - err, est + err))\n",
    "print(\"Crude MC error = {:.4f}\".format(err))\n",
    "print(\"Crude MC relative error = {:.4f}\".format(err / est))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b898d54e-e7c2-4e90-a27d-6acfbcc21513",
   "metadata": {},
   "source": [
    "### IS MC estimator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 254,
   "id": "48d9ab5d-129c-4cff-ae42-44a39fb63806",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "IS MC estimate = 0.0053\n",
      "IS MC normal confidence interval = (-0.0015, 0.0121)\n",
      "IS MC error = 0.0068\n",
      "IS MC relative error = 1.2821\n"
     ]
    }
   ],
   "source": [
    "Y_mean = np.zeros(N)\n",
    "Y_mean[:len(result)] = result\n",
    "est, err = simulate(M, Y_mean)\n",
    "\n",
    "print(\"IS MC estimate = {:.4f}\".format(est))\n",
    "print(\"IS MC normal confidence interval = ({:.4f}, {:.4f})\".format(est - err, est + err))\n",
    "print(\"IS MC error = {:.4f}\".format(err))\n",
    "print(\"IS MC relative error = {:.4f}\".format(err / est))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "770b2cd2-b808-44d6-9bfa-67fc686bce10",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 252,
	"id": "1285d895-5b46-403e-8533-643e9fd28251",
	"metadata": {},
	"outputs": [],
	"source": [
	"%matplotlib inline\n",
	"import matplotlib.pyplot as plt\n",
	"import numpy as np\n",
	"\n",
	"#import p2\n",
	"#import randomfields as rf\n",
	"\n",
	"from scipy import optimize\n",
	"from scipy import sparse\n",
	"from scipy import stats\n",
	"from scipy.sparse.linalg import spsolve"
	]
	},
	{
	"cell_type": "markdown",
	"id": "f304198e-d363-4de7-834d-f3b7a89bd425",
	"metadata": {},
	"source": [
	"---\n",
	"# Homework 1, Problem 2.5.3 (importance sampling for rare events)"
	]
	},
	{
	"cell_type": "markdown",
	"id": "f3ee67ee-a084-48f8-a032-f5c05d100b0c",
	"metadata": {},
	"source": [
	"$$\n",
	"P(Q < K) = P(F(Y1, \\dots, Y_N) < K) = \\mathbb E \\left[ \\mathbb I (Y_1, \\dots, Y_N) \\right]\n",
	"$$\n",
	"where $\\mathbb I$ is the indicator function that $Q<K$ is true.\n",
	"\n",
	"First, we need to find the new distribution for the normal variables $Y_1, \\dots, Y_N$.\n",
	"Because coefficients of the Karhunen--Loeve expansion decay, we will change the distribution only for the first several coefficients, for example, two:\n",
	"\n",
	"$$\n",
	"\\mu_1, \\mu_2 = \\arg \\max_{y_1, y_2} \\mathbb I (y_1, y_2, y_3, \\dots) \\prod_i \\rho (y_i; 1^2)\n",
	"$$\n",
	"where $rho_i$ is the normal distribution for the $i$th variable."
	]
	},
	{
	"cell_type": "markdown",
	"id": "965838ac-d6e1-4152-85f5-1d4a84951746",
	"metadata": {},
	"source": [
	"## Parameters"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 230,
	"id": "08ac143c-5df4-4960-86e5-1c9cc8eaa3a8",
	"metadata": {},
	"outputs": [],
	"source": [
	"N = 40\n",
	"I = 100\n",
	"nu = np.Inf\n",
	"\n",
	"x, h = np.linspace(0, 1, num=I+1, retstep=True)\n",
	"\n",
	"K = -100"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 138,
	"id": "1c6b1278-c605-493b-8b3c-90ab7d174385",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Random number generator.\n",
	"rng = np.random.default_rng()"
	]
	},
	{
	"cell_type": "markdown",
	"id": "83383d99-2d4d-4675-9569-1fb722f9343b",
	"metadata": {},
	"source": [
	"# Auxiliary Functions"
	]
	},
	{
	"cell_type": "markdown",
	"id": "07f87917-939c-448c-b3e7-2c520474ccd9",
	"metadata": {},
	"source": [
	"## Function to compute joint distribution of independent Gaussian RVs"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 180,
	"id": "b859645b-d2de-4d76-9db4-66dd902c68b6",
	"metadata": {},
	"outputs": [],
	"source": [
	"def gaussian_independent_pdf(Y, mean=0):\n",
	" coeff = 1.0 / np.sqrt(2 * np.pi)**len(Y)\n",
	" exp_expr = -np.sum(np.square(Y - mean))/2\n",
	" \n",
	" #coeff=1\n",
	" return coeff * np.exp(exp_expr)"
	]
	},
	{
	"cell_type": "markdown",
	"id": "4a80f83b-d4d0-4e34-af55-af713fdecf7d",
	"metadata": {},
	"source": [
	"## Generator of Karhunen--Loeve expansion of the random lognormal field"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 235,
	"id": "13ec6279-f2f6-44a3-8afc-9de767f28bdd",
	"metadata": {},
	"outputs": [],
	"source": [
	"def compute_karhunen_loeve_expansion(N, nu, x, Y=None):\n",
	" I = len(x)\n",
	"\n",
	" if N > I:\n",
	" raise ValueError(\n",
	" \"Truncation parameter `N` must be not greater than the grid size\"\n",
	" )\n",
	"\n",
	" x12 = (x[0:-1] + x[1:]) / 2.0\n",
	" C = get_matern_cov_matrix(x12, nu=nu)\n",
	" # It is very important here to use function `eigh` that works\n",
	" # with real symmetric matrices.\n",
	" # Otherwise, eigenvalues and eigenvectors can get numerically\n",
	" # imaginary parts, which leads to incorrect decomposition.\n",
	" eigvals, eigvecs = np.linalg.eigh(C)\n",
	" \n",
	" # We need to sort eigvals and eigenvectors in descending order.\n",
	" eigvals = eigvals[::-1]\n",
	" eigvecs = eigvecs[:, ::-1]\n",
	"\n",
	" if Y is None:\n",
	" rng = np.random.default_rng()\n",
	" Y = rng.standard_normal(N)\n",
	" else:\n",
	" assert N == len(Y)\n",
	"\n",
	" kappa = np.zeros(len(x12))\n",
	"\n",
	" # Note that we need to take eigenvalues and eigenvectors\n",
	" # \"from the end\" as they are in the ascending order.\n",
	" for n in range(0, N):\n",
	" kappa += np.sqrt(eigvals[n]) * Y[n] * eigvecs[:, n]\n",
	"\n",
	" return kappa\n",
	"\n",
	"\n",
	"def get_matern_cov_matrix(x, nu=np.Inf, sigma_sq=2, rho=0.1):\n",
	" r\"\"\"Compute Matern covariance matrix.\n",
	"\n",
	" Parameters\n",
	" ----------\n",
	" x : ndarray (N, )\n",
	" Input array with spatial points.\n",
	" nu : float (one of [0.5, 1.5, 2.5, np.Inf])\n",
	" Order of the Matern function.\n",
	" sigma_sq :\n",
	" Parameter :math:`\\sigma^2`.\n",
	" rho :\n",
	" Parameter :math:`\\rho`.\n",
	"\n",
	" Returns\n",
	" -------\n",
	" C : ndarray (N, N)\n",
	" Covariance matrix evaluated at all points from `x`.\n",
	"\n",
	" \"\"\"\n",
	" msg_nu = \"Only four Matern covariance functions are implemented\"\n",
	" if nu not in [0.5, 1.5, 2.5, np.Inf]:\n",
	" raise ValueError(msg_nu)\n",
	"\n",
	" sigma_sq, rho = sigma_sq, rho\n",
	"\n",
	" # Matrix of distances between all points in `x`.\n",
	" # Because `x` is 1D, distances are just absolute values of difference.\n",
	" D = np.abs(np.subtract.outer(x, x))\n",
	"\n",
	" if nu == 0.5:\n",
	" return sigma_sq * np.exp(-D / rho)\n",
	" elif nu == 1.5:\n",
	" z = np.sqrt(3) * D / rho\n",
	" return sigma_sq * (1 + z) * np.exp(-z)\n",
	" elif nu == 2.5:\n",
	" a = np.sqrt(5) * D / rho\n",
	" b = np.sqrt(3) * D**2 / rho\n",
	" return sigma_sq * (1 + a + b) * np.exp(-a)\n",
	" elif nu == np.Inf:\n",
	" return sigma_sq * np.exp(-D*2 / (2rho**2))\n",
	" else:\n",
	" # Should not get here actually in any case.\n",
	" raise ValueError(msg_nu)"
	]
	},
	{
	"cell_type": "markdown",
	"id": "2fbffe15-7fbd-485d-967f-3963c2110610",
	"metadata": {},
	"source": [
	"## Function to solve boundary-value problem for a given random field"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 250,
	"id": "eedd3354-a0c8-4fb9-9a67-fe154697c021",
	"metadata": {},
	"outputs": [],
	"source": [
	"def solve_bvp(a, return_grid=False):\n",
	" r\"\"\"Solve boundary-value problem for given coefficients `a`.\n",
	"\n",
	" Boundary-value problem is\n",
	" \\begin{align}\n",
	" -(a u')' = 4\\pi \\cos(2\\pi x),\n",
	" u(0) = u(1) = 0.\n",
	" \\begin{align}\n",
	"\n",
	" Parameters\n",
	" ----------\n",
	" a : ndarray of shape (I,)\n",
	" Diffusion coefficient evaluated at the center points of the grid.\n",
	" return_grid : bool (optional, default False)\n",
	" If True, return also the grid and grid step size.\n",
	"\n",
	" Returns\n",
	" -------\n",
	" u : ndarray of shape (I+1, )\n",
	" Solution evaluated at the center points of the grid.\n",
	" x : ndarray of shape (I+1, )\n",
	" Uniform grid 0 = x_0 < x_1 < \\dots < x_I = 1\n",
	" h : float\n",
	" Grid step size.\n",
	" \"\"\"\n",
	" # Grid size.\n",
	" I = len(a)\n",
	" # Build grid with points x0, x1, ..., x_I and step size 1/I.\n",
	" x, h = np.linspace(0, 1, num=I+1, retstep=True)\n",
	"\n",
	" # 1. Build coefficient matrix in sparse format.\n",
	" e1 = -a[0:-1] / h**2\n",
	" e2 = (a[0:-1] + a[1:]) / h**2\n",
	" e3 = -a[1:] / h**2\n",
	" A = sparse.spdiags([e1, e2, e3], [-1, 0, 1], I-1, I-1)\n",
	"\n",
	" # 2. Build right-hand side (source term).\n",
	" F = 4np.pi2 np.cos(2np.pix[1:-1])\n",
	"\n",
	" # 3. Solve the tridiagonal system of linear equations.\n",
	" # It would be nicer, if matrix A and RHS vector F include\n",
	" # the boundary conditions explicitly, but here we just\n",
	" # explicitly set u_0 and u_I to zero as required by BCs.\n",
	" u = np.zeros(I+1)\n",
	" A_csc = A.asformat(\"csc\")\n",
	" u[1:-1] = spsolve(A_csc, F)\n",
	"\n",
	" if return_grid:\n",
	" return u, x, h\n",
	" else:\n",
	" return u"
	]
	},
	{
	"cell_type": "markdown",
	"id": "ea5c7cf4-63ac-46e7-9e4a-6af3c7ccace3",
	"metadata": {},
	"source": [
	"# Look for shift of distribution of gaussian RVs"
	]
	},
	{
	"cell_type": "markdown",
	"id": "cb1caa67-bb91-41be-8935-89a07b393f8e",
	"metadata": {},
	"source": [
	"Objective function for finding shifts:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 238,
	"id": "19024fef-c4f3-4409-927d-0894fc2d7b0c",
	"metadata": {},
	"outputs": [],
	"source": [
	"def objective_fn(y, N, nu, x, h, K):\n",
	" y_full = np.zeros(N)\n",
	" y_full[:len(y)] = y\n",
	" \n",
	" kappa = rf.compute_karhunen_loeve_expansion(N, nu, x, y_full)\n",
	" a = np.exp(kappa)\n",
	" u = p2.solve_bvp(a)\n",
	" Q = h * np.sum(u)\n",
	" \n",
	" prod_rho = gaussian_independent_pdf(y_full, mean=0)\n",
	" \n",
	" result = -float(Q < K) * prod_rho\n",
	" #print(\n",
	" # \"Q = {:7.2e}, prod_rho = {:7.2e}, result = {:7.2e}\".format(\n",
	" # Q, prod_rho, result\n",
	" # )\n",
	" #)\n",
	" return result"
	]
	},
	{
	"cell_type": "markdown",
	"id": "d1143509-9086-49a9-a500-d426689182be",
	"metadata": {},
	"source": [
	"We compute values of the objective function by random sampling of the inputs:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 239,
	"id": "110e2487-16e1-442b-a08f-316fc5135b49",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Minimal value of the objective function: -1.67e-22\n",
	"Initial guess: [ 3.17704955 -4.08460683]\n"
	]
	}
	],
	"source": [
	"initial_guess_list = []\n",
	"objective_fun_values = []\n",
	"\n",
	"for __ in range(100):\n",
	" y0 = rng.uniform(low=-5, high=5, size=2)\n",
	" initial_guess_list.append(y0)\n",
	" f = objective_fn(y0, N, nu, x, h, K)\n",
	" objective_fun_values.append(f)\n",
	" \n",
	"i_min = np.argmin(objective_fun_values)\n",
	"initial_guess = initial_guess_list[i_min]\n",
	"\n",
	"print(\"Minimal value of the objective function: {:.2e}\".format(objective_fun_values[i_min]))\n",
	"print(\"Initial guess: {}\".format(initial_guess))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 242,
	"id": "64ea566a-ff00-485a-bd6e-da01f4f145b7",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Optimization terminated successfully.\n",
	" Current function value: -0.000000\n",
	" Iterations: 68\n",
	" Function evaluations: 131\n",
	"***\n",
	"New means for Gaussian RVs: [ 3.63806705 -3.21070776]\n"
	]
	}
	],
	"source": [
	"result = optimize.fmin(objective_fn, initial_guess, args=(N, nu, x, h, K))\n",
	"print(\"***\")\n",
	"print(\"New means for Gaussian RVs: {}\".format(result))"
	]
	},
	{
	"cell_type": "markdown",
	"id": "25d66175-9b06-4809-a513-848028132e89",
	"metadata": {},
	"source": [
	"The results of the optimization will be the new means of the random variables $Y_1, Y_2, \\dots$. All other random variables remain standard normal RVs."
	]
	},
	{
	"cell_type": "markdown",
	"id": "9fa8261d-b3b4-436e-b0be-542d474da261",
	"metadata": {},
	"source": [
	"## \"True\" estimate with huge number of Monte Carlo samples"
	]
	},
	{
	"cell_type": "markdown",
	"id": "4122368a-12b8-4152-9db4-f8967552d488",
	"metadata": {},
	"source": [
	"This should be run to estimate \"true\" value by using huge number of MC samples"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "454a1660-3ce1-49c5-8e8f-e402a1679a52",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Takes a lot of time\n",
	"Y_mean = np.zeros(N)\n",
	"est, err = simulate(10**7, Y_mean)\n",
	"\n",
	"print(\"Overkill crude MC estimate = {:.4f}\".format(est))\n",
	"print(\"Overkill crude MC confidence interval = ({:.4f}, {:.4f})\".format(est - err, est + err))\n",
	"print(\"Overkill crude MC error = {:.4f}\".format(err))\n",
	"print(\"Overkill crude MC relative error = {:.4f}\".format(err / est))"
	]
	},
	{
	"cell_type": "markdown",
	"id": "a10b416c-a852-45a6-be42-a7a67eaa7207",
	"metadata": {
	"tags": []
	},
	"source": [
	"# Importance Sampling Monte Carlo estimator"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 243,
	"id": "20184f6b-5720-4f94-a805-cbabbd7a0faf",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Number of Monte Carlo samples\n",
	"M = 10**4"
	]
	},
	{
	"cell_type": "markdown",
	"id": "a1cb9851-1124-4fb2-a8ad-ca2a56b4e5ec",
	"metadata": {},
	"source": [
	"Compute estimate of the probability that the quantity of interest is less than `K` using `M` samples. If `Y_mean` is all zeros, than this corresponds to crude Monte Carlo."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 248,
	"id": "b296533d-27b9-4e83-9ea4-bc802b3ed096",
	"metadata": {},
	"outputs": [],
	"source": [
	"def simulate(M, Y_mean):\n",
	" \"\"\"Compute Importance Sampling Monte Carlo and the corresponding normal CI error.\"\"\"\n",
	" Q_sample = np.empty(M)\n",
	"\n",
	" for m in range(M):\n",
	" Y = Y_mean + rng.standard_normal(N)\n",
	" kappa = compute_karhunen_loeve_expansion(N, nu, x, Y)\n",
	" a = np.exp(kappa)\n",
	" u = solve_bvp(a)\n",
	" Q = h * np.sum(u)\n",
	" likelihood_ratio = gaussian_independent_pdf(Y) / gaussian_independent_pdf(Y, mean=Y_mean)\n",
	" Q_sample[m] = float(Q < K) * likelihood_ratio\n",
	"\n",
	" est = Q_sample.mean()\n",
	" se = Q_sample.std(ddof=1)\n",
	" err = 1.96*se/np.sqrt(M)\n",
	"\n",
	" return est, err"
	]
	},
	{
	"cell_type": "markdown",
	"id": "4a6beb25-4216-4521-80d0-f8513350a5ae",
	"metadata": {},
	"source": [
	"### Crude MC estimator"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 253,
	"id": "d563265b-307d-4e8e-90e8-9575db6b044d",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Crude MC estimate = 0.0146\n",
	"Crude MC confidence interval = (0.0122, 0.0170)\n",
	"Crude MC error = 0.0024\n",
	"Crude MC relative error = 0.1610\n"
	]
	}
	],
	"source": [
	"Y_mean = np.zeros(N)\n",
	"est, err = simulate(M, Y_mean)\n",
	"\n",
	"print(\"Crude MC estimate = {:.4f}\".format(est))\n",
	"print(\"Crude MC confidence interval = ({:.4f}, {:.4f})\".format(est - err, est + err))\n",
	"print(\"Crude MC error = {:.4f}\".format(err))\n",
	"print(\"Crude MC relative error = {:.4f}\".format(err / est))"
	]
	},
	{
	"cell_type": "markdown",
	"id": "b898d54e-e7c2-4e90-a27d-6acfbcc21513",
	"metadata": {},
	"source": [
	"### IS MC estimator"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 254,
	"id": "48d9ab5d-129c-4cff-ae42-44a39fb63806",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"IS MC estimate = 0.0053\n",
	"IS MC normal confidence interval = (-0.0015, 0.0121)\n",
	"IS MC error = 0.0068\n",
	"IS MC relative error = 1.2821\n"
	]
	}
	],
	"source": [
	"Y_mean = np.zeros(N)\n",
	"Y_mean[:len(result)] = result\n",
	"est, err = simulate(M, Y_mean)\n",
	"\n",
	"print(\"IS MC estimate = {:.4f}\".format(est))\n",
	"print(\"IS MC normal confidence interval = ({:.4f}, {:.4f})\".format(est - err, est + err))\n",
	"print(\"IS MC error = {:.4f}\".format(err))\n",
	"print(\"IS MC relative error = {:.4f}\".format(err / est))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "770b2cd2-b808-44d6-9bfa-67fc686bce10",
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.10"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 5
	}