Difference-of-Convex Algorithm with Frank-Wolfe
This example shows the optimization of a difference-of-convex problem of the form:
\[\min\limits_{x \in \mathcal{X}} \phi(x) = f(x) - g(x)\]
with $f$, $g$ convex functions with access to (sub)gradients, and $f$ smooth.
The DCA-FW algorithm constructs local convex models of $\phi$ by linearizing $g$ and approximately minimizes them with Frank-Wolfe (FW). It is a local method that converges to a stationary point. A minimal sketch of this outer loop is given right after the imports below.
using FrankWolfe
using LinearAlgebra
using Random
using SparseArrays
using StableRNGs
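To make the construction above concrete, here is a minimal, illustrative sketch of the DCA outer loop, assuming a plain `FrankWolfe.frank_wolfe` run on each convex model. `dca_sketch` is a hypothetical helper written for this example, not the package routine; `FrankWolfe.dca_fw`, used below, is the actual implementation.
# Hypothetical illustration of the DCA outer loop, not the FrankWolfe.dca_fw implementation.
function dca_sketch(f, grad_f!, grad_g!, lmo, x0; outer_iterations=100)
    x = copy(x0)
    gx = similar(x)
    for _ in 1:outer_iterations
        grad_g!(gx, x)                    # linearize g at the current iterate
        gxt = copy(gx)                    # freeze the linearization point for the closures
        model_f = y -> f(y) - dot(gxt, y) # convex model of phi, up to a constant
        model_grad! = (storage, y) -> begin
            grad_f!(storage, y)
            storage .-= gxt
        end
        # approximately minimize the convex model with a standard FW run
        x = FrankWolfe.frank_wolfe(model_f, model_grad!, lmo, x; max_iteration=1000, epsilon=1e-6)[1]
    end
    return x
end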
The convex functions $f$, $g$ will be generated as random convex quadratics:
\[\begin{align} f(x) & = \frac12 x^\top A x + a^\top x + c \\ g(x) & = \frac12 x^\top B x + b^\top x + d \end{align}\]
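Their gradients follow directly from the quadratic form:
\[\begin{align} \nabla f(x) & = A x + a \\ \nabla g(x) & = B x + b \end{align}\]
which is what the in-place gradient oracles below implement.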
Setting up the problem functions and data
const n = 500 # problem dimension (reduced to keep the example fast)
# Generate random positive definite matrices to ensure convexity
function generate_problem_data(rng)
    A_raw = randn(rng, n, n)
    A_raw ./= opnorm(A_raw)
    A = A_raw' * A_raw + 0.1 * I # symmetric positive definite
    B_raw = randn(rng, n, n)
    B_raw ./= opnorm(B_raw)
    B = B_raw' * B_raw + 0.1 * I # symmetric positive definite
    a = randn(rng, n)
    b = randn(rng, n)
    c = randn(rng)
    d = -359434.0 # constant offset in g; only shifts the objective value
    return A, B, a, b, c, d
end
const rng = StableRNGs.StableRNG(1)
const A, B, a, b, c, d = generate_problem_data(rng)
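As a quick sanity check (added here, not part of the original example), both quadratic forms should be positive definite, so $f$ and $g$ are indeed convex:
@assert isposdef(A) && isposdef(B) # positive definite Hessians imply f and g are convex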
We can now define the two functions and their in-place gradient oracles.
function f(x)
    return 0.5 * FrankWolfe.fast_dot(x, A, x) + dot(a, x) + c
end

function grad_f!(storage, x)
    mul!(storage, A, x)
    storage .+= a
    return nothing
end

function g(x)
    return 0.5 * FrankWolfe.fast_dot(x, B, x) + dot(b, x) + d
end

function grad_g!(storage, x)
    mul!(storage, B, x)
    storage .+= b
    return nothing
end

# True objective function for verification; it is not needed by the solver.
function phi(x)
    return f(x) - g(x)
end
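As an added sanity check (not part of the original example), a central finite difference along one coordinate should match the analytic gradient of $f$; the local RNG below is an arbitrary choice so the global random stream used later stays untouched:
let rng_check = StableRNGs.StableRNG(7), h = 1e-6
    x = randn(rng_check, n)
    storage = similar(x)
    grad_f!(storage, x)
    e1 = zeros(n)
    e1[1] = 1.0
    fd = (f(x + h * e1) - f(x - h * e1)) / (2h) # central difference along coordinate 1
    @assert isapprox(storage[1], fd; atol=1e-5, rtol=1e-4)
end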
lmo = FrankWolfe.KSparseLMO(5, 1000.0) # K-sparse polytope (K = 5, radius 1000.0)
x0 = FrankWolfe.compute_extreme_point(lmo, randn(n)) # start from an extreme point of the feasible set
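Extreme points of the K-sparse polytope are themselves sparse, so the starting point should have at most 5 nonzero entries; a quick check added for illustration:
println("nonzeros in x0: ", count(!iszero, x0))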
x_final, primal_final, dca_gap_final, iterations, status, traj_data = FrankWolfe.dca_fw(
    f,
    grad_f!,
    g,
    grad_g!,
    lmo,
    copy(x0),
    max_iteration=200, # outer (DCA) iterations
    max_inner_iteration=10000, # inner (FW) iterations
    epsilon=1e-5, # tolerance for the DCA gap
    line_search=FrankWolfe.Secant(),
    verbose=true,
    trajectory=true,
    verbose_inner=false,
    print_iter=10,
    use_corrective_fw=true,
    use_dca_early_stopping=true,
    grad_f_workspace=collect(x0),
    grad_g_workspace=collect(x0),
)
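Since phi was defined above precisely for verification, we can evaluate it at the returned point and compare it with the primal value reported by the solver (an added check; the two should agree):
println("phi(x_final) = ", phi(x_final), ", reported primal = ", primal_final)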
res_boost = FrankWolfe.dca_fw(
    f,
    grad_f!,
    g,
    grad_g!,
    lmo,
    copy(x0),
    boosted=true, # enable the boosted DCA variant
    max_iteration=200, # outer (DCA) iterations
    max_inner_iteration=10000, # inner (FW) iterations
    epsilon=1e-5, # tolerance for the DCA gap
    line_search=FrankWolfe.Secant(),
    verbose=true,
    trajectory=true,
    verbose_inner=false,
    print_iter=10,
    use_corrective_fw=true,
    use_dca_early_stopping=true,
    grad_f_workspace=collect(x0),
    grad_g_workspace=collect(x0),
)
The boosted run returns a 6-sparse solution supported on coordinates 192, 223, 300, 312, 369, and 459, with primal value ≈ 53053.6267, DCA gap ≈ 9.08e-6, and status FrankWolfe.STATUS_OPTIMAL after 115 outer iterations (full trajectory output omitted).
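To compare the two variants side by side, we can print their final values (an added snippet, using the result fields shown above):
println("DCA-FW   primal: ", primal_final, " (", iterations, " iterations)")
println("DCA-FW-B primal: ", res_boost.primal, " (", res_boost.iterations, " iterations)")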
Plotting the resulting trajectory
We modify the y-axis label to highlight that we are plotting the DCA gap, not the FW gap.
include(joinpath(dirname(pathof(FrankWolfe)), "../examples/plot_utils.jl")) # assumed location of the plot_trajectories helper
data = [traj_data, res_boost.traj_data]
label = ["DCA-FW", "DCA-FW-B"]
p_res = plot_trajectories(data, label, marker_shapes=[:o, :x])
ylabel!(p_res.subplots[3], "DCA gap")
display(p_res)
This page was generated using Literate.jl.