plot_sparsity (generic function with 1 method)

Difference-of-Convex Algorithm with Frank-Wolfe

This example shows the optimization of a difference-of-convex problem of the form:

\[\min_{x \in \mathcal{X}} \phi(x) = f(x) - g(x)\]

with $f$, $g$ convex functions with access to subgradients and $f$ smooth.

The DCA-FW algorithm constructs local convex models of $\phi$ by linearizing $g$ and approximately optimizes them with FW. It is a local method that converges to a stationary point of $\phi$.

using FrankWolfe
using LinearAlgebra
using Random
using SparseArrays
using StableRNGs

The convex functions $f$, $g$ will be generated as random convex quadratics:

\[\begin{align} f(x) & = \frac12 x^\top A x + a^\top x + c \\ g(x) & = \frac12 x^\top B x + b^\top x + d \end{align}\]

Setting up the problem functions and data

# Problem dimension (kept moderate so the example runs quickly)
const n = 500  # Reduced dimension

# Generate random positive definite matrices to ensure convexity
# Generate random positive definite matrices to ensure convexity
"""
    generate_problem_data(rng, dim=n)

Build the data `(A, B, a, b, c, d)` of the two convex quadratics
`f(x) = 0.5 x'A x + a'x + c` and `g(x) = 0.5 x'B x + b'x + d`.

`A` and `B` are `dim × dim` symmetric positive definite (a spectrally
normalized Gram matrix plus `0.1 * I`), so both quadratics are strongly
convex. `dim` defaults to the global `n` for backward compatibility.
"""
function generate_problem_data(rng, dim::Integer=n)
    A_raw = randn(rng, dim, dim)
    A_raw ./= opnorm(A_raw)       # normalize so the Gram matrix has unit-order spectrum
    A = A_raw' * A_raw + 0.1 * I  # SPD: Gram matrix shifted by 0.1 I

    B_raw = randn(rng, dim, dim)
    B_raw ./= opnorm(B_raw)
    B = B_raw' * B_raw + 0.1 * I

    a = randn(rng, dim)
    b = randn(rng, dim)

    c = randn(rng)
    # Fixed offset of g; it shifts phi = f - g by a constant without
    # affecting the minimizer (value chosen for this example's output).
    d = -359434.0

    return A, B, a, b, c, d
end

# Seeded, version-stable RNG so the generated problem is reproducible
const rng = StableRNGs.StableRNG(1)
const A, B, a, b, c, d = generate_problem_data(rng)
([0.3664716461545843 0.007434201971305423 … 0.004451785375884827 0.015514411527022787; 0.007434201971305423 0.34822813947414344 … 0.013654308840956007 -0.025910486878046375; … ; 0.004451785375884827 0.013654308840956007 … 0.3806261404722967 0.0118969109481474; 0.015514411527022787 -0.025910486878046375 … 0.0118969109481474 0.34299337924542433], [0.36858235928505056 0.0036477368657639084 … -0.014747118492049596 -0.009390758842324287; 0.0036477368657639084 0.36832277324228413 … -0.004708423922308739 0.019319726914150886; … ; -0.014747118492049596 -0.004708423922308739 … 0.3718244447067395 -0.0070589804508730414; -0.009390758842324287 0.019319726914150886 … -0.0070589804508730414 0.36976970853592206], [-0.5056985849854947, 1.8254695540945816, 1.1686948924955953, 0.025865043318803068, 0.7618207158940428, -1.9599078996573032, -0.28701522778989075, -1.210555966368952, -0.3476555815275666, 0.36622748810084865  …  -1.2418975260665193, -2.042717239142079, -0.977247840514734, -0.9480533072182336, 0.2354971732415276, 1.1489207372892907, 0.26713737924876685, -0.11111819389928967, 1.2106291210589706, -0.36135542009200855], [-1.7352231531269595, -1.0889348569606767, -1.9696069675033783, -0.6944538146281424, 0.12594479576421444, 2.43890963984164, -0.3332102207771672, -1.598587279152474, 0.8970898814075347, 0.7441204116527911  …  -0.07885951683533911, 0.921491279700674, 0.8755353895807455, 0.4142785039140312, 1.0607172005644177, -0.9845905380360871, 0.7603797972500105, 1.0314248598279463, -1.0727076372535254, -0.1591576537893938], -0.6622940864130684, -359434.0)

We can now define the two functions

"""
    f(x)

Smooth convex quadratic `0.5 * x'A*x + a'x + c`, the `f` part of `phi`.
"""
f(x) = 0.5 * FrankWolfe.fast_dot(x, A, x) + dot(a, x) + c

"""
    grad_f!(storage, x)

Overwrite `storage` with the gradient `∇f(x) = A*x + a`; returns `nothing`.
"""
function grad_f!(storage, x)
    copyto!(storage, a)
    mul!(storage, A, x, true, true)  # storage ← A*x + storage
    return nothing
end

"""
    g(x)

Convex quadratic `0.5 * x'B*x + b'x + d`; it enters `phi` as the
subtracted (concave) part `-g`.
"""
g(x) = 0.5 * FrankWolfe.fast_dot(x, B, x) + dot(b, x) + d

"""
    grad_g!(storage, x)

Overwrite `storage` with the gradient `∇g(x) = B*x + b`; returns `nothing`.
"""
function grad_g!(storage, x)
    copyto!(storage, b)
    mul!(storage, B, x, true, true)  # storage ← B*x + storage
    return nothing
end

"""
    phi(x)

True difference-of-convex objective `f(x) - g(x)`.
Only used to verify results — the solver itself never calls it.
"""
phi(x) = f(x) - g(x)

# Linear minimization oracle over the K-sparse set with K = 5 and radius 1000.0
lmo = FrankWolfe.KSparseLMO(5, 1000.0)

# Starting point: an extreme point of the feasible region.
# NOTE(review): `randn(n)` uses the global RNG rather than the seeded `rng`,
# so `x0` is not reproducible across runs — confirm whether this is intended.
x0 = FrankWolfe.compute_extreme_point(lmo, randn(n))


# Solve with the plain DCA-FW variant
res_dca = FrankWolfe.dca_fw(
    f,
    grad_f!,
    g,
    grad_g!,
    lmo,
    copy(x0),
    max_iteration=200,          # outer (DCA) iterations
    max_inner_iteration=10000,  # FW iterations per convex subproblem
    epsilon=1e-5,               # tolerance on the DCA gap
    line_search=FrankWolfe.Secant(),
    verbose=true,
    trajectory=true,
    verbose_inner=false,
    print_iter=10,
    use_corrective_fw=true,
    use_dca_early_stopping=true,
    grad_f_workspace=collect(x0),
    grad_g_workspace=collect(x0),
)


# Same solve with the boosted variant (`boosted=true`) for comparison
res_boost = FrankWolfe.dca_fw(
    f,
    grad_f!,
    g,
    grad_g!,
    lmo,
    copy(x0),
    boosted=true,
    max_iteration=200, # Outer iterations
    max_inner_iteration=10000, # Inner iterations
    epsilon=1e-5, # Tolerance for DCA gap
    line_search=FrankWolfe.Secant(),
    verbose=true,
    trajectory=true,
    verbose_inner=false,
    print_iter=10,
    use_corrective_fw=true,
    use_dca_early_stopping=true,
    grad_f_workspace=collect(x0),
    grad_g_workspace=collect(x0),
)
(x =   [85 ]  =  -1000.0
  [165]  =  1000.0
  [172]  =  -1000.0
  [189]  =  -495.797
  [284]  =  509.933
  [443]  =  -994.27, primal = -28559.07967711636, dca_gap = 0.0017960158386358671, iterations = 200, status = FrankWolfe.STATUS_MAXITER, traj_data = Any[(1, 294875.31210056937, 159462.77113257998, 135412.5409679894, 0.000247721), (2, 233608.78152744513, 171884.65988862887, 61724.12163881626, 0.001330064), (3, 193747.43895855575, 152453.9253774089, 41293.51358114686, 0.001947172), (4, 157226.3824985706, 125317.55242508456, 31908.830073486042, 0.002553019), (5, 121290.36026926019, 85228.86009675784, 36061.50017250235, 0.00312393), (6, 94135.52046240523, 60922.44884054625, 33213.07162185898, 0.003903731), (7, 72598.53313159908, 53691.12327595457, 18907.40985564451, 0.004492245), (8, 50442.87391716789, 33321.68835612556, 17121.18556104233, 0.004953964), (9, 26748.452816429315, 5460.808603641635, 21287.64421278768, 0.005468611), (10, 6245.563844720367, -20035.881602236528, 26281.445446956895, 0.005918698)  …  (192, -28559.07959273737, -28559.0845594115, 0.004966674131082982, 8.239527114), (193, -28559.079606098705, -28559.084702978493, 0.005096879787743092, 8.38390711), (194, -28559.0796185598, -28559.082333950926, 0.0027153911240702655, 8.526388058), (195, -28559.07963019691, -28559.0885625935, 0.008932396591717406, 8.7835616), (196, -28559.079641020508, -28559.08812539701, 0.008484376502110536, 8.895897083), (197, -28559.07965102105, -28559.084049075827, 0.004398054777993821, 9.009495127), (198, -28559.079660346964, -28559.084090260923, 0.004429913957437748, 9.129968078), (199, -28559.07966902794, -28559.08369788131, 0.0040288533726879905, 9.268210821), (200, -28559.07967711636, -28559.0814731322, 0.0017960158386358671, 9.402283156), (200, -28559.07967711636, -28559.0814731322, 0.0017960158386358671, 9.540315217)])

Plotting the resulting trajectory

We modify the y axis to highlight that we are plotting the DCA gap, not the FW gap

# Collect both trajectories and plot them with one curve per run
data = [res_dca.traj_data, res_boost.traj_data]
label = ["DCA-FW", "DCA-FW-B"]
p_res = plot_trajectories(data, label, marker_shapes=[:o, :x])
# Relabel the third subplot: it shows the DCA gap, not the FW gap
ylabel!(p_res.subplots[3], "DCA gap")
Example block output
# Render the comparison figure
display(p_res)
┌ Warning: No strict ticks found
└ @ PlotUtils ~/.julia/packages/PlotUtils/HX80C/src/ticks.jl:194
┌ Warning: No strict ticks found
└ @ PlotUtils ~/.julia/packages/PlotUtils/HX80C/src/ticks.jl:194
┌ Warning: No strict ticks found
└ @ PlotUtils ~/.julia/packages/PlotUtils/HX80C/src/ticks.jl:194
┌ Warning: Invalid negative or zero value -15888.95527970884 found at series index 2 for log10 based yscale
└ @ Plots ~/.julia/packages/Plots/GIume/src/utils.jl:105
┌ Warning: Invalid negative or zero value -6268.8743044039 found at series index 11 for log10 based yscale
└ @ Plots ~/.julia/packages/Plots/GIume/src/utils.jl:105
┌ Warning: No strict ticks found
└ @ PlotUtils ~/.julia/packages/PlotUtils/HX80C/src/ticks.jl:194
┌ Warning: Invalid negative or zero value -15888.95527970884 found at series index 2 for log10 based yscale
└ @ Plots ~/.julia/packages/Plots/GIume/src/utils.jl:105
┌ Warning: Invalid negative or zero value -6268.8743044039 found at series index 11 for log10 based yscale
└ @ Plots ~/.julia/packages/Plots/GIume/src/utils.jl:105

This page was generated using Literate.jl.