import FrankWolfe; include(joinpath(dirname(pathof(FrankWolfe)), "../examples/plot_utils.jl")) # hide

# # Difference-of-Convex Algorithm with Frank-Wolfe
# This example shows the optimization of a difference-of-convex problem of the form:
# ```math
# \min_{x \in \mathcal{X}} \phi(x) = f(x) - g(x)
# ```
# with $f$, $g$ convex functions with access to subgradients and $f$ smooth.
# The DCA-FW algorithm constructs local convex models of $\phi$ by linearizing $g$
# and approximately optimizes them with FW. It is a local method that converges to a
# stationary point.
using FrankWolfe
using LinearAlgebra
using Random
using SparseArrays
using StableRNGs
using Random

# The convex functions $f$, $g$ will be generated as random convex quadratics:
# ```math
# \begin{align}
# f(x) & = \frac12 x^\top A x + a^\top x + c \\
# g(x) & = \frac12 x^\top B x + b^\top x + d
# \end{align}
# ```

# ## Setting up the problem functions and data
# Problem dimension; kept moderate so the example runs quickly.
const n = 500 # Reduced dimension
# Generate random positive definite matrices to ensure convexity
"""
    generate_problem_data(rng)

Draw the data `(A, B, a, b, c, d)` of the two random convex quadratics
`f` and `g` from `rng`. The quadratic terms are normalized Gram matrices
shifted by `0.1 * I`, hence symmetric positive definite.
Uses the file-level constant `n` as the dimension.
"""
function generate_problem_data(rng)
    # Quadratic term of f: Gram matrix of a spectrally normalized Gaussian matrix.
    Mf = randn(rng, n, n)
    Mf ./= opnorm(Mf)
    A = Mf' * Mf + 0.1 * I
    # Quadratic term of g, built the same way.
    Mg = randn(rng, n, n)
    Mg ./= opnorm(Mg)
    B = Mg' * Mg + 0.1 * I
    # Linear terms and constants; d is a fixed large negative offset.
    a = randn(rng, n)
    b = randn(rng, n)
    c = randn(rng)
    d = -359434.0
    return A, B, a, b, c, d
end
# Seed a stable RNG so the example is reproducible across Julia versions.
const rng = StableRNGs.StableRNG(1)
const A, B, a, b, c, d = generate_problem_data(rng)

# We can now define the two functions
"""
    f(x)

Smooth convex component of the objective: `0.5 x'Ax + a'x + c`.
"""
f(x) = FrankWolfe.fast_dot(x, A, x) / 2 + dot(a, x) + c
"""
    grad_f!(storage, x)

Overwrite `storage` with the gradient of `f` at `x`, i.e. `A*x + a`.
"""
function grad_f!(storage, x)
    # In-place matrix-vector product, then add the linear term elementwise.
    mul!(storage, A, x)
    @. storage += a
    return nothing
end
"""
    g(x)

Convex component subtracted in the DC decomposition: `0.5 x'Bx + b'x + d`.
"""
g(x) = FrankWolfe.fast_dot(x, B, x) / 2 + dot(b, x) + d
"""
    grad_g!(storage, x)

Overwrite `storage` with the (sub)gradient of `g` at `x`, i.e. `B*x + b`.
"""
function grad_g!(storage, x)
    # storage = B*x without allocating a temporary
    mul!(storage, B, x)
    storage .= storage .+ b
    return nothing
end
# True objective function for verification
# It is not needed by the solver
"""
    phi(x)

True DC objective `f(x) - g(x)`, used only to verify results; the solver
itself never calls it.
"""
phi(x) = f(x) - g(x)
# Feasible region: K-sparse polytope with at most 5 nonzeros of magnitude up to 1000.
lmo = FrankWolfe.KSparseLMO(5, 1000.0)
# Starting point: an extreme point of the feasible region for a random direction.
x0 = FrankWolfe.compute_extreme_point(lmo, randn(n))
# Run DCA-FW: each outer DCA iteration linearizes g and solves the resulting
# convex subproblem with (corrective) Frank-Wolfe.
# Returns: final iterate, primal value, DCA gap, iteration count, status, trajectory.
x_final, primal_final, dca_gap_final, iterations, status, traj_data = FrankWolfe.dca_fw(
    f,
    grad_f!,
    g,
    grad_g!,
    lmo,
    copy(x0),
    max_iteration=200, # Outer iterations
    max_inner_iteration=10000, # Inner iterations
    epsilon=1e-5, # Tolerance for DCA gap
    line_search=FrankWolfe.Secant(),
    verbose=true,
    trajectory=true,
    verbose_inner=false,
    print_iter=10,
    use_corrective_fw=true,
    use_dca_early_stopping=true,
    grad_f_workspace=collect(x0), # preallocated gradient buffers to avoid allocations
    grad_g_workspace=collect(x0),
)
# Run the boosted variant (DCA-FW-B) from the same starting point for comparison.
# The result is kept as a single return object rather than destructured.
res_boost = FrankWolfe.dca_fw(
    f,
    grad_f!,
    g,
    grad_g!,
    lmo,
    copy(x0),
    boosted=true,
    max_iteration=200, # Outer iterations
    max_inner_iteration=10000, # Inner iterations
    epsilon=1e-5, # Tolerance for DCA gap
    line_search=FrankWolfe.Secant(),
    verbose=true,
    trajectory=true,
    verbose_inner=false,
    print_iter=10,
    use_corrective_fw=true,
    use_dca_early_stopping=true,
    grad_f_workspace=collect(x0),
    grad_g_workspace=collect(x0),
)

# ## Plotting the resulting trajectory
# We modify the y axis to highlight that we are plotting the DCA gap, not the FW gap.
# Collect both trajectories and plot them, relabeling the gap axis.
# NOTE(review): the printed return NamedTuple has traj_data as its 6th field, so
# `res_boost[6]` already looks like the trajectory vector — verify the extra
# `.traj_data` access against the dca_fw return type.
data = [traj_data, res_boost[6].traj_data]
label = ["DCA-FW", "DCA-FW-B"]
p_res = plot_trajectories(data, label, marker_shapes=[:o, :x])
ylabel!(p_res.subplots[3], "DCA gap")
display(p_res)

# This page was generated using Literate.jl.