In[]:=
deploy
Mon 20 May 2019 19:53:48

Util

In[]:=
(* utils *)
movingAvg[ys_,avg_]:=Module[{xs},
  xs=Range@Length@ys;
  Transpose@{MovingAverage[xs,avg],MovingAverage[ys,avg]}
]

(* column vectorize, following Magnus, 1999 *)
vec[W_]:=Transpose@{Flatten@Transpose[W]};
unvec[Wf_,rows_]:=Transpose[Flatten/@Partition[Wf,rows]];
toscalar[v_]:=Block[{t},
  t=Flatten@v;
  Assert[Length[t]==1,"scalar assert"];
  First@t
];

v2c[c_]:=Transpose[{c}] (* turns vector into column matrix *)
v2r[c_]:={c} (* turns vector into row matrix *)
c2v[c_]:=Flatten[c] (* turns column matrix into vector *)

(* dot product that works on matrices *)
Unprotect[CircleDot];
DotProduct[a_,b_]:=Inner[Times,Flatten@a,Flatten@b,Plus];
CircleDot=DotProduct;

(* deploys this notebook to the cloud under a canonical name *)
deploy:=Module[{notebookFn,parentDir,cloudFn,result},
  Print[DateString[]];
  notebookFn=FileNameSplit[NotebookFileName[]][[-1]];
  parentDir=FileNameSplit[NotebookFileName[]][[-2]];
  cloudFn=parentDir~StringJoin~"/"~StringJoin~notebookFn;
  result=CloudDeploy[SelectedNotebook[],CloudObject[cloudFn],Permissions->"Public",SourceLink->None];
  Print["Uploading to ",cloudFn];
  result
]

(* centers data; examples are stored as columns, i.e. shape (features,batch) *)
centerData[X_]:=Module[{Xc},
  Xc=Mean@Transpose@X;
  Transpose[#-Xc&/@Transpose[X]]
];

(* Generate 2D data for least-squares regression.
   e ∈ (0,1]: off-diagonal offset from the singular covariance; e=1 is unit normal, e=0 is singular
   dsize: number of datapoints
   v2: standard deviation of the observation noise
   Returns {X,Y} where X has shape (2,dsize) and Y has shape (1,dsize) *)
generateXY[e_,dsize_,v2_]:=Module[{n,wt,mean,cov,normal,X,Y},
  n=2; (* dimensions *)
  wt={{1,1}}; (* true predictor weights *)
  mean=0&/@Range@n;
  cov={{1,1-e},{1-e,1}};
  normal=MultinormalDistribution[mean,cov];
  X=RandomVariate[normal,{dsize}]//Transpose;
  X=centerData[X];
  Y=Dot[wt,X]+RandomVariate[NormalDistribution[0,v2],{1,dsize}];
  {X,Y}];

(* Generate multidimensional X data in n dimensions *)
generateX[n_,e_,dsize_]:=Module[{wt,mean,cov,normal,X},
  mean=Table[0,{n}];
  cov=IdentityMatrix[n]*e+Array[1-e&,{n,n}];
  normal=MultinormalDistribution[mean,cov];
  X=RandomVariate[normal,{dsize}]//Transpose
  (*centerData[X]*)
];
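A quick sanity check of these helpers (a minimal sketch added here, not part of the original notebook; testW is an arbitrary 2×3 example matrix): vec stacks the columns of a matrix into a single column, unvec inverts it given the row count, and DotProduct is the Frobenius inner product of two arrays of matching size.

In[]:=
(* sketch: sanity checks for the utilities above; testW is a made-up example *)
testW={{1,2,3},{4,5,6}};
vec[testW]                                    (* column-major vectorization: {{1},{4},{2},{5},{3},{6}} *)
unvec[vec[testW],2]===testW                   (* unvec undoes vec given the row count: True *)
DotProduct[{{1,2},{3,4}},IdentityMatrix[2]]   (* Frobenius inner product: 1+4=5 *)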

Noisy Quadratic Coherence

In[]:=
SeedRandom[0];
dsize=10000; (* number of datapoints *)
{X,Y}=generateXY[0.1,dsize,0.0000001];
err[w_]:=w.X-Y; (* residuals, (1,dsize) *)
err[w_,i_]:={err[w][[All,i]]}; (* residual for example i, (1,1) *)
grad[w_]:=err[w].Transpose[X]; (* full gradient, (1,2) *)
grad[w_,i_]:=v2r[err[w][[1,i]] Transpose[X][[i,All]]]; (* gradient for one example, (1,2) *)
loss[w_]:=toscalar[err[w].Transpose[err[w]]/2];
loss[w_,i_]:=toscalar[err[w,i].Transpose[err[w,i]]/2];

(* Matrix of all gradients, (dsize,2); the i-th row is the gradient for the i-th example *)
gradients[w_]:=(DiagonalMatrix@Flatten@err[w]).Transpose[X];
(* Empirical Fisher matrix at the current point, estimated from the whole dataset *)
Cmat[w_]:=Transpose[gradients[w]].gradients[w]/dsize;
(* Empirical Fisher matrix at the current point, estimated from example i *)
Cmat[w_,i_]:=Transpose[grad[w,i]].grad[w,i];

maxIters=10000;
(* sequence of examples to sample *)
indices=RandomChoice[Range[dsize],maxIters];

optimizeSgd[lr_,w0_,iters_]:=Module[{g,w},
  {pointList,gradList,fullGradList,lossList}={{},{},{},{}};
  w=w0;
  For[iter=1,iter≤iters,iter++,
    g=grad[w,indices[[iter]]];
    pointList=pointList~Append~w;
    gradList=gradList~Append~g;
    fullGradList=fullGradList~Append~grad[w];
    lossList=lossList~Append~loss[w];
    w=w-lr*g;
  ]
];

w0={{1,2}};
numSteps=1000;
η=0.05;
optimizeSgd[η,w0,numSteps]
ListPlot[lossList,PlotLabel->"noisy quadratic",AxesLabel->{"iter","loss"}]

bound=2;
plt1=ContourPlot[loss[{{x,y}}],{x,0,bound},{y,0,bound},Contours->100,ContourShading->None];
plotPoints=Flatten/@pointList;
plt3=Graphics[{Red,PointSize[0.01],Point[plotPoints]}];
plt4=Graphics[{Blue,Line[plotPoints]}];
Show[{plt1,plt3,plt4},PlotLabel->"Optimization Path"]

(* Empirical Fisher matrix at the current point, estimated from example i *)
Cmat[w_,i_]:=Transpose[grad[w,i]].grad[w,i];
(* inner product of a and b, normalized by the squared norm of a *)
lopsidedCosine[a_,b_]:=DotProduct[a,b]/DotProduct[a,a];
(* coherence: smallest inner product between the current gradient and the previous k gradients *)
coherence[i_,k_]:=Min[Table[DotProduct[gradList[[i-l]],gradList[[i]]],{l,1,k}]];
coherences1=Table[coherence[i,1],{i,20,numSteps}];
coherences5=Table[coherence[i,5],{i,20,numSteps}];
ListLinePlot[{movingAvg[coherences1,10],movingAvg[coherences5,10]},PlotRange->All,PlotLabel->"Gradient Coherence",PlotLegends->{"m=1","m=5"}]
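A follow-up sketch (added here, not part of the original notebook; it assumes the cell above has been evaluated so that pointList, gradList, Cmat, lopsidedCosine, and movingAvg are defined; wFinal and lopsided1 are new names): coherence uses raw inner products between the current stochastic gradient and the previous m gradients, while lopsidedCosine rescales by the squared norm of its first argument, giving a scale-free view of the same alignment. The per-example Cmat can also be averaged over a small random subset of examples to estimate the 2×2 empirical Fisher at the final iterate without forming the dsize×dsize diagonal matrix used by gradients.

In[]:=
(* sketch: assumes the definitions and lists from the previous cell *)
wFinal=Last[pointList]; (* final recorded SGD iterate *)
(* estimate the 2x2 empirical Fisher at wFinal from 100 random examples *)
Mean[Table[Cmat[wFinal,i],{i,RandomSample[Range[dsize],100]}]]
(* scale-free analogue of the m=1 coherence: <g(i-1),g(i)>/<g(i-1),g(i-1)> *)
lopsided1=Table[lopsidedCosine[gradList[[i-1]],gradList[[i]]],{i,20,numSteps}];
ListLinePlot[movingAvg[lopsided1,10],PlotRange->All,PlotLabel->"Lopsided cosine, m=1",AxesLabel->{"iter",""}]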