In[]:=
CompoundExpression[
]
​​deploy
Thu 18 Jan 2024 11:00:04
Di
https://math.stackexchange.com/questions/4843188/computing-e-log1-x2-for-chi-squared-x?noredirect=1#comment10326565_4843188

Symbolic integral

Forked from linear-estimation: Probabilistic stability of 1D Gaussian SGD
In[]:=
$Assumptions={x>0,a>0};​​dist[n_]=TransformedDistribution[x/n,xChiSquareDistribution[n]];​​pdf[n_,x_]=PDF[dist[n],x];​​expr[n_,x_]=Refine[Log[(1-ax)^2]pdf[2b,x]];​​mean[b_?IntegerQ]:=Inactive[Integrate][Refine[Log[(1-ax)^2]pdf[2b,x]],{x,0,1/a,∞}];​​bvals=Range[2];​​results=(mean[#]//Activate//FullSimplify)&/@bvals;​​Print["Integrating: ",Refine[Log[(1-ax)^2]pdf[2b,x]]//TraditionalForm]​​TableForm[{SF["b=``",#]&/@bvals,results}]
Integrating:
-b

1
b

-bx

b-1
x
log(
2
(1-ax)
)
Γ(b)
Out[]//TableForm=
b=1
-2
-1/a

ExpIntegralEi
1
a

b=2
2-
2(2+a)
-2/a

ExpIntegralEi
2
a

a
​
Out[]//TraditionalForm=
-b

1
b

-bx

b-1
x
log(
2
(1-ax)
)
Γ(b)

Get numerical cutoffs

Forked from linear-estimation: Probabilistic stability of 1D Gaussian SGD
In[]:=
findRoot[c0_,a0_,b0_,f_,df_]:=Module[{a,b,c,x,y,dy,maxIters,stopCriterion},​​c=c0;​​a=a0;​​b=b0;​​x=c;​​maxIters=100;​​stopCriterion=
-6
10
;​​For[i=1,i<=maxIters,i+=1,​​y=f[x];​​If[Abs[y]<stopCriterion,Break[]];​​dy=df[x];​​x=x-y/dy;(*replacewith(dy+eps)forill-conditoned*);​​Sow[y];​​(*checkifxgotoutofinterval*);​​Which[​​x<a,x=(a+b)/2;b=c,​​x>b,x=(a+b)/2;a=c,​​True,​​_​​];​​c=x;​​];​​c​​];​​​​adaptiveIntegrate[expr_]:=NIntegrate[expr,{x,0,∞},Method->"LocalAdaptive"];​​pvalueIntegrate[expr_,exclusion_]:=NIntegrate[expr,{x,0,∞},Method->"PrincipalValue",Exclusions->{exclusion},MinRecursion->4,MaxRecursion->20];​​SF=StringForm;​​​​Off[General::munfl];(*underflowwarnings*)​​ClearAll[n,x,a,b,getCritical,dist,pdf,meanLog,meanLogD];​​​​(*xisdomain,aisstepsize*)​​$Assumptions={x>0,a>0};​​​​(*Distributionofsampleaverageofnchi-squaredR.V.s*)​​dist[n_]=TransformedDistribution[x/n,xChiSquareDistribution[n]];​​pdf[n_,x_]=Refine@PDF[dist[n],x];(*refinetogetridofPiecewise*)​​​​(*Expectedvalueoflog*)​​expr[n_,a_,x_]=Log[
2
(1-a*x)
]*pdf[n,x];​​exprD[n_,a_,x_]=D[expr[n,a,x],a];​​meanLog[n_,a_?NumericQ]:=adaptiveIntegrate[expr[n,a,x]];​​meanLogD[n_,a_?NumericQ]:=pvalueIntegrate[exprD[n,a,x],1-ax==0];​​​​batchSizes={1,2,3,10,1000};​​legends=SF["b=``",#]&/@batchSizes;​​labeledPdfs=Table[Labeled[pdf[b,x],SF["b=``",b],{Top}],{b,batchSizes}];​​Plot@@{labeledPdfs,{x,0,3},PlotLegends->legends,PlotLabel->"Sample mean of b
2
χ
R.V.s",AxesLabel->{"x","density"}}​​​​labeledMeanLogs=Table[Labeled[meanLog[b,a],SF["b=``",b],{Top}],{b,batchSizes}];​​Plot@@{labeledMeanLogs,{a,0,3},PlotLegends->legends,PlotLabel->"E[log (1-ax
2
)\),
​]",AxesLabel->{"x","density"}}​​​​(*Findcriticallearningrateforgivennumberofrandomvariables*)​​Off[NIntegrate::izero];​​getCritical[n_?NumericQ]:=findRoot[2.5,2,3,meanLog[n,#]&,meanLogD[n,#]&];​​critVals=getCritical/@batchSizes;​​pointsPlot=ListPlot[{batchSizes,critVals},PlotStyle->Directive[PointSize[Large],Red]];​​smoothPlot=Plot[Callout[getCritical[n],legends,{batchSizes,critVals}],{n,0.6,20},MaxRecursion->1,PlotLabel->"Critical step α",AxesLabel->{"b","α"}];​​Show[smoothPlot,pointsPlot]​​​​TableForm[{SF["b=``",#]&/@batchSizes,critVals}]

Visualize plot

Forked from:
​mathoverflow-gaussian-convergence.nb​
​
In[]:=
(*
https://math.stackexchange.com/questions/4519054/bounds-on-spectral-radius-of-2-textdiaghh-cdot-1t
*)​​stepL2SGDfast[h_]:=Module[{d=Length[h],normalize,step,evec},​​normalize[v_]:=v/Sqrt@Total[v*v];​​step[v_]:=2h*v+h*Total[v];​​evec=FixedPoint[normalize[step[#]]&,ConstantArray[1.,d],1000];​​2/Norm[step@evec]​​]​​​​(*Gaussiansamplerwithdiagonalcovariance*)​​SeedRandom[1,Method->"MKL"];​​SF=StringForm;​​gaussianSampler[diag_]:=With{d=Length[diag]},​​Compile{{n,_Integer}},​​Module{vals,diagSqrt},​​diagSqrt=
diag
;​​vals=diagSqrt*#&/@RandomVariate[NormalDistribution[],{n,d}];​​​​tileRows[mat_,times_]:=ArrayFlatten[Table[mat,{times},{1}]];​​duplicateRows[mat_,times_]:=Flatten[Table[#,{times}]&/@mat,1];​​​​(*GeneratesplotforGaussianSGDwithcoveigenvalues
-p
1
,
-p
2
,...,
-p
d
,byusingstepsizeaandbatchsizeB1*)​​generatePlot[p_,d_,a_,B1_]:=Module[{},​​numSamples=1000;​​numSteps=100;​​ones=ConstantArray[1.,d];​​h=Table[
-p
i
,{i,1,d}];​​sampler=gaussianSampler[h];​​W0=RandomVariate[NormalDistribution[],{numSamples,Length[h]}];​​​​step[w_,x_]=w-axx.w;​​batchStep[W_]:=MapThread[step,{W,sampler[numSamples*B1]}];​​averagedStep[W_]:=Mean/@Partition[batchStep[duplicateRows[W,B1]],B1];​​traj=NestList[averagedStep,W0,numSteps];(*numStepsxnumSamplesxd*)​​​​errorNorms2=Total[traj*traj,{3}];(*numStepsxnumSamples*)​​numQuantiles=100;​​compress[l_]:=Quantile[l,#]&/@Range[0,1,1/numQuantiles];​​​​meanPlot=ListLinePlot[Total[errorNorms2,{2}]/numSamples,ScalingFunctions->"Log",PlotLegends->{"mean"}];​​filling=Table[i->{numQuantiles+2-i},{i,1,numQuantiles/2}];​​distPlot=ListLinePlot[Transpose[compress/@errorNorms2],ScalingFunctions->"Log",Filling->filling,FillingStyle->Directive[​​Opacity[.1],EdgeForm[]],PlotStyle->{Directive[Gray,Thin]}];​​nf[expr_]:=NumberForm[N@expr,{3,3}];​​​​Show[distPlot,meanPlot,AxesLabel->{"t","||e
2
||
"},PlotLabel->SF["B=``, α=``, final error=``",B1,nf@a,nf@Median@Last[errorNorms2]],ImageSize->Large]​​​​];​​​​p=1;​​d=1;​​(*1Ddivergencethreshold,seeDSP.se
post
*)​​a1=2.421249521036836042992780131428225933128842834067867847377838424345397835692066559296652232682365322;​​a2=2.6845103513837487`;​​a3=2.6988830381053224`;​​generatePlot[0,d,a2,1]​​generatePlot[0,d,a2,3]​​generatePlot[0,d,a2,10]​​