Neural Net Training for "Can AI Solve Science?"

Train an Autoencoder on Cellular Automata

TrainCAAutoencoder[
   ruleSpec_ : <|"OuterTotalisticCode" -> 966, "Range" -> 2|>,
   experimentName_String : "outer966",
   basePath_String : "AISolveScienceCheckpoints-01", (* NOTE: change this string to the path where you want to store the model checkpoints *)
   nDilations_Integer : 3,
   maxTrainingRounds_Integer : 100000,
   caSize_Integer : 64,
   nColors_Integer : 2,
   embDim_Integer : 64,
   convChannels_Integer : 64,
   latentDim_Integer : 32,
   seed_Integer : 12345
   ] :=
 Module[{convLayer, encoder, resize, decoder, loss, net, lossNet, generateCA, dataGenerator, initNet},

  (* convolution block: a single strided convolution, or a bank of dilated convolutions concatenated along the channel dimension *)
  convLayer[convLayerChannels_Integer, stride_Integer : 2, nConvDilations_Integer : 1, convSize_Integer : 3, layer_Symbol : ConvolutionLayer] :=
   If[nConvDilations == 1,
    NetChain[{
      layer[convLayerChannels, convSize, "Stride" -> stride,
       PaddingSize -> {(convSize - 1)/2, (convSize - 1)/2}, Interleaving -> True],
      Ramp
      }],
    NetGraph[
     {
      Splice@Table[
        NetChain[{
          layer[convLayerChannels, convSize, "Stride" -> stride, "Dilation" -> k,
           PaddingSize -> {((convSize - 1)/2) k, ((convSize - 1)/2) k}, Interleaving -> True],
          ElementwiseLayer["ReLU"]
          }],
        {k, nConvDilations}],
      NetChain[{
        CatenateLayer[3],
        Ramp
        }]
      },
     {
      NetPort["Input"] -> Range[nConvDilations],
      Range[nConvDilations] -> (nConvDilations + 1) -> NetPort["Output"]
      }]
    ];

  (* encoder: embed cell values, downsample with three strided convolution blocks, project to the latent vector *)
  encoder = NetChain[{
     EmbeddingLayer[embDim, nColors],
     Splice@Table[convLayer[convChannels, 2, nDilations], {n, 3}],
     FlattenLayer[],
     LinearLayer[latentDim]
     }, "Input" -> {caSize, caSize}];

  resize = ResizeLayer[{Scaled[2], Scaled[2]}, Resampling -> "Linear", Interleaving -> True];

  (* decoder: expand the latent vector back to a caSize x caSize array of per-cell color probabilities *)
  decoder = NetChain[{
     LinearLayer[8*8*convChannels],
     ReshapeLayer[{8, 8, convChannels}],
     Splice@Flatten@Table[{resize, convLayer[convChannels, 1, nDilations]}, {n, 3}],
     ConvolutionLayer[nColors, 1, Interleaving -> True],
     SoftmaxLayer[]
     }, "Input" -> latentDim];

  loss = CrossEntropyLossLayer["Index"];

  net = NetGraph[{
     "encoder" -> NetFlatten@encoder,
     "decoder" -> NetFlatten@decoder
     }, {
     "encoder" -> "decoder"
     }];

  lossNet = NetGraph[{
     "net" -> net,
     "loss" -> loss
     }, {
     "net" -> NetPort["loss", "Input"],
     NetPort["Target"] -> NetPort["loss", "Target"]
     }];

  SeedRandom[seed];
  generateCA := CellularAutomaton[ruleSpec, RandomInteger[nColors - 1, caSize], 2 caSize - 1] + 1;

  (* each example: the first caSize steps of a random evolution as input, the next caSize steps as target *)
  dataGenerator[params_Association] :=
   Table[With[{ca = generateCA},
     <|"Input" -> ca[[;; caSize]], "Target" -> ca[[caSize + 1 ;;]]|>],
    params["BatchSize"]];

  initNet = NetInitialize@lossNet;
  Export[FileNameJoin[{basePath, experimentName, "init.wlnet"}], initNet];

  NetTrain[initNet, dataGenerator, All, RandomSeeding -> seed,
   BatchSize -> 32, TargetDevice -> "GPU",
   MaxTrainingRounds -> maxTrainingRounds,
   TrainingProgressCheckpointing -> {"Directory",
     FileNameJoin[{basePath, experimentName}],
     "Interval" -> Quantity[1000, "Rounds"]}]
  ]
TrainCAAutoencoder[<|"OuterTotalisticCode" -> 600, "Range" -> 2|>, "outer600"]

Train a Two-Dimensional Autoencoder on Cellular Automata

TrainCAAutoencoder[
   ruleSpec_ : <|"OuterTotalisticCode" -> 626, "Range" -> 2|>,
   experimentName_String : "outer626_2D",
   basePath_String : "AISolveScienceCheckpoints-02"] :=
 Module[{embDim, nColors, caSize, convChannels, latentDim, conv, encoder, resize, decoder, loss, net, generateCA, dataGenerator, initNet},

  nColors = 2;
  caSize = 64;
  embDim = 64;
  convChannels = 64;
  latentDim = 2;   (* two-dimensional latent space *)

  conv[convChannels_Integer, stride_Integer : 2, convSize_Integer : 3, dilation_Integer : 1, layer_Symbol : ConvolutionLayer] :=
   NetChain[{
     layer[convChannels, convSize, "Stride" -> stride, "Dilation" -> dilation,
      PaddingSize -> {((convSize - 1)/2) dilation, ((convSize - 1)/2) dilation}, Interleaving -> True],
     BatchNormalizationLayer[],
     Ramp
     }];

  (* encoder: embed cell values, halve the spatial size at each convolution block, then step the fully connected layers down to the latent dimension *)
  encoder = NetChain[{
     EmbeddingLayer[embDim, nColors],
     Table[conv[convChannels*2^n], {n, 0, Log2[caSize] - 1}],
     FlattenLayer[],
     Splice@Take[Flatten[
        Table[{LinearLayer[2^n], BatchNormalizationLayer[], Ramp},
         {n, Log2[convChannels*(2^(Log2[caSize] - 1))] - 1, Log2[latentDim], -1}]], {1, -2}]
     }, "Input" -> {caSize, caSize}];

  resize = ResizeLayer[{Scaled[2], Scaled[2]}, Resampling -> "Linear", Interleaving -> True];

  (* decoder: step the fully connected layers back up, then upsample with convolution blocks to a caSize x caSize array of per-cell color probabilities *)
  decoder = NetChain[{
     Splice@Flatten[
       Table[{LinearLayer[2^n], BatchNormalizationLayer[], Ramp},
        {n, Log2[latentDim] + 1, Log2[convChannels*(2^(Log2[caSize] - 1))]}]],
     ReshapeLayer[{1, 1, convChannels*(2^(Log2[caSize] - 1))}],
     Splice@Table[{resize, conv[convChannels*2^n, 1]}, {n, Log2[caSize] - 1, 0, -1}],
     ConvolutionLayer[nColors, 1, Interleaving -> True],
     SoftmaxLayer[]
     }, "Input" -> latentDim];

  loss = CrossEntropyLossLayer["Index"];

  net = NetGraph[{
     "encoder" -> encoder,
     "decoder" -> decoder,
     "loss" -> loss
     }, {
     "encoder" -> "decoder",
     "decoder" -> NetPort["loss", "Input"],
     NetPort["Target"] -> NetPort["loss", "Target"]
     }];

  generateCA := CellularAutomaton[ruleSpec, RandomInteger[nColors - 1, caSize], 2 caSize - 1] + 1;

  (* each example uses the same caSize-step patch as both input and target: a plain autoencoding task *)
  dataGenerator[params_Association] :=
   Table[With[{ca = generateCA},
     <|"Input" -> ca[[;; caSize]], "Target" -> ca[[;; caSize]]|>],
    params["BatchSize"]];

  initNet = NetInitialize@net;
  Export[FileNameJoin[{basePath, experimentName, "init.wlnet"}], initNet];

  NetTrain[initNet, dataGenerator, All, BatchSize -> 32,
   TargetDevice -> "GPU", MaxTrainingRounds -> 1000000,
   TrainingProgressCheckpointing -> {"Directory",
     FileNameJoin[{basePath, experimentName}],
     "Interval" -> Quantity[1000, "Rounds"]}]
  ]
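Called with its default rule and experiment name (written out explicitly here), the function above trains the two-dimensional-latent autoencoder on outer totalistic code 626:

TrainCAAutoencoder[<|"OuterTotalisticCode" -> 626, "Range" -> 2|>, "outer626_2D"]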

Train a GraphDistance Estimation Network
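The original listing for this section is not reproduced here. Below is a minimal sketch of one way to set up such a network: a feed-forward model trained to estimate GraphDistance between random vertex pairs of a fixed random graph. The graph size, architecture, and training settings are illustrative assumptions, not the settings used in the article.

SeedRandom[12345];
graph = RandomGraph[{100, 300}];                (* fixed random graph with integer vertices 1..100 *)
pairs = RandomInteger[{1, 100}, {20000, 2}];    (* random vertex pairs *)
dists = GraphDistance[graph, #1, #2] & @@@ pairs;
keep = Position[dists, _Integer, {1}, Heads -> False];   (* drop disconnected pairs (infinite distance) *)
inputs = Extract[pairs, keep];
targets = List /@ N[Extract[dists, keep]];

net = NetChain[{
   EmbeddingLayer[32, 100],   (* embed each of the two vertex indices *)
   FlattenLayer[],
   LinearLayer[128], Ramp,
   LinearLayer[128], Ramp,
   LinearLayer[1]             (* estimated graph distance *)
   }, "Input" -> {2}];

NetTrain[net, inputs -> targets, All, BatchSize -> 32, MaxTrainingRounds -> 1000]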

Train a Character-Level LLM on MultiwaySystem Shortest Path Data
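The original listing for this section is not reproduced here. Below is a minimal sketch of the general setup: generate shortest paths in the states graph of a multiway string-substitution system (via the MultiwaySystem resource function), serialize each path as a string, and train a GRU-based character-level model for next-character prediction by teacher forcing. The rule, path-string format, architecture, and training settings are illustrative assumptions.

statesGraph = ResourceFunction["MultiwaySystem"][{"A" -> "AB", "B" -> "A"}, "A", 8, "StatesGraph"];
states = VertexList[statesGraph];

SeedRandom[12345];
(* serialize a random shortest path as a ":"-separated string of states (an assumed format) *)
pathStrings = Select[
   Table[StringRiffle[
     FindShortestPath[statesGraph, RandomChoice[states], RandomChoice[states]], ":"], {5000}],
   StringLength[#] > 1 &];

alphabet = Union[Characters[StringJoin[pathStrings]]];

predictNet = NetChain[{
   EmbeddingLayer[64, Length[alphabet]],
   GatedRecurrentLayer[128],
   GatedRecurrentLayer[128],
   NetMapOperator[LinearLayer[Length[alphabet]]],
   SoftmaxLayer[]
   }];

(* teacher forcing: predict character k+1 from characters 1..k *)
lossNet = NetGraph[<|
   "most" -> SequenceMostLayer[],
   "rest" -> SequenceRestLayer[],
   "predict" -> predictNet,
   "loss" -> CrossEntropyLossLayer["Index"]
   |>, {
   NetPort["Input"] -> "most" -> "predict" -> NetPort["loss", "Input"],
   NetPort["Input"] -> "rest" -> NetPort["loss", "Target"]
   }, "Input" -> NetEncoder[{"Characters", alphabet}]];

NetTrain[lossNet, <|"Input" -> pathStrings|>, All, BatchSize -> 32, MaxTrainingRounds -> 10000]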

Train a Network to Solve Instances of the Three-Body Problem
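The original listing for this section is not reproduced here. Below is a minimal sketch of the setup: generate planar, equal-mass (G = m = 1) three-body trajectories with NDSolve, then train a feed-forward network to map an initial condition plus a query time to the body positions at that time. The sampling scheme, architecture, and training settings are illustrative assumptions.

threeBodyExamples[nTraj_Integer, nSamples_Integer, tMax_] :=
 Module[{x, y, t, eqns, ics, funcs, init, examples = {}},
  Do[
   init = RandomReal[{-1, 1}, 12];   (* {x1, y1, ..., x3, y3, vx1, vy1, ..., vx3, vy3} *)
   eqns = Flatten@Table[{
       x[i]''[t] == Sum[If[j == i, 0,
          (x[j][t] - x[i][t])/((x[j][t] - x[i][t])^2 + (y[j][t] - y[i][t])^2)^(3/2)], {j, 3}],
       y[i]''[t] == Sum[If[j == i, 0,
          (y[j][t] - y[i][t])/((x[j][t] - x[i][t])^2 + (y[j][t] - y[i][t])^2)^(3/2)], {j, 3}]},
      {i, 3}];
   ics = Flatten@Table[{
       x[i][0] == init[[2 i - 1]], y[i][0] == init[[2 i]],
       x[i]'[0] == init[[6 + 2 i - 1]], y[i]'[0] == init[[6 + 2 i]]}, {i, 3}];
   (* close encounters can make the integration stiff; warnings are silenced for this sketch *)
   funcs = Quiet@NDSolveValue[Join[eqns, ics],
      Flatten@Table[{x[i], y[i]}, {i, 3}], {t, 0, tMax}];
   examples = Join[examples,
     Table[With[{time = RandomReal[{0, tMax}]},
       {Join[init, {time}], #[time] & /@ funcs}], {nSamples}]],
   {nTraj}];
  examples];

net = NetChain[{
   LinearLayer[256], Ramp,
   LinearLayer[256], Ramp,
   LinearLayer[256], Ramp,
   LinearLayer[6]    (* predicted {x1, y1, x2, y2, x3, y3} at the queried time *)
   }, "Input" -> 13];

trainingPairs = threeBodyExamples[200, 50, 5];
NetTrain[net, trainingPairs[[All, 1]] -> trainingPairs[[All, 2]], All,
 BatchSize -> 32, MaxTrainingRounds -> 10000]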