GenBank
GenBank
In[]:=
(* Copy the downloaded FASTA file into the repository's data-files directory.
   Both paths are absolute and specific to one machine/user account.
   NOTE(review): CopyFile does not overwrite an existing destination unless
   OverwriteTarget->True is given, so re-running this cell is expected to fail
   once the target exists - confirm that is intended. *)
CopyFile["C:\\Users\\arnoudb.WRI\\Downloads\\sequences.fasta","D:\\git\\wolfram-coronavirus\\data-files\\sequences.fasta"]
Out[]=
D:\git\wolfram-coronavirus\data-files\sequences.fasta
In[]:=
(* Load the copied FASTA file into the global symbol list. No format is given
   explicitly, so Import chooses it from the file name. The following cells
   treat the result as a list of sequence strings (Length, StringLength). *)
list=Import["D:\\git\\wolfram-coronavirus\\data-files\\sequences.fasta"];
In[]:=
(* Number of records read from the FASTA file. *)
list//Length
Out[]=
49
In[]:=
(* Character count of every imported sequence. *)
StringLength/@list
Out[]=
{29903,290,290,29882,29881,29854,29891,29852,29857,29882,29825,29881,107,287,287,107,107,107,29891,287,29883,29903,29882,287,287,29838,107,107,107,29893,322,322,29882,29899,29889,29899,29883,29890,29847,29882,29882,29872,29868,29388,29882,29882,29882,29848,29866}
In[]:=
(* Keep only the sequences longer than 1000 characters; the short entries seen
   in the length listing (107, 287, 290, 322) are discarded. *)
list2=Select[list,Function[seq,StringLength[seq]>1000]];
In[]:=
(* Lengths of the sequences that passed the >1000-character filter. *)
StringLength/@list2
Out[]=
{29903,29882,29881,29854,29891,29852,29857,29882,29825,29881,29891,29883,29903,29882,29838,29893,29882,29899,29889,29899,29883,29890,29847,29882,29882,29872,29868,29388,29882,29882,29882,29848,29866}
In[]:=
(* Drop repeated sequences, keeping the first occurrence of each in its
   original position. *)
list3=DeleteDuplicates@list2;
In[]:=
(* Align the first two distinct sequences. The deduplicated list3 is used so
   this cell agrees with the later comparisons, which all index into list3.
   Entries 1 and 2 have different lengths (29903 and 29882), so they are the
   same two sequences in list2 and list3 and the result is unchanged. *)
alignment=SequenceAlignment[list3[[1]],list3[[2]]];
In[]:=
(* Give every subsequently created Framed box rounded corners (radius 3).
   The option must be passed as a rule; the arrow was missing, which left a
   bare symbol RoundingRadius3 that SetOptions cannot apply. *)
SetOptions[Framed,RoundingRadius->3];
In[]:=
(* Show the alignment as a row of frames separated by spaces: string segments
   are collapsed to a framed ellipsis whose tooltip carries the text, every
   other segment is shown as a framed column of its entries. *)
Row[Riffle[
  Function[segment,
    If[StringQ[segment],
      Tooltip[Framed["…"],segment],
      Framed[Column[segment]]]]/@alignment,
  " "]]
Out[]=
In[]:=
(* SequenceCompare[seq1,seq2] aligns the two sequences with SequenceAlignment
   and returns a Row of frames separated by spaces. Each string segment of the
   alignment is collapsed to a framed ellipsis whose tooltip holds the segment
   text; every other segment is shown as a framed Column of its entries. *)
SequenceCompare[seq1_,seq2_]:=Module[{segments,render},
  render=Function[segment,
    If[StringQ[segment],
      Tooltip[Framed["…"],segment],
      Framed[Column[segment]]]];
  segments=SequenceAlignment[seq1,seq2];
  Row[Riffle[render/@segments," "]]
]
In[]:=
(* Compare the first distinct sequence with the third. *)
SequenceCompare[First[list3],Part[list3,3]]
Out[]=
In[]:=
(* Compare the first distinct sequence with the seventh. *)
SequenceCompare[First[list3],Part[list3,7]]
Out[]=
In[]:=
(* Number of distinct long sequences remaining. *)
list3//Length
Out[]=
28
In[]:=
(* Compare the first distinct sequence against every other one. The upper
   bound is taken from list3 itself rather than a hard-coded count, so the
   cell stays correct when the input file changes. *)
Table[SequenceCompare[list3[[1]],list3[[n]]],{n,2,Length[list3]}]
Out[]=