(* ::Package:: *)

(* LinguisticInterconcepts
   Helpers for semantic-index creation, blending of embedding vectors,
   n-gram (Markov-style) text generation, part-of-speech extraction and
   LLM-based rephrasing. *)

BeginPackage["LinguisticInterconcepts`"];

MakeIndex::usage = "MakeIndex[text_String, opts] creates index from text.";

MakeNGramTrie::usage = "MakeNGramTrie[text_String, ngram_Integer] creates a trie of the word and punctuation n-grams of text.";

GenerateMarkovText::usage = "GenerateMarkovText[text, ngram, maxSentences, seed] generates new Markov-style text from text. GenerateMarkovText[trie, maxSentences, seed] generates it from an n-gram trie.";

RephraseAndShow::usage = "RephraseAndShow[text] rephrases text using LLM and returns the rephrased text.";

BlendVector::usage = "BlendVector[index, y] blends semantic vectors in the index.";

SentencePOS::usage = "SentencePOS[s] gives a list of POS rules for the given sentence.";

LLMRephraseByFeatures::usage = "LLM rephrase by sentence features.";

Begin["`Private`"];

Needs["AntonAntonov`TriesWithFrequencies`"];

(* ---------------------------------------------------------------- *)
(* MakeIndex                                                        *)
(* ---------------------------------------------------------------- *)

Clear[MakeIndex];

Options[MakeIndex] = {"Granularity" -> Automatic};

(* Splits text into chunks and passes them to CreateSemanticSearchIndex.
   "Granularity" -> "Paragraphs" (also used for Automatic) splits on blank lines;
   "Granularity" -> "Sentences" uses TextSentences.
   Any other value echoes a message and gives $Failed. *)
MakeIndex[text_String, opts : OptionsPattern[]] :=
  Block[{gran, paragraphs},

    gran = OptionValue[MakeIndex, "Granularity"];
    If[gran === Automatic, gran = "Paragraphs"];

    (* Switch matches structurally, so non-string option values (e.g. symbols)
       reach the fallback branch instead of leaving a comparison unevaluated. *)
    paragraphs =
      Switch[gran,
        "Paragraphs",
        Select[StringTrim /@ StringSplit[text, RegularExpression["\\n\\h*\\n"]], StringLength[#] > 0 &],
        "Sentences",
        Select[StringTrim /@ TextSentences[text], StringLength[#] > 0 &],
        _,
        $Failed
      ];

    If[paragraphs === $Failed,
      Echo["Do not know how to process the value of \"Granularity\"."];
      $Failed,
      CreateSemanticSearchIndex[paragraphs]
    ]
  ];

(* ---------------------------------------------------------------- *)
(* BlendVector                                                      *)
(* ---------------------------------------------------------------- *)

Clear[BlendVector];

(* Blends two embedding vectors of the index as V1 + y * V2 and looks up the
   nearest index item to the blend that is neither the V1 item nor the V2 item.
   Applies only when both V1 and V2 are members of index["Embeddings"].
   Gives an association with keys "V1Item", "V2Item", "BlendedVector", "MatchedItem";
   "MatchedItem" is Missing["NotAvailable"] when no other item is among the
   (up to) 5 nearest neighbours of the blend. *)
BlendVector[index_, V1_?VectorQ, V2_?VectorQ, y_ : 0.5] :=
  Block[{embeddings, nfVecs, Vnew, pos1, pos2, nns, matchedPos},

    embeddings = Normal[index["Embeddings"]];
    nfVecs = Nearest[embeddings -> "Index"];

    Vnew = V1 + y * V2;

    pos1 = nfVecs[V1] // First;
    pos2 = nfVecs[V2] // First;

    (* DeleteCases keeps the nearest-first order returned by Nearest;
       Complement would sort the positions and lose it. *)
    nns = DeleteCases[nfVecs[Vnew, 5], pos1 | pos2];

    matchedPos = If[nns === {}, Missing["NotAvailable"], First[nns]];

    <|
      "V1Item" -> index["Items"][[pos1]],
      "V2Item" -> index["Items"][[pos2]],
      "BlendedVector" -> Vnew,
      "MatchedItem" -> If[MissingQ[matchedPos], matchedPos, index["Items"][[matchedPos]]]
    |>
  ] /; MemberQ[Normal@index["Embeddings"], V1] && MemberQ[Normal@index["Embeddings"], V2];

(* Picks two embedding vectors of the index at random and blends them. *)
BlendVector[index_, y_ : 0.5] :=
  Module[{embeddings, V1, V2},
    embeddings = Normal[index["Embeddings"]];
    V1 = RandomChoice[embeddings];
    V2 = RandomChoice[embeddings];
    BlendVector[index, V1, V2, y]
  ];

(* ---------------------------------------------------------------- *)
(* RephraseAndShow                                                  *)
(* ---------------------------------------------------------------- *)

Clear[RephraseAndShow];

(* Sends text to an LLMFunction with a "make it clearer" prompt and gives the result.
   The printing of the original and rephrased versions is currently disabled. *)
RephraseAndShow[text_] :=
  Module[{rephraseLLM, rephrased},

    rephraseLLM =
      LLMFunction[
        "Rephrase the following text to make it clearer:\n\n```text\n`1`\n```"
      ];

    rephrased = rephraseLLM[text];

    (*
    Print["\n\!\(\*StyleBox[\"ORIGINAL TEXT:\", FontWeight -> \"Bold\"]\)\n", text];
    Print["\n\!\(\*StyleBox[\"REPHRASED TEXT:\", FontWeight -> \"Bold\"]\)\n", rephrased];
    *)

    rephrased
  ];

(* ---------------------------------------------------------------- *)
(* MakeNGramTrie                                                    *)
(* ---------------------------------------------------------------- *)

Clear[MakeNGramTrie];

(* Tokenizes text into words and punctuation, lower-cases the tokens, and creates
   a trie (via TrieCreate) from all overlapping token n-grams of length ngram. *)
MakeNGramTrie[text_String, ngram_Integer] :=
  Module[{lsWords},
    lsWords = TextCases[text, "Word" | "Punctuation"];
    TrieCreate[Partition[ToLowerCase[lsWords], ngram, 1]]
  ];

(* ---------------------------------------------------------------- *)
(* GenerateMarkovText                                               *)
(* ---------------------------------------------------------------- *)

Clear[GenerateMarkovText];

(* Trie form: starting from seed, repeatedly appends a random continuation of the
   last word taken from the trie until maxSentences tokens equal to "." have been
   appended. The tokens are then joined into a string, the space before punctuation
   is removed, and the first letter of each sentence is capitalized.
   The first argument excludes strings so that calls with a text are handled by
   the text form below regardless of definition order. *)
GenerateMarkovText[tr : Except[_String], maxSentences_ : 2, seed_ : {"the", "idea"}] :=
  Module[{aRes, res1, res2, step},

    step =
      Function[state,
        Module[{next},
          next = Rest @ TrieRandomChoice[TrieSubTrie[tr, {Last[state["Words"]]}]];
          (* NOTE(review): assumes a usable continuation is a non-empty list of tokens;
             anything else (e.g. last word not in the trie) stops the generation
             instead of iterating without progress -- confirm against the trie package. *)
          If[ListQ[next] && Length[next] > 0,
            <|
              "Words" -> Join[state["Words"], next],
              "Sentences" -> If[Last[next] === ".", state["Sentences"] + 1, state["Sentences"]],
              "Stuck" -> False
            |>,
            Append[state, "Stuck" -> True]
          ]
        ]
      ];

    aRes =
      NestWhile[
        step,
        <|"Words" -> seed, "Sentences" -> 0, "Stuck" -> False|>,
        ! #Stuck && #Sentences < maxSentences &
      ];

    (* Attach punctuation to the preceding word. *)
    res1 =
      StringReplace[
        StringRiffle[aRes["Words"], " "],
        " " ~~ x : PunctuationCharacter :> x
      ];

    (* Capitalize the first letter of the text and of every sentence after ". ". *)
    res2 =
      StringReplace[
        Capitalize[res1],
        ". " ~~ x : LetterCharacter :> ". " <> Capitalize[x]
      ];

    res2
  ];

(* Text form: builds the n-gram trie of text and delegates to the trie form. *)
GenerateMarkovText[text_String, ngram_Integer : 2, maxSentences_ : 2, seed_ : {"the", "idea"}] :=
  Module[{trWords},
    trWords = MakeNGramTrie[text, ngram];
    GenerateMarkovText[trWords, maxSentences, seed]
  ];

(* ---------------------------------------------------------------- *)
(* SentencePOS                                                      *)
(* ---------------------------------------------------------------- *)

Clear[SentencePOS];

(* Gives a list of rules word -> grammatical unit name, extracted from the
   TextElement leaves of TextStructure[s]. *)
SentencePOS[s_String] :=
  Block[{t, gunits},
    t = TextStructure[s];
    gunits =
      Cases[
        t,
        TextElement[w_String, Association[Rule["GrammaticalUnit", Entity["GrammaticalUnit", pos_String]]]] :> {w, pos},
        Infinity
      ];
    Rule @@@ gunits
  ];

(* ---------------------------------------------------------------- *)
(* LLMRephraseByFeatures                                            *)
(* ---------------------------------------------------------------- *)

Clear[LLMRephraseByFeatures];

(* Asks the LLM for a sentence that uses the given word -> part-of-speech rules
   (sent as JSON), has the given topic, and has the given sentiment. *)
LLMRephraseByFeatures[pos : {_Rule ..}, fbTopic_String, sentiment_String] :=
  LLMSynthesize[
    {
      "Write a sentence that uses the following word-to-part-of-speech dictionary.",
      ExportString[pos, "JSON"],
      (* The space before "sentiment." keeps the sentiment word separate in the prompt. *)
      "The sentence should have the FaceBook topic: " <> fbTopic <> " and a " <> sentiment <> " sentiment."
    },
    LLMEvaluator -> LLMConfiguration[<|"Model" -> "gpt-4.1-mini"|>]
  ];

End[];

EndPackage[];