(* To keep things simple, only pfd reports of certain types are collected.
   Each string is used as the value of the "pfd_report_type" query parameter
   when searching www.judiciary.uk. *)
pfdReportTypes = {
  "hospital-death-clinical-procedures-and-medical-management-related-deaths",
  "alcohol-drug-and-medication-related-deaths",
  "care-home-health-related-deaths",
  "community-health-care-and-emergency-services-related-deaths",
  "emergency-services-related-deaths-2019-onwards",
  "mental-health-related-deaths"
};

getPFDList::usage =
  "getPFDList[start, end] fetches search-result pages start through end \
(inclusive) from www.judiciary.uk for every report type in pfdReportTypes, \
follows each result card to its individual report page, and returns the list \
of linked report file URLs (one per report page that contains such a link).";

(* getPFDList[start, end]
     start, end - first and last results-page number to fetch (inclusive).
   Returns a flat list of URL strings, taken from the first
   "related-content__link" anchor found on each individual report page.
   Pages that fail to download, or that contain no such anchor, are skipped
   rather than producing $Failed entries or First::nofirst errors.
   Performs network requests: (end - start + 1) * Length[pfdReportTypes]
   results pages, plus one request per distinct report page found. *)
getPFDList[start_, end_] := Module[
  {htmlDocs, individualPageLinks, individualPages},

  (* Download every results page: outer index iterates through the page
     numbers, inner index cycles through just the report types we want. *)
  htmlDocs = Table[
    Import[
      URLBuild[<|
        "Scheme" -> "https",
        "Domain" -> "www.judiciary.uk",
        "Path" -> {"page", ToString[page]},
        (* The query parameters must be grouped in a list; otherwise only the
           first rule is bound to "Query" and the rest become stray keys. *)
        "Query" -> {
          "pfd_report_type" -> reportType,
          "post_type" -> "pfd",
          "order" -> "desc"
        },
        "Fragment" -> None
      |>],
      "Text"
    ],
    {page, start, end},
    {reportType, pfdReportTypes}
  ];

  (* Keep only successful downloads (Import yields $Failed on error). *)
  htmlDocs = Cases[Flatten[htmlDocs], _String];

  (* Now that we have the results pages, grab the link to each individual
     report's page. *)
  individualPageLinks = Flatten[
    StringCases[
      htmlDocs,
      "<a class=\"card__link\" href=\"" ~~ Shortest[url___] ~~ "\"" :> url
    ]
  ];

  (* The searches may have had overlapping results, which should be removed. *)
  individualPageLinks = DeleteDuplicates[individualPageLinks];

  (* Each of these pages contains a link to a PDF file with the report we
     want; again discard any page that could not be downloaded. *)
  individualPages = Cases[Import[#, "Text"] & /@ individualPageLinks, _String];

  (* Take at most one match (the first) per page; pages without a match
     contribute an empty list, which Flatten removes. *)
  Flatten[
    StringCases[
      individualPages,
      "related-content__link\" href=\"" ~~ Shortest[url___] ~~ "\"" :> url,
      1
    ]
  ]
]