'Downloads a Wikipedia article through PowerShell and extracts the plain text of its paragraphs.
'Result: rawline$(1) .. rawline$(x) each hold one "<p>" line of the page with all HTML tags removed,
'and x is the number of paragraphs found.

REDIM rawline$(16) 'dynamic array, grown on demand below (an implicitly created array would end at index 10)

word$(c) = "QB64" 'replace with the word you want to have extracted, could be used in a loop to extract more than one word
shellcmd$ = "start powershell -Command " + CHR$(34) + "curl https://en.wikipedia.org/wiki/" + word$(c) + " -o temp.txt" + CHR$(34)
'starts powershell from the CMD directly with the command because curl doesn't work from CMD
SHELL _HIDE shellcmd$
SLEEP 5 'waits for the file to be created, there is probably a better way to determine when the file is created

x = 0
IF _FILEEXISTS("temp.txt") THEN
    fh = FREEFILE
    OPEN "temp.txt" FOR INPUT AS #fh
    DO UNTIL EOF(fh)
        LINE INPUT #fh, line$
        IF MID$(line$, 1, 3) = "<p>" THEN 'only reads paragraphs, as this is the safest method for getting the raw text, feel free to toy around with other things as well
            x = x + 1
            IF x > UBOUND(rawline$) THEN REDIM _PRESERVE rawline$(x * 2) 'keep the paragraphs collected so far
            rawline$(x) = ""
            p = 0
            DO WHILE p < LEN(line$)
                p = p + 1
                IF MID$(line$, p, 1) = "<" THEN 'for now just excludes every html command to get a clean text
                    'move p forward to the closing ">"; the length check keeps an unterminated tag from looping forever
                    DO
                        p = p + 1
                    LOOP UNTIL MID$(line$, p, 1) = ">" OR p >= LEN(line$)
                ELSE
                    rawline$(x) = rawline$(x) + MID$(line$, p, 1) 'saves the respective paragraph as a raw line, accessible with parameter x as count for paragraphs in an article
                END IF
            LOOP
            'PRINT rawline$(x) 'uncomment this line to print the raw data on the screen and check for eventual bugs in the raw lines
            'here is the space for processing of each line
        END IF
    LOOP
    CLOSE #fh
ELSE
    'the download did not produce a file within the wait above; opening it would stop the program with a runtime error
    PRINT "temp.txt not found - the download failed or has not finished yet"
END IF