Post by B+ on Apr 21, 2019 17:49:03 GMT
'Fun with N-grams.txt for JB v2.0 B+ 2019-04-21
' translated from: Ngrams 1.bas posted 2019-04-20 B+ for QB64
' ref: Daniel Shiffman Coding Challenge #42.1 Markov Chains Part 1
' https://www.youtube.com/watch?v=eGFJ8vugIWA&list=PLRqwX-V7Uu6ZiZxtDDRCi6uhfTH4FilpH&index=52
' Basically: take some text and collect every unique n-gram of a fixed length; each gram is stored in grams$(i, 0).
' Collect all the characters that follow a gram in grams$(i, 1).
' Create a new random text from the same n-grams by randomly selecting among the characters that follow each gram.
global ngLen, nextGI
' Main program: build a table of n-grams from "Ngram Source.txt" and use it to
' generate 4000 characters of random text, printed as 100 rows of 40 characters.
'
' grams$(i, 0) holds a gram; grams$(i, 1) holds every character seen directly
' after that gram, each one terminated by "~" (so it is always 2 chars per entry).
' grams$() is kept sorted on column 0 so find() can binary-search it.
maxGrams = 10000 'capacity of grams$()
Dim grams$(maxGrams, 1) '(i, 0) stores grams, (i, 1) stores nextChar indexed by nextGI
ngLen = 6 'length of grams
nextGI = 1 'next insertion index for grams$ and also tracks grams$() top item index = nextGI - 1
tableFull = 0 'set to 1 if the text holds more unique grams than grams$() can store

'get some text, setup a text file called "Ngram Source.txt" and put the text in it.
'non-blank lines are trimmed and joined with single spaces into one long string
OPEN "Ngram Source.txt" FOR INPUT AS #1
WHILE EOF(#1) = 0
    LINE INPUT #1, t$
    t$ = trim$(t$)
    if t$ <> "" then txt$ = txt$ + " " + t$
WEND
CLOSE #1
txt$ = trim$(txt$)

'Load the grams array.
'The last gram that still has a following character starts at LEN(txt$) - ngLen,
'so the loop runs up to and including that position.
FOR i = 1 TO LEN(txt$) - ngLen
    scan
    ngCandidate$ = MID$(txt$, i, ngLen)
    nextChar$ = MID$(txt$, i + ngLen, 1)
    PRINT "ngCandidate$ "; ngCandidate$
    found = find(ngCandidate$)
    IF found = 0 THEN 'new gram
        IF nextGI > maxGrams THEN
            'no room left: stop collecting instead of running off the end of the array
            tableFull = 1
            EXIT FOR
        END IF
        grams$(nextGI, 0) = ngCandidate$
        grams$(nextGI, 1) = nextChar$ + "~"
        'Re-sort so find() keeps working. The original author reported that JB's
        'built-in Sort did not order the array as expected, hence the hand-written QSort
        '(way slower but works!).
        call QSort 1, nextGI
        nextGI = nextGI + 1
    ELSE
        'known gram: remember one more possible following character
        grams$(found, 1) = grams$(found, 1) + nextChar$ + "~"
    END IF
NEXT
cls

'nothing to generate from if the text was shorter than ngLen + 1 characters
IF nextGI = 1 THEN
    PRINT "No grams collected: the source text needs more than "; ngLen; " characters."
    END
END IF

'generate random text
'Start from a random index and walk forward (wrapping around) to the first gram that
'starts with a Capital letter. rnd(0) is used here as in the rest of the program; the
'bare RND used before always produced the same starting gram.
gramCount = nextGI - 1
i = INT(rnd(0) * gramCount) + 1
tries = 0
WHILE rtxt$ = ""
    scan
    tries = tries + 1
    startsUpper = INSTR("ABCDEFGHIJKLMNOPQRSTUVWXYZ", LEFT$(grams$(i, 0), 1))
    'after one full pass without finding a capital, accept whatever gram is at i
    'rather than looping forever
    IF startsUpper > 0 OR tries > gramCount THEN
        rtxt$ = grams$(i, 0)
        wc = len(grams$(i, 1))/2 'each stored follower is 1 char + "~"
        r = int(rnd(0) * wc) + 1
        rtxt$ = rtxt$ + word$(grams$(i, 1), r, "~")
    END IF
    i = i + 1
    IF i > gramCount THEN i = 1
WEND
PRINT rtxt$, LEN(rtxt$)

'Extend the text one character at a time: look up the last ngLen characters and
'append a randomly chosen follower; if that gram was never seen, append a space.
WHILE LEN(rtxt$) < 4000
    scan
    g$ = MID$(rtxt$, LEN(rtxt$) - ngLen + 1)
    found = find(g$)
    IF found > 0 THEN
        wc = len(grams$(found, 1))/2
        r = int(rnd(0) * wc) + 1
        rtxt$ = rtxt$ + word$(grams$(found, 1), r, "~")
    ELSE
        rtxt$ = rtxt$ + " "
    END IF
WEND
cls

'show the first 4000 characters as 100 rows of 40 characters each
for row = 0 to 99
    PRINT mid$(rtxt$, row*40 + 1, 40)
next
Print: Print "Random text making done, page up to review results."
IF tableFull = 1 THEN PRINT "Note: grams$() filled up at "; maxGrams; " grams, the rest of the source text was not used."
SUB QSort Start, Finish
    ' Recursive quicksort of grams$() rows Start..Finish, ascending on column 0 (the gram).
    ' Column 1 (the following characters) is swapped together with its gram.
    ' The pivot is the gram stored at the middle index of the range.
    left = Start
    right = Finish
    pivot$ = grams$(INT((left + right) / 2), 0)
    WHILE left <= right
        'skip rows that are already on the correct side of the pivot
        WHILE grams$(left, 0) < pivot$
            left = left + 1
        WEND
        WHILE grams$(right, 0) > pivot$
            right = right - 1
        WEND
        IF left <= right THEN
            'exchange both columns of the two rows
            holdFollow$ = grams$(left, 1)
            holdGram$ = grams$(left, 0)
            grams$(left, 1) = grams$(right, 1)
            grams$(left, 0) = grams$(right, 0)
            grams$(right, 1) = holdFollow$
            grams$(right, 0) = holdGram$
            left = left + 1
            right = right - 1
        END IF
    WEND
    'sort the two partitions that remain on either side
    IF Start < right THEN call QSort Start, right
    IF Finish > left THEN call QSort left, Finish
END SUB
FUNCTION find (x$)
    ' Binary search of the sorted grams$(1 .. nextGI - 1, 0) for x$.
    ' Returns the index of the matching gram, or 0 when x$ is not in the table
    ' (including when the table is still empty, i.e. nextGI = 1).
    hit = 0
    bottom = 1
    top = nextGI - 1
    WHILE (bottom <= top) AND (hit = 0)
        scan
        probe = int((bottom + top) / 2)
        IF grams$(probe, 0) = x$ THEN
            hit = probe
        ELSE
            'discard the half of the range that cannot contain x$
            IF grams$(probe, 0) < x$ THEN
                bottom = probe + 1
            ELSE
                top = probe - 1
            END IF
        END IF
    WEND
    find = hit
END FUNCTION
Change ngLen to 3 or 4 for more varied word creations.
I used an excerpt from Alice in Wonderland as the test text source:
Ngram Source.txt (1.21 KB)
The code searches for an N-gram starting with a Capital letter to start the Random Text generation. So make sure your text has some Capital letters if you want to try this code.
Here is a sample output:
Alice think it so very tired of sitting
by her. There was considering in her own
mind (as well as she could, for it flas
hed across her mind that she had never b
efore seen a rabbit with either a waistc
oat-pocket and then hurried on, Alice af
ter it and looked at it and then hurried
on, Alice after it and looked at it and
looked at it and looked at it and looke
d at it and was just in time to see it p
op down a large rabbit with either a wat
ch out of its waistcoat-pocket, or a wai
stcoat-pocket, or a waistcoat-pocket and
was just in time to see it pop down wen
t Alice think it so very much out of its
waistcoat-pocket, or a watch to take ou
t of the way to her feet, for it flashed
across the field after it and was just
in time to see it pop down went Alice wa
s nothing to get very sleepy and stupid)
, whether the hedge. In another moment,
down a large rabbit with either a watch
out of the way to hear the day made her
feet, for it flashed across the use of a
book," thought Alice was considering in
her own mind that she had never before
seen a rabbit with curiosity, she ran ac
ross her mind that she had peeped into t
he bank, and of having nothing so very r
emarkable in that, nor did Alice was beg
inning to get very remarkable in that, n
or did Alice started to hear the hedge.
In another moment, down went Alice was c
onsidering in her own mind (as well as s
he could, for the pleasure of making a d
aisy-chain would be worth the trouble of
getting by her. There was considering i
n her own mind that she had peeped into
the bank, and of having nothing to get v
ery tired of sitting by her. There was b
eginning to do. Once or twice she had ne
ver before seen a rabbit say to itself,
"Oh dear! I shall be too late!" But when
the bank, and of having nothing to do.
Once or twice she had peeped into the ba
nk, and of having nothing to do. Once or
twice she had no pictures or conversati
ons?" So she was nothing so very sleepy
and stupid), whether the hedge. In anoth
er moment, down went Alice think it so v
ery remarkable in that, nor did Alice th
ink it so very much out of it, and, burn
ing with pink eyes ran close by her. The
re was beginning to get very much out of
it, and, burning with pink eyes ran clo
se by her. There was beginning to get ve
ry much out of the way to hear the pleas
ure of making a daisy-chain would be wor
th the trouble of getting up and picking
the daisies, when suddenly a White Rabb
it with either a watch to take out of th
e way to her feel very sleepy and stupid
), whether the daisies, when suddenly a
White Rabbit with either a waistcoat-poc
ket and then hurried on, Alice started t
o hear the Rabbit say to her feel very m
uch out of its waistcoat-pocket and then
hurried on, Alice started to hear the h
edge. In another moment, down went Alice
think it so very tired of sitting up an
d picking the daisies, when suddenly a W
hite Rabbit actually took a watch out of
it, and, burning with pink eyes ran clo
se by her sister was reading, but it had
never before seen a rabbit-hole, under
the day made her feet, for the hedge. In
another moment, down went Alice think i
t so very sleepy and stupid), whether th
e hedge. In another moment, down a large
rabbit say to hear the pleasure of maki
ng a daisy-chain would be worth the trou
ble of getting up and picking the day ma
de her feet, for the pleasure of making
a daisy-chain would be worth the trouble
of getting by her sister on the bank, a
nd of having nothing to get very much ou
t of its waistcoat-pocket and was just i
n time to see it pop down a large rabbit
-hole, under the pleasure of making a da
isy-chain would be worth the trouble of
getting by her sister was reading, but i
t had no pictures or conversations in it
, "and what is the field after it and th
en hurried on, Alice was beginning to do
. Once or twice she had peeped into the
book her sister was reading, but it had
no pictures or conversations?" So she wa
s beginning to do. Once or twice she had
peeped into the bank, and of having not
hing to do. Once or twice she had peeped