NLTK11《Python自然语言处理》code10 分析语句的含义

时间:2022-03-24 07:19:04

分析语句的含义

# -*- coding: utf-8 -*-
# win10 python3.5.3/python3.6.1 nltk3.2.4
# 《Python自然语言处理》 10 分析语句的含义
# pnlp10.py
# 更官方的参考 http://www.nltk.org/book/ch10.html

# 需要特别关注prover9的安装
# pip3 install prove
# http://www.cs.unm.edu/~mccune/prover9/gui/Prover9-Mace4-v05-setup.exe
"""
# C:\Python36\Lib\site-packages\nltk\inference\prover9.py
def binary_locations(self):
return ['/usr/local/bin/prover9',
'/usr/local/bin/prover9/bin',
'/usr/local/bin',
'/usr/bin',
'/usr/local/prover9',
'/usr/local/share/prover9',
'C:/Program Files (x86)/Prover9-Mace4/bin-win32'] # 添加了这个路径,特注
"""


import nltk
# 10.1 Natural language understanding
# Querying a database: parse an English question with a feature grammar whose
# SEM features spell out the pieces of an SQL statement, then run that SQL.
# nltk.data.show_cfg('grammars/book_grammars/sql0.fcfg')
from nltk import load_parser
from nltk.sem import chat80

cp = load_parser('grammars/book_grammars/sql0.fcfg')
query = 'What cities are located in China'
# Materialize the parses so they can be both printed and indexed below
# (cp.parse() is consumed once when iterated).
trees = list(cp.parse(query.split()))
for tree in trees:
    print(tree)
"""
(S[SEM=(SELECT, City FROM city_table, WHERE, , , Country="china")]
  (NP[SEM=(SELECT, City FROM city_table)]
    (Det[SEM='SELECT'] What)
    (N[SEM='City FROM city_table'] cities))
  (VP[SEM=(, , Country="china")]
    (IV[SEM=''] are)
    (AP[SEM=(, Country="china")]
      (A[SEM=''] located)
      (PP[SEM=(, Country="china")]
        (P[SEM=''] in)
        (NP[SEM='Country="china"'] China)))))
"""

# The complete query lives in the SEM feature of the ROOT node of the first
# parse. Indexing into the tree itself (tree[0]) would select the NP child
# and yield only "(SELECT, City FROM city_table)".
answer = trees[0].label()['SEM']
# Drop the empty fragments shown as ", ," in the SEM tuple above.
q = ' '.join(s for s in answer if s)
print(q)  # SELECT City FROM city_table WHERE Country="china"

rows = chat80.sql_query('corpora/city_database/city.db', q)
for r in rows:
    print(r[0], end=" ")
"""canton chungking dairen harbin kowloon mukden peking shanghai sian tientsin """

# Natural language, semantics, and logic

# 10.2 Propositional logic
nltk.boolean_ops()
"""
conjunction &
disjunction |
implication ->
equivalence <->
"""


# Shorthand for the logic-expression parser used in this section.
read_expr = nltk.sem.Expression.fromstring

lp = read_expr('-(P & Q)')
print(lp)  # -(P & Q)

# NOTE: this proof needs the external Prover9 program; the original author
# reported "Prover9 not found" while it was not installed.
prover = nltk.Prover9()
NotFnS, SnF, R = [read_expr(text) for text in ('-FnS', 'SnF', 'SnF -> -FnS')]
res = prover.prove(NotFnS, [SnF, R])
print(res)  # True

# A propositional model: truth values for the symbols, over an empty domain.
dom = set()
val = nltk.Valuation([('P', True), ('Q', True), ('R', False)])
print(val['P'])  # True
g = nltk.Assignment(dom)
m = nltk.Model(dom, val)
# Expected output, in order: True, False, False, True
for formula in ('(P & Q)', '-(P & Q)', '(P & R)', '(P | R)'):
    print(m.evaluate(formula, g))

# 10.3 First-order logic
#tlp = nltk.LogicParser()
#parsed = tlp.parse('walk(angus)')
read_expr = nltk.sem.Expression.fromstring

# Inspect the parts (and inferred types) of a simple predication.
parsed = read_expr('walk(angus)', type_check=True)
print(parsed.argument)  # angus
print(parsed.argument.type)  # e
print(parsed.function)  # walk

# Supplying a signature pins down the type of the predicate.
sig = {'walk': '<e, t>'}
parsed = read_expr('walk(angus)', type_check=True, signature=sig)
print(parsed.function.type)  # <e,t>

# free() returns the set of free variables; it does not modify the
# expression, so the expression itself is printed here.
lp = read_expr('dog(cyril)')
lp.free()
print(lp)  # dog(cyril)

# Free variables of a series of formulas. Expected output, in order:
#   set(), set(), {Variable('x')}, set(), set(), {Variable('x')}, {Variable('y')}
for text in (
    'dog(cyril)',
    'dog(cyril)',
    'dog(x)',
    'own(angus, cyril)',
    'exists x.dog(x)',
    '((some x. walk(x)) -> sing(x))',
    'exists x.own(y, x)',
):
    lp = read_expr(text).free()
    print(lp)

# First-order theorem proving
NotFnS = read_expr('-north_of(f, s)')
SnF = read_expr('north_of(s, f)')
R = read_expr('all x. all y. (north_of(x, y) -> -north_of(y, x))')
FnS = read_expr('north_of(f, s)')
prover = nltk.Prover9()
prover.prove(NotFnS, [SnF, R])
res = prover.prove(FnS, [SnF, R])
print(res)  # False

# Summary of the language of first-order logic
# Truth in a model
dom = {'b', 'o', 'c'}
v = """
bertie => b
olive => o
cyril => c
boy => {b}
girl => {o}
dog => {c}
walk => {o, c}
see => {(b, o), (c, b), (o, c)}
"""

val = nltk.sem.Valuation.fromstring(v)
print(val)
"""
{'bertie': 'b',
'boy': {('b',)},
'cyril': 'c',
'dog': {('c',)},
'girl': {('o',)},
'olive': 'o',
'see': {('o', 'c'), ('b', 'o'), ('c', 'b')},
'walk': {('c',), ('o',)}}
"""


# Relations are stored as sets of tuples; unary predicates use 1-tuples.
olive_sees_cyril = ('o', 'c') in val['see']
print(olive_sees_cyril)  # True
bertie_is_boy = ('b',) in val['boy']
print(bertie_is_boy)  # True

# Individual variables and assignments
m = nltk.Model(dom, val)
g = nltk.Assignment(dom, [('x', 'o'), ('y', 'c')])
print(g)  # g[c/y][o/x]
res = m.evaluate('see(olive, y)', g)
print(res)  # True
print(g['y'])  # c
# After purge() the assignment no longer binds y, so the same formula
# evaluates to 'Undefined'.
g.purge()
print(g)  # g
print(m.evaluate('see(olive, y)', g))  # Undefined

res = m.evaluate('see(bertie, olive) & boy(bertie) & -walk(bertie)', g)
print(res)  # True

# Quantification
res = m.evaluate('exists x.(girl(x) & walk(x))', g)
print(res)  # True

res = m.evaluate('girl(x) & walk(x)', g.add('x', 'o'))
print(res)  # True

read_expr = nltk.sem.Expression.fromstring
fmla1 = read_expr('girl(x) | boy(x)')
fmla2 = read_expr('girl(x) -> walk(x)')
fmla3 = read_expr('walk(x) -> girl(x)')
# Individuals satisfying each open formula. Expected output, in order:
#   {'o', 'b'}, {'o', 'b', 'c'}, {'o', 'b'}
for fmla in (fmla1, fmla2, fmla3):
    res = m.satisfiers(fmla, 'x', g)
    print(res)

res = m.evaluate('all x.(girl(x) -> walk(x))', g)
print(res)  # True

# Quantifier scope ambiguity
v2 = """
bruce => b
cyril => c
elspeth => e
julia => j
matthew => m
person => {b, e, j, m}
admire => {(j, b), (b, b), (m, e), (e, m), (c, a)}
"""

val2 = nltk.sem.Valuation.fromstring(v2)
dom2 = val2.domain
g2 = nltk.Assignment(dom2)
m2 = nltk.Model(dom2, val2)

read_expr = nltk.sem.Expression.fromstring
fmla4 = read_expr('(person(x) -> exists y. (person(y) & admire(x, y)))')
fmla5 = read_expr('(person(y) & all x.(person(x) -> admire(x, y)))')
fmla6 = read_expr('(person(y) & all x.((x=bruce | x= julia) -> admire(x, y)))')
# Satisfiers of each formula for the named variable. Expected output, in order:
#   {'a', 'm', 'j', 'e', 'b', 'c'}, set(), {'b'}
for fmla, variable in ((fmla4, 'x'), (fmla5, 'y'), (fmla6, 'y')):
    res = m2.satisfiers(fmla, variable, g2)
    print(res)

# Model building
read_expr = nltk.sem.Expression.fromstring

a3 = read_expr('exists x.(man(x) & walks(x))')
c1 = read_expr('mortal(socrates)')
c2 = read_expr('-mortal(socrates)')
mb = nltk.Mace(5)
# A model exists exactly when the assumption set is consistent.
print(mb.build_model(None, [a3, c1]))  # True
print(mb.build_model(None, [a3, c2]))  # True
print(mb.build_model(None, [c1, c2]))  # False

a4 = read_expr('exists y. (woman(y) & all x. (man(x) -> love(x, y)))')
a5 = read_expr('man(adam)')
a6 = read_expr('woman(eve)')
g = read_expr('love(adam, eve)')
mc = nltk.MaceCommand(g, assumptions=[a4, a5, a6])
res = mc.build_model()
print(res)  # True

# In this counter-model adam and eve denote the same individual 'a', because
# nothing so far says that men and women are disjoint.
print(mc.valuation)
"""
{'C1': 'b',
'adam': 'a',
'eve': 'a',
'love': {('a', 'b')},
'man': {('a',)},
'woman': {('b',), ('a',)}}
"""


# Add the missing constraint that no man is a woman. The predicate must be
# `man` (not `max`) and the consequent must be negated; otherwise the
# assumption is vacuous and the model above does not change.
a7 = read_expr('all x. (man(x) -> -woman(x))')
g = read_expr('love(adam, eve)')
mc = nltk.MaceCommand(g, assumptions=[a4, a5, a6, a7])
res = mc.build_model()
print(res)  # True
print(mc.valuation)
# Expected shape of the result (per the NLTK book; the exact element names
# chosen by Mace4 may differ): adam and eve are now distinct individuals.
"""
{'C1': 'c',
'adam': 'a',
'eve': 'b',
'love': {('a', 'c')},
'man': {('a',)},
'woman': {('c',), ('b',)}}
"""


# 10.4 The semantics of English sentences
# Lambda-calculus expressions contain backslashes, so every formula below is
# written as a raw string.
read_expr = nltk.sem.Expression.fromstring

lp = read_expr(r'\x.(walk(x) & chew_gum(x))')
print(lp)  # \x.(walk(x) & chew_gum(x))
print(lp.free())  # set()
lp = read_expr(r'\x.(walk(x) & chew_gum(y))')
print(lp)  # \x.(walk(x) & chew_gum(y))

# Beta-reduction: simplify() applies the abstraction to its argument.
lp = read_expr(r'\x.(walk(x) & chew_gum(x))(gerald)')
print(lp)  # \x.(walk(x) & chew_gum(x))(gerald)
print(lp.simplify())  # (walk(gerald) & chew_gum(gerald))

lp = read_expr(r'\x.\y.(dog(x) & own(y, x))(cyril)').simplify()
print(lp)  # \y.(dog(cyril) & own(y,cyril))
lp = read_expr(r'\x y.(dog(x) & own(y, x))(cyril, angus)').simplify()
print(lp)  # (dog(cyril) & own(angus,cyril))

# Alpha-conversion: renaming a bound variable yields an equal expression.
e1 = read_expr('exists x.P(x)')
print(e1)
e2 = e1.alpha_convert(nltk.Variable('z'))
print(e2)  # exists z.P(z)
print(e1 == e2)  # True

# The body of the abstraction must be parenthesized. Without the parentheses
# the string parses as a two-argument application P(x, \y.see(y,x)) inside
# the abstraction, and simplify() has nothing to reduce.
e3 = read_expr(r'\P.(exists x.P(x))(\y.see(y, x))')
print(e3)  # (\P.exists x.P(x))(\y.see(y,x))
# The bound x is renamed during reduction so the free x is not captured.
print(e3.simplify())  # exists z1.see(z1,x)

# Quantified NPs
# Transitive verbs
read_expr = nltk.sem.Expression.fromstring
lp = read_expr(r'\X x.X(\y.chase(x,y))')
np = read_expr(r'(\P.exists x.(dog(x) & P(x)))')
# Apply the transitive-verb meaning to the quantified-NP meaning.
vp = nltk.ApplicationExpression(lp, np)
print(vp)  # (\X x.X(\y.chase(x,y)))(\P.exists x.(dog(x) & P(x)))
print(vp.simplify())  # \x.exists z1.(dog(z1) & chase(x,z1))

from nltk import load_parser
parser = load_parser('grammars/book_grammars/simple-sem.fcfg', trace=0)
sentence = 'Angus gives a bone to every dog'
tokens = sentence.split()
trees = parser.parse(tokens)
for tree in trees:
    # The root node's SEM feature holds the semantics of the whole sentence.
    print(tree.label()['SEM'])
"""all z3.(dog(z3) -> exists z2.(bone(z2) & give(angus,z2,z3)))"""

v = """
bertie => b
olive => o
cyril => c
boy => {b}
girl => {o}
dog => {c}
walk => {o, c}
see => {(b, o), (c, b), (o, c)}
"""

# Parse a sentence and evaluate each of its readings against a model.
val = nltk.sem.Valuation.fromstring(v)
g = nltk.Assignment(val.domain)
m = nltk.Model(val.domain, val)
sent = 'Cyril sees every boy'
grammar_file = 'grammars/book_grammars/simple-sem.fcfg'
#results = nltk.batch_evaluate([sent], grammar_file, m, g)[0]
# evaluate_sents returns one result list per input sentence; take the first.
results = nltk.evaluate_sents([sent], grammar_file, m, g)[0]
for (syntree, semrel, value) in results:
    print(semrel)
    print(value)
"""
all z4.(boy(z4) -> see(cyril,z4))
True
"""


# Quantifier ambiguity revisited (Cooper storage)
from nltk.sem import cooper_storage as cs

sentence = 'every girl chases a dog'
trees = cs.parse_with_bindops(sentence, grammar='grammars/book_grammars/storage.fcfg')
semrep = trees[0].label()['SEM']
cs_semrep = cs.CooperStore(semrep)
# The core is the sentence meaning with placeholder variables; the store
# holds the binding operators for the quantified NPs.
print(cs_semrep.core)  # chase(z2,z3)
for bo in cs_semrep.store:
    print(bo)
"""
bo(\P.all x.(girl(x) -> P(x)),z2)
bo(\P.exists x.(dog(x) & P(x)),z3)
"""


# s_retrieve() prints its own trace and returns None (the results are
# collected in cs_semrep.readings), so its return value is not printed.
cs_semrep.s_retrieve(trace=True)
"""
Permutation 1
(\P.all x.(girl(x) -> P(x)))(\z2.chase(z2,z3))
(\P.exists x.(dog(x) & P(x)))(\z3.all x.(girl(x) -> chase(x,z3)))
Permutation 2
(\P.exists x.(dog(x) & P(x)))(\z3.chase(z2,z3))
(\P.all x.(girl(x) -> P(x)))(\z2.exists x.(dog(x) & chase(z2,x)))
"""

for reading in cs_semrep.readings:
    print(reading)
"""
exists x.(dog(x) & all z7.(girl(z7) -> chase(z7,x)))
all x.(girl(x) -> exists z8.(dog(z8) & chase(x,z8)))
"""


# 10.5 Discourse semantics
# Discourse representation theory
# DRS strings such as '([x], [walk(x)])' are not first-order syntax: they
# must be read with DrtExpression.fromstring. Expression.fromstring uses the
# plain logic parser and rejects them.
read_dexpr = nltk.sem.DrtExpression.fromstring

dp = read_dexpr('([x, y], [angus(x), dog(y), own(x, y)])')
print(dp)  # ([x,y],[angus(x), dog(y), own(x,y)])
dp.draw()

# Translate the DRS into an ordinary first-order formula.
print(dp.fol())  # exists x y.(angus(x) & dog(y) & own(x,y))

# DRS concatenation; simplify() merges the two boxes.
# Expected outputs below follow the NLTK book.
drs2 = read_dexpr('([x], [walk(x)]) + ([y], [run(y)])')
print(drs2)  # (([x],[walk(x)]) + ([y],[run(y)]))
print(drs2.simplify())  # ([x,y],[walk(x), run(y)])

drs3 = read_dexpr('([], [(([x], [dog(x)]) -> ([y], [ankle(y), bite(x, y)]))])')
print(drs3.fol())  # all x.(dog(x) -> exists y.(ankle(y) & bite(x,y)))

drs4 = read_dexpr('([x, y], [angus(x), dog(y), own(x, y)])')
drs5 = read_dexpr('([u, z], [PRO(u), irene(z), bite(u, z)])')
drs6 = drs4 + drs5
print(drs6.simplify())  # ([u,x,y,z],[angus(x), dog(y), own(x,y), PRO(u), irene(z), bite(u,z)])


from nltk import load_parser
parser = load_parser('grammars/book_grammars/drt.fcfg', logic_parser=nltk.sem.drt.DrtParser())
trees = parser.parse('Angus owns a dog'.split())
#for tree in trees: print(tree)
tree = next(trees)
# Read SEM from the root of the parse. tree[0] is only the subject NP, whose
# SEM is the unsaturated \P.(([x],[Angus(x)]) + P(x)).
print(tree.label()['SEM'])
print(tree.label()['SEM'].simplify())  # ([x,z2],[Angus(x), dog(z2), own(x,z2)])

# Discourse processing
dt = nltk.DiscourseTester(['A student dances', 'Every student is a person'])
dt.readings()
"""
s0 readings:

s0-r0: exists z1.(student(z1) & dance(z1))

s1 readings:

s1-r0: all z1.(student(z1) -> person(z1))
"""


# consistchk=True asks for a consistency check of the extended discourse.
dt.add_sentence('No person dances', consistchk=True)
"""
Inconsistent discourse: d0 ['s0-r0', 's1-r0', 's2-r0']:
s0-r0: exists z1.(student(z1) & dance(z1))
s1-r0: all z1.(student(z1) -> person(z1))
s2-r0: -exists z1.(person(z1) & dance(z1))
"""


dt.retract_sentence('No person dances', verbose=True)
"""
Current sentences are
s0: A student dances
s1: Every student is a person
"""


# informchk must be passed as a keyword argument. The quoted string
# 'informchk=True' only enabled the check by accident, because any
# non-empty string is truthy.
dt.add_sentence('A person dances', informchk=True)
"""
Sentence 'A person dances' under reading 'exists x.(person(x) & dance(x))':
Not informative relative to thread 'd0'
"""


# Disabled example: discourse readings through glue semantics with MaltParser.
# It is kept inside a string because it needs a local MaltParser download and
# a matching `parser_dirname`; the original author reported that both
# readings() calls failed in their environment.
#   http://www.maltparser.org/download.html
#   http://maltparser.org/dist/maltparser-1.9.1.tar.gz
#   http://maltparser.org/dist/maltparser-1.9.1.zip
# The determiner tag is 'ex_quant' (the original spelled it 'ex_guant').
"""
tagger = nltk.tag.RegexpTagger(
    [('^(chases|runs)$', 'VB'),
     ('^(a)$', 'ex_quant'),
     ('^(every)$', 'univ_quant'),
     ('^(dog|boy)$', 'NN'),
     ('^(He)$', 'PRP')])
rc = nltk.DrtGlueReadingCommand(depparser=nltk.MaltParser(
    parser_dirname="D:/nltk_data/maltparser-1.9.1", tagger=tagger))
dt = nltk.DiscourseTester(['Every dog chases a boy', 'He runs'], rc)
#dt.readings()
dt.readings(show_thread_readings=True, filter=True)
"""