neo4j Cypher语句(CQL)练习

时间:2022-01-09 23:00:46

CQL虽然看起来挺容易懂,实际上还是挺难写的,跟SQL的直观完全不能比较,其复杂度的来源可能是图的结构引起的,而非本身语言设计的问题。当然,不可否认,neo4j中CQL的设计还是略微的有一些不足。本次的练习是基于《Graph Databases(2013)》的第5章,确切说是从p105开始的3个例子。

这里提到了3个现实的图数据库模型,分别是社交关系模型(Social Networks),权限控制模型(Access Control)和物流模型(Logistics)。具体的模型应用的背景,可以去参考书中的描述,书可以去官网下载电子版。

社交关系模型的图结构如下,

neo4j Cypher语句(CQL)练习

权限控制模型的图结构如下,

neo4j Cypher语句(CQL)练习

物流模型的图结构如下,

neo4j Cypher语句(CQL)练习

其中,物流模型中不同颜色的边表示不同的时间段,时间段估计是用unix time_t类型记录的一个长整数,如下图

neo4j Cypher语句(CQL)练习

首先要先建立3个图数据库的基本数据,这里使用CQL中的CREATE即可。

社交网络模型的建图语句如下

CREATE 
// User
(uc:User {name : 'Charlie'}), 
(ub:User {name : 'Ben'}), 
(us:User {name : 'Sarah'}), 
(ua:User {name : 'Arnold'}), 
(ue:User {name : 'Emily'}), 
(ug:User {name : 'Gordon'}), 
(uk:User {name : 'Kate'}),
// Company
(ca:Company {name : 'Acme, Inc.'}), 
(cs:Company {name : 'Startup, Ltd.'}),
// Interest
(im:Interest {name : 'Medicine'}), 
(ic:Interest {name : 'Cars'}), 
(ir:Interest {name : 'REST'}), 
(ig:Interest {name : 'Graphs'}), 
(ij:Interest {name : 'Java'}), 
(it:Interest {name : 'Travel'}), 
(id:Interest {name : 'Design'}), 
(ia:Interest {name : 'Art'}), 
(im1:Interest {name : 'Music'}), 
(id1:Interest {name : 'Drama'}),
// Project
(pn:Project {name : 'Next Gen Platform'}), 
(pq:Project {name : 'Quantum Leap'}), 
(pp:Project {name : 'Phoenix'}),
// WORKS_FOR
(uc)-[:WORKS_FOR]->(ca),
(ub)-[:WORKS_FOR]->(ca),
(us)-[:WORKS_FOR]->(ca),
(ua)-[:WORKS_FOR]->(cs),
(ue)-[:WORKS_FOR]->(cs),
(ug)-[:WORKS_FOR]->(cs),
(uk)-[:WORKS_FOR]->(cs),
// WORKED_ON
(uc)-[:WORKED_ON]->(pn),
(ub)-[:WORKED_ON]->(pn),
(us)-[:WORKED_ON]->(pn),
(us)-[:WORKED_ON]->(pq),
(ua)-[:WORKED_ON]->(pp),
(ue)-[:WORKED_ON]->(pn),
(ue)-[:WORKED_ON]->(pq),
(uk)-[:WORKED_ON]->(pq),
(uk)-[:WORKED_ON]->(pp),
// INTERESTED_IN
(uc)-[:INTERESTED_IN]->(im),
(uc)-[:INTERESTED_IN]->(ic),
(uc)-[:INTERESTED_IN]->(ig),
(ub)-[:INTERESTED_IN]->(ir),
(ub)-[:INTERESTED_IN]->(ig),
(us)-[:INTERESTED_IN]->(ir),
(us)-[:INTERESTED_IN]->(ig),
(us)-[:INTERESTED_IN]->(ij),
(ua)-[:INTERESTED_IN]->(ir),
(ua)-[:INTERESTED_IN]->(ig),
(ua)-[:INTERESTED_IN]->(ij),
(ua)-[:INTERESTED_IN]->(it),
(ue)-[:INTERESTED_IN]->(id),
(ue)-[:INTERESTED_IN]->(ia),
(ug)-[:INTERESTED_IN]->(ig),
(ug)-[:INTERESTED_IN]->(im1),
(uk)-[:INTERESTED_IN]->(im1),
(uk)-[:INTERESTED_IN]->(id1)
;

CREATE INDEX ON :User(name);
CREATE INDEX ON :Company(name);
CREATE INDEX ON :Interest(name);
CREATE INDEX ON :Project(name);

权限控制模型的建图语句如下

CREATE 
// Admin
(ab:Admin {name : 'Ben'}), 
(as1:Admin {name : 'Sarah'}), 
(al:Admin {name : 'Liz'}), 
(ap:Admin {name : 'Phil'}), 
// Group
(g1:Group {name : 'Group1'}), 
(g2:Group {name : 'Group2'}),
(g3:Group {name : 'Group3'}),
(g4:Group {name : 'Group4'}),
(g5:Group {name : 'Group5'}),
(g6:Group {name : 'Group6'}),
(g7:Group {name : 'Group7'}),
// Company
(ca:Company {name : 'Acme'}), 
(cs:Company {name : 'Spinoff'}), 
(cs1:Company {name : 'Startup'}), 
(cs2:Company {name : 'Skunk-workz'}), 
(cb:Company {name : 'Big Co'}), 
(ca1:Company {name : 'Aquired Ltd.'}), 
(cs3:Company {name : 'Subsid\'ry'}), 
(co:Company {name : 'One-Man Shop'}), 
(cd:Company {name : 'Dev Shop'}), 
// Employee
(ea:Employee {name : 'Arnold'}), 
(ec:Employee {name : 'Charlie'}), 
(ee:Employee {name : 'Emily'}),
(eg:Employee {name : 'Gordon'}),
(el:Employee {name : 'Lucy'}),
(ek:Employee {name : 'Kate'}),
(ea1:Employee {name : 'Alister'}),
(ee1:Employee {name : 'Eve'}),
(eg1:Employee {name : 'Gary'}),
(eb:Employee {name : 'Bill'}),
(em:Employee {name : 'Mary'}),
// Account
(n1:Account {name : 'Account1'}), 
(n2:Account {name : 'Account2'}), 
(n3:Account {name : 'Account3'}), 
(n4:Account {name : 'Account4'}), 
(n5:Account {name : 'Account5'}), 
(n6:Account {name : 'Account6'}), 
(n7:Account {name : 'Account7'}), 
(n8:Account {name : 'Account8'}), 
(n9:Account {name : 'Account9'}), 
(n10:Account {name : 'Account10'}), 
(n11:Account {name : 'Account11'}), 
(n12:Account {name : 'Account12'}), 
// MEMBER_OF
(ab)-[:MEMBER_OF]->(g1),
(ab)-[:MEMBER_OF]->(g3),
(as1)-[:MEMBER_OF]->(g2),
(as1)-[:MEMBER_OF]->(g3),
(al)-[:MEMBER_OF]->(g4),
(al)-[:MEMBER_OF]->(g5),
(al)-[:MEMBER_OF]->(g6),
(ap)-[:MEMBER_OF]->(g7),
// ALLOWED_INHERIT
(g1)-[:ALLOWED_INHERIT]->(ca),
(g3)-[:ALLOWED_INHERIT]->(cs1),
(g4)-[:ALLOWED_INHERIT]->(cb),
(g7)-[:ALLOWED_INHERIT]->(cs3),
// ALLOWED_DO_NOT_INHERIT
(g2)-[:ALLOWED_DO_NOT_INHERIT]->(ca),
(g6)-[:ALLOWED_DO_NOT_INHERIT]->(co),
// DENIED
(g2)-[:DENIED]->(cs2),
(g5)-[:DENIED]->(ca1),
// CHILD_OF
(cs)-[:CHILD_OF]->(ca),
(cs2)-[:CHILD_OF]->(cs1),
(ca1)-[:CHILD_OF]->(cb),
(cs3)-[:CHILD_OF]->(ca1),
(co)-[:CHILD_OF]->(cs3),
(cd)-[:CHILD_OF]->(cs3),
// WORKS_FOR
(ea)-[:WORKS_FOR]->(ca),
(ec)-[:WORKS_FOR]->(ca),
(ee)-[:WORKS_FOR]->(cs),
(eg)-[:WORKS_FOR]->(cs1),
(el)-[:WORKS_FOR]->(cs1),
(ek)-[:WORKS_FOR]->(cs2),
(ea1)-[:WORKS_FOR]->(cb),
(ee1)-[:WORKS_FOR]->(ca1),
(eg1)-[:WORKS_FOR]->(cs3),
(eb)-[:WORKS_FOR]->(co),
(em)-[:WORKS_FOR]->(cd),
// HAS_ACCOUNT
(ea)-[:HAS_ACCOUNT]->(n1),
(ea)-[:HAS_ACCOUNT]->(n2),
(ec)-[:HAS_ACCOUNT]->(n3),
(eg)-[:HAS_ACCOUNT]->(n4),
(el)-[:HAS_ACCOUNT]->(n5),
(ee)-[:HAS_ACCOUNT]->(n6),
(ek)-[:HAS_ACCOUNT]->(n7),
(ea1)-[:HAS_ACCOUNT]->(n8),
(ee1)-[:HAS_ACCOUNT]->(n9),
(eb)-[:HAS_ACCOUNT]->(n10),
(eg1)-[:HAS_ACCOUNT]->(n11),
(em)-[:HAS_ACCOUNT]->(n12)
; 

物流模型的建图语句如下,

CREATE 
// Parcel Center
(pc1:Center {name : 'Parcel Center-1'}), 
(pc2:Center {name : 'Parcel Center-2'}), 
// Delivery Base
(db1:Base {name : 'Delivery Base1'}), 
(db2:Base {name : 'Delivery Base2'}),
(db3:Base {name : 'Delivery Base3'}),
// Delivery Area
(da1:Area {name : 'Delivery Area1'}), 
(da2:Area {name : 'Delivery Area2'}),
(da3:Area {name : 'Delivery Area3'}),
(da4:Area {name : 'Delivery Area4'}), 
// Delivery Segment
(ds1:Segment {name : 'Delivery Segment1'}), 
(ds2:Segment {name : 'Delivery Segment2'}),
(ds3:Segment {name : 'Delivery Segment3'}),
(ds4:Segment {name : 'Delivery Segment4'}), 
(ds5:Segment {name : 'Delivery Segment5'}), 
(ds6:Segment {name : 'Delivery Segment6'}),
(ds7:Segment {name : 'Delivery Segment7'}),
(ds8:Segment {name : 'Delivery Segment8'}), 
// CONNECTED_TO
(pc1)-[:CONNECTED_TO {cost:3, start_date:1350255600000, end_date:1350860400000}]->(db1),
(pc1)-[:CONNECTED_TO {cost:2, start_date:1350860400000, end_date:1351465200000}]->(db1),
(pc1)-[:CONNECTED_TO {cost:6, start_date:1351465200000, end_date:1352070000000}]->(db1),

(pc1)-[:CONNECTED_TO {cost:3, start_date:1350255600000, end_date:1350860400000}]->(db2),
(pc1)-[:CONNECTED_TO {cost:2, start_date:1350860400000, end_date:1351465200000}]->(db2),
(pc1)-[:CONNECTED_TO {cost:6, start_date:1351465200000, end_date:1352070000000}]->(db2),

(pc1)-[:CONNECTED_TO {cost:6, start_date:1351465200000, end_date:1352070000000}]->(db3),

(pc2)-[:CONNECTED_TO {cost:5, start_date:1350860400000, end_date:1351465200000}]->(db1),

(pc2)-[:CONNECTED_TO {cost:3, start_date:1350255600000, end_date:1350860400000}]->(db2),
(pc2)-[:CONNECTED_TO {cost:2, start_date:1350860400000, end_date:1351465200000}]->(db2),
(pc2)-[:CONNECTED_TO {cost:6, start_date:1351465200000, end_date:1352070000000}]->(db2),

(pc2)-[:CONNECTED_TO {cost:3, start_date:1350255600000, end_date:1350860400000}]->(db3),
(pc2)-[:CONNECTED_TO {cost:2, start_date:1350860400000, end_date:1351465200000}]->(db3),

// DELIVERY_ROUTE
(db1)-[:DELIVERY_ROUTE {cost:3, start_date:1350255600000, end_date:1350860400000}]->(da1),
(db1)-[:DELIVERY_ROUTE {cost:2, start_date:1350860400000, end_date:1351465200000}]->(da1),
(db1)-[:DELIVERY_ROUTE {cost:6, start_date:1351465200000, end_date:1352070000000}]->(da1),
(db1)-[:DELIVERY_ROUTE {cost:3, start_date:1350255600000, end_date:1350860400000}]->(da4),

(db2)-[:DELIVERY_ROUTE {cost:2, start_date:1350860400000, end_date:1351465200000}]->(da4),
(db2)-[:DELIVERY_ROUTE {cost:6, start_date:1351465200000, end_date:1352070000000}]->(da4),
(db2)-[:DELIVERY_ROUTE {cost:5, start_date:1350255600000, end_date:1350860400000}]->(da3),
(db2)-[:DELIVERY_ROUTE {cost:6, start_date:1351465200000, end_date:1352070000000}]->(da2),

(db3)-[:DELIVERY_ROUTE {cost:5, start_date:1350860400000, end_date:1351465200000}]->(da3),
(db3)-[:DELIVERY_ROUTE {cost:5, start_date:1351465200000, end_date:1352070000000}]->(da3),
(db3)-[:DELIVERY_ROUTE {cost:3, start_date:1350255600000, end_date:1350860400000}]->(da2),
(db3)-[:DELIVERY_ROUTE {cost:2, start_date:1350860400000, end_date:1351465200000}]->(da2),

(da1)-[:DELIVERY_ROUTE {cost:3, start_date:1350255600000, end_date:1350860400000}]->(ds1),
(da1)-[:DELIVERY_ROUTE {cost:6, start_date:1351465200000, end_date:1352070000000}]->(ds1),
(da1)-[:DELIVERY_ROUTE {cost:3, start_date:1350255600000, end_date:1350860400000}]->(ds2),
(da1)-[:DELIVERY_ROUTE {cost:6, start_date:1351465200000, end_date:1352070000000}]->(ds2),
(da1)-[:DELIVERY_ROUTE {cost:3, start_date:1350255600000, end_date:1350860400000}]->(ds7),
(da1)-[:DELIVERY_ROUTE {cost:3, start_date:1350255600000, end_date:1350860400000}]->(ds8),

(da4)-[:DELIVERY_ROUTE {cost:2, start_date:1350860400000, end_date:1351465200000}]->(ds1),
(da4)-[:DELIVERY_ROUTE {cost:2, start_date:1350860400000, end_date:1351465200000}]->(ds2),
(da4)-[:DELIVERY_ROUTE {cost:2, start_date:1350860400000, end_date:1351465200000}]->(ds7),
(da4)-[:DELIVERY_ROUTE {cost:6, start_date:1351465200000, end_date:1352070000000}]->(ds7),
(da4)-[:DELIVERY_ROUTE {cost:2, start_date:1350860400000, end_date:1351465200000}]->(ds8),
(da4)-[:DELIVERY_ROUTE {cost:6, start_date:1351465200000, end_date:1352070000000}]->(ds8),
(da4)-[:DELIVERY_ROUTE {cost:3, start_date:1350255600000, end_date:1350860400000}]->(ds5),

(da3)-[:DELIVERY_ROUTE {cost:2, start_date:1350860400000, end_date:1351465200000}]->(ds5),
(da3)-[:DELIVERY_ROUTE {cost:6, start_date:1351465200000, end_date:1352070000000}]->(ds5),
(da3)-[:DELIVERY_ROUTE {cost:3, start_date:1350255600000, end_date:1350860400000}]->(ds6),
(da3)-[:DELIVERY_ROUTE {cost:2, start_date:1350860400000, end_date:1351465200000}]->(ds6),
(da3)-[:DELIVERY_ROUTE {cost:6, start_date:1351465200000, end_date:1352070000000}]->(ds6),
(da3)-[:DELIVERY_ROUTE {cost:6, start_date:1351465200000, end_date:1352070000000}]->(ds3),
(da3)-[:DELIVERY_ROUTE {cost:6, start_date:1351465200000, end_date:1352070000000}]->(ds4),

(da2)-[:DELIVERY_ROUTE {cost:3, start_date:1350255600000, end_date:1350860400000}]->(ds3),
(da2)-[:DELIVERY_ROUTE {cost:2, start_date:1350860400000, end_date:1351465200000}]->(ds3),
(da2)-[:DELIVERY_ROUTE {cost:3, start_date:1350255600000, end_date:1350860400000}]->(ds4),
(da2)-[:DELIVERY_ROUTE {cost:2, start_date:1350860400000, end_date:1351465200000}]->(ds4)
; 

以上建图语句中,只有社交网络模型(第一个)使用了索引,其他的没有加进去。neo4j的索引有一些坑,我用的是2.0.2版本,其中提供了2种索引,一种叫做index,一种叫做legacy index。两种的差异并不是一个取代另一个,而是有特定场景的。index可以用于match语句中,但是start语句中只可以支持legacy index,文档中有一段很不起眼的话“In general, the START clause is only really needed when using legacy indexes”。还有,index可以很方便的通过create index创建,但是legacy index就悲剧了,从文档的例子来看,只能通过写Java代码来创建。建立index应该是属于DDL语句,需要由DBA来操作,这样看来,index应该是neo4j需要加强的地方,把legacy index的很多特性逐步的迁移过来。

例子1,在社交网络模型场景中,列出所有对Java感兴趣的人名

MATCH (n:User), (i:Interest)
WHERE (n)-[:INTERESTED_IN]-> (i)
AND   i.name = 'Java'
RETURN n.name;

结果是Sarah,Arnold。

例子2,在权限控制模型中,列出所有Sarah无权管理的用户(人名)

MATCH paths=(admin:Admin)-[:MEMBER_OF]->()-[:DENIED]->()<-[:CHILD_OF*0..3]-(company)
      <-[:WORKS_FOR]-(employee)
WHERE admin.name = 'Sarah'
RETURN employee.name
UNION
MATCH (admin:Admin),paths=(company:Company)<-[:WORKS_FOR]-(employee:Employee)
where (NOT ((admin)-[:MEMBER_OF]->()-[:ALLOWED_INHERIT]->()
<-[:CHILD_OF*0..3]-(company)))
and   (NOT ((admin)-[:MEMBER_OF]->()-[:ALLOWED_DO_NOT_INHERIT]->(company)))
and   admin.name = 'Sarah'
RETURN distinct employee.name;

结果有7个人,不列举了。这个CQL的写法是,先列出Sarah被Denied的账户有哪些,然后还有Sarah不能访问到的账户有哪些,取并集即可。