BUILD 14 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734
  1. # Description:
  2. # A syntactic parser and part-of-speech tagger in TensorFlow.
  # Package defaults: every target is private unless it declares its own
  # visibility, and the "layering_check" feature is switched off.
  package(
      default_visibility = ["//visibility:private"],
      features = ["-layering_check"],
  )
  licenses(["notice"])  # Apache 2.0
  # Build macros used below for the proto libraries and the TensorFlow op
  # wrappers. The extension file is referenced by label (":syntaxnet.bzl")
  # because the legacy extension-less path form ("syntaxnet") is rejected by
  # current Bazel releases; both spellings name syntaxnet.bzl in this package.
  load(
      ":syntaxnet.bzl",
      "tf_proto_library",
      "tf_proto_library_py",
      "tf_gen_op_libs",
      "tf_gen_op_wrapper_py",
  )
  17. # proto libraries
  # Proto library (tf_proto_library macro) for feature_extractor.proto.
  tf_proto_library(
      name = "feature_extractor_proto",
      srcs = ["feature_extractor.proto"],
  )
  # Proto library (tf_proto_library macro) for sentence.proto.
  tf_proto_library(
      name = "sentence_proto",
      srcs = ["sentence.proto"],
  )
  # tf_proto_library_py counterpart of sentence.proto; listed in the deps of
  # py targets in this package.
  tf_proto_library_py(
      name = "sentence_py_pb2",
      srcs = ["sentence.proto"],
  )
  # Proto library (tf_proto_library macro) for dictionary.proto.
  tf_proto_library(
      name = "dictionary_proto",
      srcs = ["dictionary.proto"],
  )
  # tf_proto_library_py counterpart of dictionary.proto.
  tf_proto_library_py(
      name = "dictionary_py_pb2",
      srcs = ["dictionary.proto"],
  )
  # Proto library for kbest_syntax.proto; the only proto target here that
  # declares a dependency (on :sentence_proto).
  tf_proto_library(
      name = "kbest_syntax_proto",
      srcs = ["kbest_syntax.proto"],
      deps = [":sentence_proto"],
  )
  # Proto library (tf_proto_library macro) for task_spec.proto.
  tf_proto_library(
      name = "task_spec_proto",
      srcs = ["task_spec.proto"],
  )
  # tf_proto_library_py counterpart of task_spec.proto.
  tf_proto_library_py(
      name = "task_spec_py_pb2",
      srcs = ["task_spec.proto"],
  )
  # Proto library (tf_proto_library macro) for sparse.proto.
  tf_proto_library(
      name = "sparse_proto",
      srcs = ["sparse.proto"],
  )
  # tf_proto_library_py counterpart of sparse.proto.
  tf_proto_library_py(
      name = "sparse_py_pb2",
      srcs = ["sparse.proto"],
  )
  59. # cc libraries for feature extraction and parsing
  # Publicly visible target exporting base.h (it lists no srcs). It always
  # depends on re2, protobuf and Eigen; the TensorFlow dependency is picked by
  # configuration: the headers-only lib on darwin, framework + lib otherwise.
  cc_library(
      name = "base",
      hdrs = ["base.h"],
      visibility = ["//visibility:public"],
      deps = [
          "@re2//:re2",
          "@protobuf//:protobuf",
          "@org_tensorflow//third_party/eigen3",
      ] + select({
          "//conditions:default": [
              "@org_tensorflow//tensorflow/core:framework",
              "@org_tensorflow//tensorflow/core:lib",
          ],
          "@org_tensorflow//tensorflow:darwin": [
              "@org_tensorflow//tensorflow/core:framework_headers_lib",
          ],
      }),
  )
  # utils.cc / utils.h, layered on :base and the unicodetext library.
  cc_library(
      name = "utils",
      srcs = ["utils.cc"],
      hdrs = ["utils.h"],
      deps = [
          ":base",
          "//util/utf8:unicodetext",
      ],
  )
  # Test-only library built from test_main.cc; it is listed in the deps of
  # every cc_test in this file. Links against libm and depends on gtest plus
  # TensorFlow's core lib and testlib.
  cc_library(
      name = "test_main",
      testonly = 1,
      srcs = ["test_main.cc"],
      linkopts = ["-lm"],
      deps = [
          "//external:gtest",
          "@org_tensorflow//tensorflow/core:lib",
          "@org_tensorflow//tensorflow/core:testlib",
      ],
  )
  # document_format.cc / document_format.h.
  cc_library(
      name = "document_format",
      srcs = ["document_format.cc"],
      hdrs = ["document_format.h"],
      deps = [
          ":registry",
          ":sentence_proto",
          ":task_context",
      ],
  )
  # text_formats.cc, with no public headers. alwayslink keeps its object
  # files in any binary that depends on it even when no symbol is referenced
  # directly.
  cc_library(
      name = "text_formats",
      srcs = ["text_formats.cc"],
      deps = [
          ":document_format",
          ":sentence_proto",
      ],
      alwayslink = 1,
  )
  # fml_parser.cc / fml_parser.h.
  cc_library(
      name = "fml_parser",
      srcs = ["fml_parser.cc"],
      hdrs = ["fml_parser.h"],
      deps = [
          ":feature_extractor_proto",
          ":utils",
      ],
  )
  # Exports proto_io.h only (no srcs are listed for this target).
  cc_library(
      name = "proto_io",
      hdrs = ["proto_io.h"],
      deps = [
          ":feature_extractor_proto",
          ":fml_parser",
          ":sentence_proto",
          ":task_context",
      ],
  )
  # char_properties.cc / char_properties.h; always linked in full.
  cc_library(
      name = "char_properties",
      srcs = ["char_properties.cc"],
      hdrs = ["char_properties.h"],
      deps = [
          ":registry",
          ":utils",
          "//util/utf8:unicodetext",
      ],
      alwayslink = 1,
  )
  # segmenter_utils.cc / segmenter_utils.h; always linked in full.
  cc_library(
      name = "segmenter_utils",
      srcs = ["segmenter_utils.cc"],
      hdrs = ["segmenter_utils.h"],
      deps = [
          ":base",
          ":char_properties",
          ":sentence_proto",
          "//util/utf8:unicodetext",
      ],
      alwayslink = 1,
  )
  # feature_extractor.cc with two public headers: feature_extractor.h and
  # feature_types.h.
  cc_library(
      name = "feature_extractor",
      srcs = ["feature_extractor.cc"],
      hdrs = [
          "feature_extractor.h",
          "feature_types.h",
      ],
      deps = [
          ":document_format",
          ":feature_extractor_proto",
          ":proto_io",
          ":sentence_proto",
          ":task_context",
          ":utils",
          ":workspace",
      ],
  )
  # affix.cc / affix.h.
  cc_library(
      name = "affix",
      srcs = ["affix.cc"],
      hdrs = ["affix.h"],
      deps = [
          ":dictionary_proto",
          ":feature_extractor",
          ":sentence_proto",
          ":shared_store",
          ":term_frequency_map",
          ":utils",
          ":workspace",
      ],
  )
  # sentence_features.cc / sentence_features.h.
  cc_library(
      name = "sentence_features",
      srcs = ["sentence_features.cc"],
      hdrs = ["sentence_features.h"],
      deps = [
          ":affix",
          ":feature_extractor",
          ":registry",
          ":segmenter_utils",
      ],
  )
  # shared_store.cc / shared_store.h; depends only on :utils.
  cc_library(
      name = "shared_store",
      srcs = ["shared_store.cc"],
      hdrs = ["shared_store.h"],
      deps = [":utils"],
  )
  # registry.cc / registry.h; depends only on :utils.
  cc_library(
      name = "registry",
      srcs = ["registry.cc"],
      hdrs = ["registry.h"],
      deps = [":utils"],
  )
  # workspace.cc / workspace.h; depends only on :utils.
  cc_library(
      name = "workspace",
      srcs = ["workspace.cc"],
      hdrs = ["workspace.h"],
      deps = [":utils"],
  )
  # task_context.cc / task_context.h, built on the task_spec proto.
  cc_library(
      name = "task_context",
      srcs = ["task_context.cc"],
      hdrs = ["task_context.h"],
      deps = [
          ":task_spec_proto",
          ":utils",
      ],
  )
  # term_frequency_map.cc / term_frequency_map.h. Overrides the private
  # package default with public visibility, and is always linked in full.
  cc_library(
      name = "term_frequency_map",
      srcs = ["term_frequency_map.cc"],
      hdrs = ["term_frequency_map.h"],
      visibility = ["//visibility:public"],
      deps = [":utils"],
      alwayslink = 1,
  )
  # morphology_label_set.cc / morphology_label_set.h.
  cc_library(
      name = "morphology_label_set",
      srcs = ["morphology_label_set.cc"],
      hdrs = ["morphology_label_set.h"],
      deps = [
          ":document_format",
          ":feature_extractor",
          ":proto_io",
          ":registry",
          ":sentence_proto",
          ":utils",
      ],
  )
  # Bundles eight sources into one always-linked library: the arc-standard,
  # binary-segment, morpher and tagger transition files, plus parser state,
  # parser features and binary segment state. Four headers are exported.
  cc_library(
      name = "parser_transitions",
      srcs = [
          "arc_standard_transitions.cc",
          "binary_segment_state.cc",
          "binary_segment_transitions.cc",
          "morpher_transitions.cc",
          "parser_features.cc",
          "parser_state.cc",
          "parser_transitions.cc",
          "tagger_transitions.cc",
      ],
      hdrs = [
          "binary_segment_state.h",
          "parser_features.h",
          "parser_state.h",
          "parser_transitions.h",
      ],
      deps = [
          ":affix",
          ":feature_extractor",
          ":kbest_syntax_proto",
          ":morphology_label_set",
          ":registry",
          ":segmenter_utils",
          ":sentence_features",
          ":sentence_proto",
          ":shared_store",
          ":task_context",
          ":term_frequency_map",
          ":workspace",
      ],
      alwayslink = 1,
  )
  # Test-only library (populate_test_inputs.cc / .h); listed in the deps of
  # several cc_tests in this file.
  cc_library(
      name = "populate_test_inputs",
      testonly = 1,
      srcs = ["populate_test_inputs.cc"],
      hdrs = ["populate_test_inputs.h"],
      deps = [
          ":dictionary_proto",
          ":sentence_proto",
          ":task_context",
          ":task_spec_proto",
          ":term_frequency_map",
          ":test_main",
      ],
  )
  # embedding_feature_extractor.cc / embedding_feature_extractor.h.
  cc_library(
      name = "embedding_feature_extractor",
      srcs = ["embedding_feature_extractor.cc"],
      hdrs = ["embedding_feature_extractor.h"],
      deps = [
          ":feature_extractor",
          ":parser_transitions",
          ":sparse_proto",
          ":task_context",
          ":workspace",
      ],
  )
  # sentence_batch.cc / sentence_batch.h.
  cc_library(
      name = "sentence_batch",
      srcs = ["sentence_batch.cc"],
      hdrs = ["sentence_batch.h"],
      deps = [
          ":embedding_feature_extractor",
          ":feature_extractor",
          ":parser_transitions",
          ":sentence_proto",
          ":sparse_proto",
          ":task_context",
          ":task_spec_proto",
          ":term_frequency_map",
          ":workspace",
      ],
  )
  # beam_reader_ops.cc and reader_ops.cc, with no public headers; always
  # linked in full.
  cc_library(
      name = "reader_ops",
      srcs = [
          "beam_reader_ops.cc",
          "reader_ops.cc",
      ],
      deps = [
          ":parser_transitions",
          ":sentence_batch",
          ":sentence_proto",
          ":sparse_proto",
          ":task_context",
          ":task_spec_proto",
      ],
      alwayslink = 1,
  )
  # document_filters.cc, with no public headers; always linked in full.
  cc_library(
      name = "document_filters",
      srcs = ["document_filters.cc"],
      deps = [
          ":document_format",
          ":parser_transitions",
          ":sentence_batch",
          ":sentence_proto",
          ":task_context",
          ":text_formats",
      ],
      alwayslink = 1,
  )
  # lexicon_builder.cc, with no public headers; always linked in full.
  cc_library(
      name = "lexicon_builder",
      srcs = ["lexicon_builder.cc"],
      deps = [
          ":dictionary_proto",
          ":document_format",
          ":parser_transitions",
          ":segmenter_utils",
          ":sentence_batch",
          ":sentence_proto",
          ":task_context",
          ":text_formats",
      ],
      alwayslink = 1,
  )
  # unpack_sparse_features.cc, with no public headers; always linked in full.
  cc_library(
      name = "unpack_sparse_features",
      srcs = ["unpack_sparse_features.cc"],
      deps = [
          ":sparse_proto",
          ":utils",
      ],
      alwayslink = 1,
  )
  # ops/parser_ops.cc together with the op implementation libraries it
  # depends on; this is the single dep of the parser_ops.so binary.
  cc_library(
      name = "parser_ops_cc",
      srcs = ["ops/parser_ops.cc"],
      deps = [
          ":base",
          ":document_filters",
          ":lexicon_builder",
          ":reader_ops",
          ":unpack_sparse_features",
      ],
      alwayslink = 1,
  )
  # Shared object (linkshared) with its dependencies linked statically
  # (linkstatic). "-lm" is passed to the linker by default and dropped on the
  # darwin configuration. Consumed as data by :load_parser_ops_py.
  cc_binary(
      name = "parser_ops.so",
      linkopts = select({
          "//conditions:default": ["-lm"],
          "@org_tensorflow//tensorflow:darwin": [],
      }),
      linkshared = 1,
      linkstatic = 1,
      deps = [":parser_ops_cc"],
  )
  412. # cc tests
  # Fixture files referenced through `data` by tests in this file.
  filegroup(
      name = "testdata",
      srcs = [
          "testdata/context.pbtxt",
          "testdata/document",
          "testdata/mini-training-set",
      ],
  )
  # Every file directly under models/parsey_mcparseface/.
  filegroup(
      name = "parsey_data",
      srcs = glob(["models/parsey_mcparseface/*"]),
  )
  # Small test: binary_segment_state_test.cc.
  cc_test(
      name = "binary_segment_state_test",
      size = "small",
      srcs = ["binary_segment_state_test.cc"],
      deps = [
          ":base",
          ":parser_transitions",
          ":term_frequency_map",
          ":test_main",
      ],
  )
  # Small test: shared_store_test.cc.
  cc_test(
      name = "shared_store_test",
      size = "small",
      srcs = ["shared_store_test.cc"],
      deps = [
          ":shared_store",
          ":test_main",
      ],
  )
  # Test: char_properties_test.cc (no explicit size is set).
  cc_test(
      name = "char_properties_test",
      srcs = ["char_properties_test.cc"],
      deps = [
          ":char_properties",
          ":test_main",
      ],
  )
  # Test: segmenter_utils_test.cc (no explicit size is set).
  cc_test(
      name = "segmenter_utils_test",
      srcs = ["segmenter_utils_test.cc"],
      deps = [
          ":base",
          ":segmenter_utils",
          ":sentence_proto",
          ":test_main",
      ],
  )
  # Medium test: sentence_features_test.cc.
  cc_test(
      name = "sentence_features_test",
      size = "medium",
      srcs = ["sentence_features_test.cc"],
      deps = [
          ":feature_extractor",
          ":populate_test_inputs",
          ":sentence_features",
          ":sentence_proto",
          ":task_context",
          ":task_spec_proto",
          ":term_frequency_map",
          ":test_main",
          ":workspace",
      ],
  )
  # Test: morphology_label_set_test.cc (no explicit size is set).
  cc_test(
      name = "morphology_label_set_test",
      srcs = ["morphology_label_set_test.cc"],
      deps = [
          ":morphology_label_set",
          ":test_main",
      ],
  )
  # Small test: arc_standard_transitions_test.cc; runs with the :testdata
  # fixtures available.
  cc_test(
      name = "arc_standard_transitions_test",
      size = "small",
      srcs = ["arc_standard_transitions_test.cc"],
      data = [":testdata"],
      deps = [
          ":parser_transitions",
          ":populate_test_inputs",
          ":sentence_proto",
          ":task_spec_proto",
          ":test_main",
      ],
  )
  # Small test: binary_segment_transitions_test.cc.
  cc_test(
      name = "binary_segment_transitions_test",
      size = "small",
      srcs = ["binary_segment_transitions_test.cc"],
      deps = [
          ":parser_transitions",
          ":sentence_proto",
          ":test_main",
      ],
  )
  # Small test: tagger_transitions_test.cc; runs with the :testdata fixtures
  # available.
  cc_test(
      name = "tagger_transitions_test",
      size = "small",
      srcs = ["tagger_transitions_test.cc"],
      data = [":testdata"],
      deps = [
          ":parser_transitions",
          ":populate_test_inputs",
          ":sentence_proto",
          ":task_spec_proto",
          ":test_main",
      ],
  )
  # Small test: parser_features_test.cc.
  cc_test(
      name = "parser_features_test",
      size = "small",
      srcs = ["parser_features_test.cc"],
      deps = [
          ":feature_extractor",
          ":parser_transitions",
          ":populate_test_inputs",
          ":sentence_proto",
          ":task_context",
          ":task_spec_proto",
          ":term_frequency_map",
          ":test_main",
          ":workspace",
      ],
  )
  539. # py graph builder and trainer
  # Invokes the tf_gen_op_libs macro for the "parser_ops" op library name.
  # NOTE(review): presumably this is what defines :parser_ops_op_lib, which
  # the op-wrapper target depends on - confirm against the macro.
  tf_gen_op_libs(
      op_lib_names = ["parser_ops"],
  )
  # Op wrapper target produced by the tf_gen_op_wrapper_py macro from
  # :parser_ops_op_lib; :graph_builder depends on it.
  tf_gen_op_wrapper_py(
      name = "parser_ops",
      deps = [":parser_ops_op_lib"],
  )
  # load_parser_ops.py, shipped with parser_ops.so as runtime data.
  py_library(
      name = "load_parser_ops_py",
      srcs = ["load_parser_ops.py"],
      data = [":parser_ops.so"],
  )
  # graph_builder.py; depends on the parser op loader and wrappers plus
  # TensorFlow's Python package and core protos.
  py_library(
      name = "graph_builder",
      srcs = ["graph_builder.py"],
      deps = [
          ":load_parser_ops_py",
          ":parser_ops",
          "@org_tensorflow//tensorflow:tensorflow_py",
          "@org_tensorflow//tensorflow/core:protos_all_py",
      ],
  )
  # structured_graph_builder.py; depends only on :graph_builder.
  py_library(
      name = "structured_graph_builder",
      srcs = ["structured_graph_builder.py"],
      deps = [":graph_builder"],
  )
  # Executable built from parser_trainer.py.
  py_binary(
      name = "parser_trainer",
      srcs = ["parser_trainer.py"],
      deps = [
          ":graph_builder",
          ":structured_graph_builder",
          ":task_spec_py_pb2",
      ],
  )
  # Executable built from parser_eval.py.
  py_binary(
      name = "parser_eval",
      srcs = ["parser_eval.py"],
      deps = [
          ":graph_builder",
          ":sentence_py_pb2",
          ":structured_graph_builder",
      ],
  )
  # Executable built from conll2tree.py.
  py_binary(
      name = "conll2tree",
      srcs = ["conll2tree.py"],
      deps = [
          ":graph_builder",
          ":sentence_py_pb2",
      ],
  )
  595. # py tests
  # Small Python test: lexicon_builder_test.py.
  py_test(
      name = "lexicon_builder_test",
      size = "small",
      srcs = ["lexicon_builder_test.py"],
      deps = [
          ":graph_builder",
          ":sentence_py_pb2",
          ":task_spec_py_pb2",
      ],
  )
  # Small Python test: text_formats_test.py.
  py_test(
      name = "text_formats_test",
      size = "small",
      srcs = ["text_formats_test.py"],
      deps = [
          ":graph_builder",
          ":sentence_py_pb2",
          ":task_spec_py_pb2",
      ],
  )
  # Medium Python test: reader_ops_test.py; uses the :testdata fixtures and
  # carries the "notsan" tag.
  py_test(
      name = "reader_ops_test",
      size = "medium",
      srcs = ["reader_ops_test.py"],
      data = [":testdata"],
      tags = ["notsan"],
      deps = [
          ":dictionary_py_pb2",
          ":graph_builder",
          ":sparse_py_pb2",
      ],
  )
  # Medium Python test: beam_reader_ops_test.py; uses the :testdata fixtures
  # and carries the "notsan" tag.
  py_test(
      name = "beam_reader_ops_test",
      size = "medium",
      srcs = ["beam_reader_ops_test.py"],
      data = [":testdata"],
      tags = ["notsan"],
      deps = [":structured_graph_builder"],
  )
  # Medium Python test: graph_builder_test.py; uses the :testdata fixtures
  # and carries the "notsan" tag.
  py_test(
      name = "graph_builder_test",
      size = "medium",
      srcs = ["graph_builder_test.py"],
      data = [":testdata"],
      tags = ["notsan"],
      deps = [
          ":graph_builder",
          ":sparse_py_pb2",
      ],
  )
  # Large shell test: parser_trainer_test.sh, run with the parser_eval and
  # parser_trainer binaries and the :testdata fixtures as runtime data.
  sh_test(
      name = "parser_trainer_test",
      size = "large",
      srcs = ["parser_trainer_test.sh"],
      data = [
          ":parser_eval",
          ":parser_trainer",
          ":testdata",
      ],
      tags = ["notsan"],
  )
  658. ":testdata",
  659. ],
  660. tags = ["notsan"],
  661. )