BUILD 14 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738
# Description:
#   A syntactic parser and part-of-speech tagger in TensorFlow.
  3. package(
  4. default_visibility = [
  5. "//visibility:private",
  6. ],
  7. features = ["-layering_check"],
  8. )
  9. licenses(["notice"]) # Apache 2.0
  10. load(
  11. "syntaxnet",
  12. "tf_proto_library",
  13. "tf_proto_library_py",
  14. "tf_gen_op_libs",
  15. "tf_gen_op_wrapper_py",
  16. )
# Proto libraries.
  18. tf_proto_library(
  19. name = "feature_extractor_proto",
  20. srcs = ["feature_extractor.proto"],
  21. )
  22. tf_proto_library(
  23. name = "sentence_proto",
  24. srcs = ["sentence.proto"],
  25. )
  26. tf_proto_library_py(
  27. name = "sentence_py_pb2",
  28. srcs = ["sentence.proto"],
  29. )
  30. tf_proto_library(
  31. name = "dictionary_proto",
  32. srcs = ["dictionary.proto"],
  33. )
  34. tf_proto_library_py(
  35. name = "dictionary_py_pb2",
  36. srcs = ["dictionary.proto"],
  37. )
  38. tf_proto_library(
  39. name = "kbest_syntax_proto",
  40. srcs = ["kbest_syntax.proto"],
  41. deps = [":sentence_proto"],
  42. )
  43. tf_proto_library(
  44. name = "task_spec_proto",
  45. srcs = ["task_spec.proto"],
  46. )
  47. tf_proto_library_py(
  48. name = "task_spec_py_pb2",
  49. srcs = ["task_spec.proto"],
  50. )
  51. tf_proto_library(
  52. name = "sparse_proto",
  53. srcs = ["sparse.proto"],
  54. )
  55. tf_proto_library_py(
  56. name = "sparse_py_pb2",
  57. srcs = ["sparse.proto"],
  58. )
# C++ libraries for feature extraction and parsing.
  60. cc_library(
  61. name = "base",
  62. hdrs = ["base.h"],
  63. visibility = ["//visibility:public"],
  64. deps = [
  65. "@com_googlesource_code_re2//:re2",
  66. "@protobuf//:protobuf",
  67. "@org_tensorflow//third_party/eigen3",
  68. ] + select({
  69. "//conditions:default": [
  70. "@org_tensorflow//tensorflow/core:framework",
  71. "@org_tensorflow//tensorflow/core:lib",
  72. ],
  73. "@org_tensorflow//tensorflow:darwin": [
  74. "@org_tensorflow//tensorflow/core:framework_headers_lib",
  75. ],
  76. }),
  77. )
  78. cc_library(
  79. name = "utils",
  80. srcs = ["utils.cc"],
  81. hdrs = [
  82. "utils.h",
  83. ],
  84. deps = [
  85. ":base",
  86. "//util/utf8:unicodetext",
  87. ],
  88. )
  89. cc_library(
  90. name = "test_main",
  91. testonly = 1,
  92. srcs = ["test_main.cc"],
  93. linkopts = ["-lm"],
  94. deps = [
  95. "//external:gtest",
  96. "@org_tensorflow//tensorflow/core:lib",
  97. "@org_tensorflow//tensorflow/core:testlib",
  98. ],
  99. )
  100. cc_library(
  101. name = "document_format",
  102. srcs = ["document_format.cc"],
  103. hdrs = ["document_format.h"],
  104. deps = [
  105. ":registry",
  106. ":sentence_proto",
  107. ":task_context",
  108. ],
  109. )
  110. cc_library(
  111. name = "text_formats",
  112. srcs = ["text_formats.cc"],
  113. deps = [
  114. ":document_format",
  115. ":segmenter_utils",
  116. ":sentence_proto",
  117. ],
  118. alwayslink = 1,
  119. )
  120. cc_library(
  121. name = "fml_parser",
  122. srcs = ["fml_parser.cc"],
  123. hdrs = ["fml_parser.h"],
  124. deps = [
  125. ":feature_extractor_proto",
  126. ":utils",
  127. ],
  128. )
  129. cc_library(
  130. name = "proto_io",
  131. hdrs = ["proto_io.h"],
  132. deps = [
  133. ":feature_extractor_proto",
  134. ":fml_parser",
  135. ":sentence_proto",
  136. ":task_context",
  137. ],
  138. )
  139. cc_library(
  140. name = "char_properties",
  141. srcs = ["char_properties.cc"],
  142. hdrs = ["char_properties.h"],
  143. deps = [
  144. ":registry",
  145. ":utils",
  146. "//util/utf8:unicodetext",
  147. ],
  148. alwayslink = 1,
  149. )
  150. cc_library(
  151. name = "segmenter_utils",
  152. srcs = ["segmenter_utils.cc"],
  153. hdrs = ["segmenter_utils.h"],
  154. deps = [
  155. ":base",
  156. ":char_properties",
  157. ":sentence_proto",
  158. "//util/utf8:unicodetext",
  159. ],
  160. alwayslink = 1,
  161. )
  162. cc_library(
  163. name = "feature_extractor",
  164. srcs = ["feature_extractor.cc"],
  165. hdrs = [
  166. "feature_extractor.h",
  167. "feature_types.h",
  168. ],
  169. deps = [
  170. ":document_format",
  171. ":feature_extractor_proto",
  172. ":proto_io",
  173. ":sentence_proto",
  174. ":task_context",
  175. ":utils",
  176. ":workspace",
  177. ],
  178. )
  179. cc_library(
  180. name = "affix",
  181. srcs = ["affix.cc"],
  182. hdrs = ["affix.h"],
  183. deps = [
  184. ":dictionary_proto",
  185. ":feature_extractor",
  186. ":sentence_proto",
  187. ":shared_store",
  188. ":term_frequency_map",
  189. ":utils",
  190. ":workspace",
  191. ],
  192. )
  193. cc_library(
  194. name = "sentence_features",
  195. srcs = ["sentence_features.cc"],
  196. hdrs = ["sentence_features.h"],
  197. deps = [
  198. ":affix",
  199. ":feature_extractor",
  200. ":registry",
  201. ":segmenter_utils",
  202. ],
  203. )
  204. cc_library(
  205. name = "shared_store",
  206. srcs = ["shared_store.cc"],
  207. hdrs = ["shared_store.h"],
  208. deps = [
  209. ":utils",
  210. ],
  211. )
  212. cc_library(
  213. name = "registry",
  214. srcs = ["registry.cc"],
  215. hdrs = ["registry.h"],
  216. deps = [
  217. ":utils",
  218. ],
  219. )
  220. cc_library(
  221. name = "workspace",
  222. srcs = ["workspace.cc"],
  223. hdrs = ["workspace.h"],
  224. deps = [
  225. ":utils",
  226. ],
  227. )
  228. cc_library(
  229. name = "task_context",
  230. srcs = ["task_context.cc"],
  231. hdrs = ["task_context.h"],
  232. deps = [
  233. ":task_spec_proto",
  234. ":utils",
  235. ],
  236. )
  237. cc_library(
  238. name = "term_frequency_map",
  239. srcs = ["term_frequency_map.cc"],
  240. hdrs = ["term_frequency_map.h"],
  241. visibility = ["//visibility:public"],
  242. deps = [
  243. ":utils",
  244. ],
  245. alwayslink = 1,
  246. )
  247. cc_library(
  248. name = "morphology_label_set",
  249. srcs = ["morphology_label_set.cc"],
  250. hdrs = ["morphology_label_set.h"],
  251. deps = [
  252. ":document_format",
  253. ":feature_extractor",
  254. ":proto_io",
  255. ":registry",
  256. ":sentence_proto",
  257. ":utils",
  258. ],
  259. )
  260. cc_library(
  261. name = "parser_transitions",
  262. srcs = [
  263. "arc_standard_transitions.cc",
  264. "binary_segment_state.cc",
  265. "binary_segment_transitions.cc",
  266. "morpher_transitions.cc",
  267. "parser_features.cc",
  268. "parser_state.cc",
  269. "parser_transitions.cc",
  270. "tagger_transitions.cc",
  271. ],
  272. hdrs = [
  273. "binary_segment_state.h",
  274. "parser_features.h",
  275. "parser_state.h",
  276. "parser_transitions.h",
  277. ],
  278. deps = [
  279. ":affix",
  280. ":feature_extractor",
  281. ":kbest_syntax_proto",
  282. ":morphology_label_set",
  283. ":registry",
  284. ":segmenter_utils",
  285. ":sentence_features",
  286. ":sentence_proto",
  287. ":shared_store",
  288. ":task_context",
  289. ":term_frequency_map",
  290. ":workspace",
  291. ],
  292. alwayslink = 1,
  293. )
  294. cc_library(
  295. name = "populate_test_inputs",
  296. testonly = 1,
  297. srcs = ["populate_test_inputs.cc"],
  298. hdrs = ["populate_test_inputs.h"],
  299. deps = [
  300. ":dictionary_proto",
  301. ":sentence_proto",
  302. ":task_context",
  303. ":task_spec_proto",
  304. ":term_frequency_map",
  305. ":test_main",
  306. ],
  307. )
  308. cc_library(
  309. name = "embedding_feature_extractor",
  310. srcs = ["embedding_feature_extractor.cc"],
  311. hdrs = ["embedding_feature_extractor.h"],
  312. deps = [
  313. ":feature_extractor",
  314. ":parser_transitions",
  315. ":sparse_proto",
  316. ":task_context",
  317. ":workspace",
  318. ],
  319. )
  320. cc_library(
  321. name = "sentence_batch",
  322. srcs = ["sentence_batch.cc"],
  323. hdrs = ["sentence_batch.h"],
  324. deps = [
  325. ":embedding_feature_extractor",
  326. ":feature_extractor",
  327. ":parser_transitions",
  328. ":sentence_proto",
  329. ":sparse_proto",
  330. ":task_context",
  331. ":task_spec_proto",
  332. ":term_frequency_map",
  333. ":workspace",
  334. ],
  335. )
  336. cc_library(
  337. name = "reader_ops",
  338. srcs = [
  339. "beam_reader_ops.cc",
  340. "reader_ops.cc",
  341. ],
  342. deps = [
  343. ":parser_transitions",
  344. ":sentence_batch",
  345. ":sentence_proto",
  346. ":sparse_proto",
  347. ":task_context",
  348. ":task_spec_proto",
  349. ],
  350. alwayslink = 1,
  351. )
  352. cc_library(
  353. name = "document_filters",
  354. srcs = ["document_filters.cc"],
  355. deps = [
  356. ":document_format",
  357. ":parser_transitions",
  358. ":sentence_batch",
  359. ":sentence_proto",
  360. ":task_context",
  361. ":text_formats",
  362. ],
  363. alwayslink = 1,
  364. )
  365. cc_library(
  366. name = "lexicon_builder",
  367. srcs = ["lexicon_builder.cc"],
  368. deps = [
  369. ":dictionary_proto",
  370. ":document_format",
  371. ":parser_transitions",
  372. ":segmenter_utils",
  373. ":sentence_batch",
  374. ":sentence_proto",
  375. ":task_context",
  376. ":text_formats",
  377. ],
  378. alwayslink = 1,
  379. )
  380. cc_library(
  381. name = "unpack_sparse_features",
  382. srcs = ["unpack_sparse_features.cc"],
  383. deps = [
  384. ":sparse_proto",
  385. ":utils",
  386. ],
  387. alwayslink = 1,
  388. )
  389. cc_library(
  390. name = "parser_ops_cc",
  391. srcs = ["ops/parser_ops.cc"],
  392. deps = [
  393. ":base",
  394. ":document_filters",
  395. ":lexicon_builder",
  396. ":reader_ops",
  397. ":unpack_sparse_features",
  398. ],
  399. alwayslink = 1,
  400. )
  401. cc_binary(
  402. name = "parser_ops.so",
  403. linkopts = select({
  404. "//conditions:default": ["-lm"],
  405. "@org_tensorflow//tensorflow:darwin": [],
  406. }),
  407. linkshared = 1,
  408. linkstatic = 1,
  409. deps = [
  410. ":parser_ops_cc",
  411. ],
  412. )
# C++ tests.
  414. filegroup(
  415. name = "testdata",
  416. srcs = [
  417. "testdata/context.pbtxt",
  418. "testdata/document",
  419. "testdata/mini-training-set",
  420. ],
  421. )
  422. filegroup(
  423. name = "parsey_data",
  424. srcs = glob(["models/parsey_mcparseface/*"]),
  425. )
  426. cc_test(
  427. name = "binary_segment_state_test",
  428. size = "small",
  429. srcs = ["binary_segment_state_test.cc"],
  430. deps = [
  431. ":base",
  432. ":parser_transitions",
  433. ":term_frequency_map",
  434. ":test_main",
  435. ],
  436. )
  437. cc_test(
  438. name = "shared_store_test",
  439. size = "small",
  440. srcs = ["shared_store_test.cc"],
  441. deps = [
  442. ":shared_store",
  443. ":test_main",
  444. ],
  445. )
  446. cc_test(
  447. name = "char_properties_test",
  448. srcs = ["char_properties_test.cc"],
  449. deps = [
  450. ":char_properties",
  451. ":test_main",
  452. ],
  453. )
  454. cc_test(
  455. name = "segmenter_utils_test",
  456. srcs = ["segmenter_utils_test.cc"],
  457. deps = [
  458. ":base",
  459. ":segmenter_utils",
  460. ":sentence_proto",
  461. ":test_main",
  462. ],
  463. )
  464. cc_test(
  465. name = "sentence_features_test",
  466. size = "medium",
  467. srcs = ["sentence_features_test.cc"],
  468. deps = [
  469. ":feature_extractor",
  470. ":populate_test_inputs",
  471. ":sentence_features",
  472. ":sentence_proto",
  473. ":task_context",
  474. ":task_spec_proto",
  475. ":term_frequency_map",
  476. ":test_main",
  477. ":workspace",
  478. ],
  479. )
  480. cc_test(
  481. name = "morphology_label_set_test",
  482. srcs = ["morphology_label_set_test.cc"],
  483. deps = [
  484. ":morphology_label_set",
  485. ":test_main",
  486. ],
  487. )
  488. cc_test(
  489. name = "arc_standard_transitions_test",
  490. size = "small",
  491. srcs = ["arc_standard_transitions_test.cc"],
  492. data = [":testdata"],
  493. deps = [
  494. ":parser_transitions",
  495. ":populate_test_inputs",
  496. ":sentence_proto",
  497. ":task_spec_proto",
  498. ":test_main",
  499. ],
  500. )
  501. cc_test(
  502. name = "binary_segment_transitions_test",
  503. size = "small",
  504. srcs = ["binary_segment_transitions_test.cc"],
  505. deps = [
  506. ":parser_transitions",
  507. ":sentence_proto",
  508. ":task_context",
  509. ":test_main",
  510. ":workspace",
  511. ],
  512. )
  513. cc_test(
  514. name = "tagger_transitions_test",
  515. size = "small",
  516. srcs = ["tagger_transitions_test.cc"],
  517. data = [":testdata"],
  518. deps = [
  519. ":parser_transitions",
  520. ":populate_test_inputs",
  521. ":sentence_proto",
  522. ":task_spec_proto",
  523. ":test_main",
  524. ],
  525. )
  526. cc_test(
  527. name = "parser_features_test",
  528. size = "small",
  529. srcs = ["parser_features_test.cc"],
  530. deps = [
  531. ":feature_extractor",
  532. ":parser_transitions",
  533. ":populate_test_inputs",
  534. ":sentence_proto",
  535. ":task_context",
  536. ":task_spec_proto",
  537. ":term_frequency_map",
  538. ":test_main",
  539. ":workspace",
  540. ],
  541. )
# Python graph builder and trainer.
  543. tf_gen_op_libs(
  544. op_lib_names = ["parser_ops"],
  545. )
  546. tf_gen_op_wrapper_py(
  547. name = "parser_ops",
  548. deps = [":parser_ops_op_lib"],
  549. )
  550. py_library(
  551. name = "load_parser_ops_py",
  552. srcs = ["load_parser_ops.py"],
  553. data = [":parser_ops.so"],
  554. )
  555. py_library(
  556. name = "graph_builder",
  557. srcs = ["graph_builder.py"],
  558. deps = [
  559. ":load_parser_ops_py",
  560. ":parser_ops",
  561. "@org_tensorflow//tensorflow:tensorflow_py",
  562. "@org_tensorflow//tensorflow/core:protos_all_py",
  563. ],
  564. )
  565. py_library(
  566. name = "structured_graph_builder",
  567. srcs = ["structured_graph_builder.py"],
  568. deps = [
  569. ":graph_builder",
  570. ],
  571. )
  572. py_binary(
  573. name = "parser_trainer",
  574. srcs = ["parser_trainer.py"],
  575. deps = [
  576. ":graph_builder",
  577. ":structured_graph_builder",
  578. ":task_spec_py_pb2",
  579. ],
  580. )
  581. py_binary(
  582. name = "parser_eval",
  583. srcs = ["parser_eval.py"],
  584. deps = [
  585. ":graph_builder",
  586. ":sentence_py_pb2",
  587. ":structured_graph_builder",
  588. ":task_spec_py_pb2",
  589. ],
  590. )
  591. py_binary(
  592. name = "conll2tree",
  593. srcs = ["conll2tree.py"],
  594. deps = [
  595. ":graph_builder",
  596. ":sentence_py_pb2",
  597. ],
  598. )
# Python tests.
  600. py_test(
  601. name = "lexicon_builder_test",
  602. size = "small",
  603. srcs = ["lexicon_builder_test.py"],
  604. deps = [
  605. ":graph_builder",
  606. ":sentence_py_pb2",
  607. ":task_spec_py_pb2",
  608. ],
  609. )
  610. py_test(
  611. name = "text_formats_test",
  612. size = "small",
  613. srcs = ["text_formats_test.py"],
  614. deps = [
  615. ":graph_builder",
  616. ":sentence_py_pb2",
  617. ":task_spec_py_pb2",
  618. ],
  619. )
  620. py_test(
  621. name = "reader_ops_test",
  622. size = "medium",
  623. srcs = ["reader_ops_test.py"],
  624. data = [":testdata"],
  625. tags = ["notsan"],
  626. deps = [
  627. ":dictionary_py_pb2",
  628. ":graph_builder",
  629. ":sparse_py_pb2",
  630. ],
  631. )
  632. py_test(
  633. name = "beam_reader_ops_test",
  634. size = "medium",
  635. srcs = ["beam_reader_ops_test.py"],
  636. data = [":testdata"],
  637. tags = ["notsan"],
  638. deps = [
  639. ":structured_graph_builder",
  640. ],
  641. )
  642. py_test(
  643. name = "graph_builder_test",
  644. size = "medium",
  645. srcs = ["graph_builder_test.py"],
  646. data = [
  647. ":testdata",
  648. ],
  649. tags = ["notsan"],
  650. deps = [
  651. ":graph_builder",
  652. ":sparse_py_pb2",
  653. ],
  654. )
  655. sh_test(
  656. name = "parser_trainer_test",
  657. size = "large",
  658. srcs = ["parser_trainer_test.sh"],
  659. data = [
  660. ":parser_eval",
  661. ":parser_trainer",
  662. ":testdata",
  663. ],
  664. tags = ["notsan"],
  665. )