  # Description:
  #   A syntactic parser and part-of-speech tagger in TensorFlow.
  # Package-wide defaults: targets are private unless they set their own
  # visibility, and the layering_check feature is switched off.
  package(
      default_visibility = ["//visibility:private"],
      features = ["-layering_check"],
  )

  licenses(["notice"])  # Apache 2.0
  # Build macros used below for the proto libraries and the TensorFlow op
  # wrappers. Bazel requires the first argument of load() to be a label that
  # names a .bzl file; the bare name "syntaxnet" is rejected by current
  # releases.
  # NOTE(review): assumes the macros live in syntaxnet.bzl in this package.
  load(
      ":syntaxnet.bzl",
      "tf_proto_library",
      "tf_proto_library_py",
      "tf_gen_op_libs",
      "tf_gen_op_wrapper_py",
  )
  # Proto libraries.
  # Proto library for feature_extractor.proto.
  tf_proto_library(
      name = "feature_extractor_proto",
      srcs = ["feature_extractor.proto"],
  )
  # Proto library for sentence.proto, plus the tf_proto_library_py variant
  # that the Python targets in this file depend on.
  tf_proto_library(
      name = "sentence_proto",
      srcs = ["sentence.proto"],
  )

  tf_proto_library_py(
      name = "sentence_py_pb2",
      srcs = ["sentence.proto"],
  )
  # Proto library for dictionary.proto, plus its tf_proto_library_py variant.
  tf_proto_library(
      name = "dictionary_proto",
      srcs = ["dictionary.proto"],
  )

  tf_proto_library_py(
      name = "dictionary_py_pb2",
      srcs = ["dictionary.proto"],
  )
  # Proto library for kbest_syntax.proto; it depends on the sentence proto.
  tf_proto_library(
      name = "kbest_syntax_proto",
      srcs = ["kbest_syntax.proto"],
      deps = [":sentence_proto"],
  )
  # Proto library for task_spec.proto, plus its tf_proto_library_py variant.
  tf_proto_library(
      name = "task_spec_proto",
      srcs = ["task_spec.proto"],
  )

  tf_proto_library_py(
      name = "task_spec_py_pb2",
      srcs = ["task_spec.proto"],
  )
  # Proto library for sparse.proto, plus its tf_proto_library_py variant.
  tf_proto_library(
      name = "sparse_proto",
      srcs = ["sparse.proto"],
  )

  tf_proto_library_py(
      name = "sparse_py_pb2",
      srcs = ["sparse.proto"],
  )
  # C++ libraries for feature extraction and parsing.
  # Common base header, visible to every package. Besides re2, protobuf and
  # Eigen it depends on TensorFlow's framework and lib targets by default,
  # and on framework_headers_lib instead when the darwin condition matches.
  cc_library(
      name = "base",
      hdrs = ["base.h"],
      visibility = ["//visibility:public"],
      deps = [
          "@re2//:re2",
          "@tf//google/protobuf",
          "@tf//third_party/eigen3",
      ] + select({
          "//conditions:default": [
              "@tf//tensorflow/core:framework",
              "@tf//tensorflow/core:lib",
          ],
          "@tf//tensorflow:darwin": [
              "@tf//tensorflow/core:framework_headers_lib",
          ],
      }),
  )
  # Utility library (utils.cc / utils.h) built on :base and the unicodetext
  # target from //util/utf8.
  cc_library(
      name = "utils",
      srcs = ["utils.cc"],
      hdrs = [
          "utils.h",
      ],
      deps = [
          ":base",
          "//util/utf8:unicodetext",
      ],
  )
  # Test-only library built from test_main.cc and listed in the deps of the
  # C++ tests in this file. It links libm explicitly.
  cc_library(
      name = "test_main",
      testonly = True,
      srcs = ["test_main.cc"],
      linkopts = ["-lm"],
      deps = [
          "@tf//tensorflow/core:lib",
          "@tf//tensorflow/core:testlib",
          "//external:gtest",
      ],
  )
  # Library for document_format.cc / document_format.h.
  cc_library(
      name = "document_format",
      srcs = ["document_format.cc"],
      hdrs = ["document_format.h"],
      deps = [
          ":registry",
          ":sentence_proto",
          ":task_context",
      ],
  )
  # Source-only library (no headers). alwayslink keeps its object files in
  # the final link even when nothing references their symbols directly.
  # NOTE(review): presumably needed because the formats self-register with
  # :registry - confirm against text_formats.cc.
  cc_library(
      name = "text_formats",
      srcs = ["text_formats.cc"],
      deps = [
          ":document_format",
      ],
      alwayslink = True,
  )
  # Library for fml_parser.cc / fml_parser.h.
  cc_library(
      name = "fml_parser",
      srcs = ["fml_parser.cc"],
      hdrs = ["fml_parser.h"],
      deps = [
          ":feature_extractor_proto",
          ":utils",
      ],
  )
  # Header-only library exposing proto_io.h together with the proto and
  # task-context targets it depends on.
  cc_library(
      name = "proto_io",
      hdrs = ["proto_io.h"],
      deps = [
          ":feature_extractor_proto",
          ":fml_parser",
          ":kbest_syntax_proto",
          ":sentence_proto",
          ":task_context",
      ],
  )
  # Feature extractor library; exports both feature_extractor.h and
  # feature_types.h.
  cc_library(
      name = "feature_extractor",
      srcs = ["feature_extractor.cc"],
      hdrs = [
          "feature_extractor.h",
          "feature_types.h",
      ],
      deps = [
          ":document_format",
          ":feature_extractor_proto",
          ":kbest_syntax_proto",
          ":proto_io",
          ":sentence_proto",
          ":task_context",
          ":utils",
          ":workspace",
      ],
  )
  # Library for affix.cc / affix.h.
  cc_library(
      name = "affix",
      srcs = ["affix.cc"],
      hdrs = ["affix.h"],
      deps = [
          ":dictionary_proto",
          ":feature_extractor",
          ":shared_store",
          ":term_frequency_map",
          ":utils",
          ":workspace",
      ],
  )
  # Library for sentence_features.cc / sentence_features.h.
  cc_library(
      name = "sentence_features",
      srcs = ["sentence_features.cc"],
      hdrs = ["sentence_features.h"],
      deps = [
          ":affix",
          ":feature_extractor",
          ":registry",
      ],
  )
  # Library for shared_store.cc / shared_store.h.
  cc_library(
      name = "shared_store",
      srcs = ["shared_store.cc"],
      hdrs = ["shared_store.h"],
      deps = [
          ":utils",
      ],
  )
  # Library for registry.cc / registry.h.
  cc_library(
      name = "registry",
      srcs = ["registry.cc"],
      hdrs = ["registry.h"],
      deps = [
          ":utils",
      ],
  )
  # Library for workspace.cc / workspace.h.
  cc_library(
      name = "workspace",
      srcs = ["workspace.cc"],
      hdrs = ["workspace.h"],
      deps = [
          ":utils",
      ],
  )
  # Library for task_context.cc / task_context.h; depends on the task_spec
  # proto.
  cc_library(
      name = "task_context",
      srcs = ["task_context.cc"],
      hdrs = ["task_context.h"],
      deps = [
          ":task_spec_proto",
          ":utils",
      ],
  )
  # Publicly visible library for term_frequency_map.cc / .h. alwayslink keeps
  # its object files in every binary that links it.
  cc_library(
      name = "term_frequency_map",
      srcs = ["term_frequency_map.cc"],
      hdrs = ["term_frequency_map.h"],
      visibility = ["//visibility:public"],
      deps = [
          ":utils",
      ],
      alwayslink = True,
  )
  # Parser state plus the arc-standard and tagger transition sources. Only
  # parser_state.h and parser_transitions.h are exported as headers.
  # alwayslink keeps all object files in the final link.
  cc_library(
      name = "parser_transitions",
      srcs = [
          "arc_standard_transitions.cc",
          "parser_state.cc",
          "parser_transitions.cc",
          "tagger_transitions.cc",
      ],
      hdrs = [
          "parser_state.h",
          "parser_transitions.h",
      ],
      deps = [
          ":kbest_syntax_proto",
          ":registry",
          ":shared_store",
          ":task_context",
          ":term_frequency_map",
      ],
      alwayslink = True,
  )
  # Test-only helper library (populate_test_inputs.cc / .h) listed in the
  # deps of several C++ tests in this file.
  cc_library(
      name = "populate_test_inputs",
      testonly = True,
      srcs = ["populate_test_inputs.cc"],
      hdrs = ["populate_test_inputs.h"],
      deps = [
          ":dictionary_proto",
          ":sentence_proto",
          ":task_context",
          ":term_frequency_map",
          ":test_main",
      ],
  )
  # Library for parser_features.cc / parser_features.h. alwayslink keeps its
  # object files in the final link.
  cc_library(
      name = "parser_features",
      srcs = ["parser_features.cc"],
      hdrs = ["parser_features.h"],
      deps = [
          ":affix",
          ":feature_extractor",
          ":parser_transitions",
          ":registry",
          ":sentence_features",
          ":sentence_proto",
          ":task_context",
          ":term_frequency_map",
          ":workspace",
      ],
      alwayslink = True,
  )
  # Library for embedding_feature_extractor.cc / .h; depends on the sparse
  # proto and the parser feature and transition libraries.
  cc_library(
      name = "embedding_feature_extractor",
      srcs = ["embedding_feature_extractor.cc"],
      hdrs = ["embedding_feature_extractor.h"],
      deps = [
          ":feature_extractor",
          ":parser_features",
          ":parser_transitions",
          ":sparse_proto",
          ":task_context",
          ":workspace",
      ],
  )
  # Library for sentence_batch.cc / sentence_batch.h.
  cc_library(
      name = "sentence_batch",
      srcs = ["sentence_batch.cc"],
      hdrs = ["sentence_batch.h"],
      deps = [
          ":embedding_feature_extractor",
          ":feature_extractor",
          ":parser_features",
          ":parser_transitions",
          ":sparse_proto",
          ":task_context",
          ":task_spec_proto",
          ":term_frequency_map",
          ":workspace",
      ],
  )
  # Source-only library built from beam_reader_ops.cc and reader_ops.cc.
  # alwayslink keeps its object files in the final link.
  # NOTE(review): presumably so op registrations survive linking - confirm.
  cc_library(
      name = "reader_ops",
      srcs = [
          "beam_reader_ops.cc",
          "reader_ops.cc",
      ],
      deps = [
          ":parser_features",
          ":parser_transitions",
          ":sentence_batch",
          ":sentence_proto",
          ":task_context",
          ":task_spec_proto",
      ],
      alwayslink = True,
  )
  # Source-only library built from document_filters.cc; alwayslink keeps its
  # object files in the final link.
  cc_library(
      name = "document_filters",
      srcs = ["document_filters.cc"],
      deps = [
          ":document_format",
          ":parser_features",
          ":parser_transitions",
          ":sentence_batch",
          ":sentence_proto",
          ":task_context",
          ":task_spec_proto",
          ":text_formats",
      ],
      alwayslink = True,
  )
  # Source-only library built from lexicon_builder.cc; alwayslink keeps its
  # object files in the final link.
  cc_library(
      name = "lexicon_builder",
      srcs = ["lexicon_builder.cc"],
      deps = [
          ":document_format",
          ":parser_features",
          ":parser_transitions",
          ":sentence_batch",
          ":sentence_proto",
          ":task_context",
          ":task_spec_proto",
          ":text_formats",
      ],
      alwayslink = True,
  )
  # Source-only library built from unpack_sparse_features.cc; alwayslink
  # keeps its object files in the final link.
  cc_library(
      name = "unpack_sparse_features",
      srcs = ["unpack_sparse_features.cc"],
      deps = [
          ":sparse_proto",
          ":utils",
      ],
      alwayslink = True,
  )
  # Bundles ops/parser_ops.cc with the reader, filter, lexicon and
  # sparse-feature libraries; this is the only dep of the parser_ops.so
  # binary. alwayslink keeps all of it in the final link.
  cc_library(
      name = "parser_ops_cc",
      srcs = ["ops/parser_ops.cc"],
      deps = [
          ":base",
          ":document_filters",
          ":lexicon_builder",
          ":reader_ops",
          ":unpack_sparse_features",
      ],
      alwayslink = True,
  )
  # Shared library (linkshared) containing :parser_ops_cc, with linkstatic
  # set. libm is linked explicitly except when the darwin condition matches.
  cc_binary(
      name = "parser_ops.so",
      linkopts = select({
          "//conditions:default": ["-lm"],
          "@tf//tensorflow:darwin": [],
      }),
      linkshared = True,
      linkstatic = True,
      deps = [
          ":parser_ops_cc",
      ],
  )
  # C++ tests.
  # Data files referenced through the data attribute of tests in this file.
  filegroup(
      name = "testdata",
      srcs = [
          "testdata/context.pbtxt",
          "testdata/document",
          "testdata/mini-training-set",
      ],
  )
  # Small C++ test for :shared_store.
  cc_test(
      name = "shared_store_test",
      size = "small",
      srcs = ["shared_store_test.cc"],
      deps = [
          ":shared_store",
          ":test_main",
      ],
  )
  # Medium-sized C++ test for :sentence_features.
  cc_test(
      name = "sentence_features_test",
      size = "medium",
      srcs = ["sentence_features_test.cc"],
      deps = [
          ":feature_extractor",
          ":populate_test_inputs",
          ":sentence_features",
          ":sentence_proto",
          ":task_context",
          ":task_spec_proto",
          ":term_frequency_map",
          ":test_main",
          ":workspace",
      ],
  )
  # Small C++ test built from arc_standard_transitions_test.cc; the
  # :testdata files are available at run time.
  cc_test(
      name = "arc_standard_transitions_test",
      size = "small",
      srcs = ["arc_standard_transitions_test.cc"],
      data = [":testdata"],
      deps = [
          ":parser_transitions",
          ":populate_test_inputs",
          ":test_main",
      ],
  )
  # Small C++ test built from tagger_transitions_test.cc; the :testdata
  # files are available at run time.
  cc_test(
      name = "tagger_transitions_test",
      size = "small",
      srcs = ["tagger_transitions_test.cc"],
      data = [":testdata"],
      deps = [
          ":parser_transitions",
          ":populate_test_inputs",
          ":test_main",
      ],
  )
  # Small C++ test for :parser_features.
  cc_test(
      name = "parser_features_test",
      size = "small",
      srcs = ["parser_features_test.cc"],
      deps = [
          ":feature_extractor",
          ":parser_features",
          ":parser_transitions",
          ":populate_test_inputs",
          ":sentence_proto",
          ":task_context",
          ":task_spec_proto",
          ":term_frequency_map",
          ":test_main",
          ":workspace",
      ],
  )
  # Python graph builder and trainer.
  # Op library and generated Python wrapper for parser_ops.
  # NOTE(review): :parser_ops_op_lib is presumably the target emitted by
  # tf_gen_op_libs for the "parser_ops" entry - confirm in the macro.
  tf_gen_op_libs(
      op_lib_names = ["parser_ops"],
  )

  tf_gen_op_wrapper_py(
      name = "parser_ops",
      deps = [":parser_ops_op_lib"],
  )
  # Python library for load_parser_ops.py; the parser_ops.so shared library
  # is made available to it as a runtime data dependency.
  py_library(
      name = "load_parser_ops_py",
      srcs = ["load_parser_ops.py"],
      data = [":parser_ops.so"],
  )
  # Python library for graph_builder.py; depends on TensorFlow, the op
  # loader and the generated parser_ops wrapper.
  py_library(
      name = "graph_builder",
      srcs = ["graph_builder.py"],
      deps = [
          "@tf//tensorflow:tensorflow_py",
          "@tf//tensorflow/core:protos_all_py",
          ":load_parser_ops_py",
          ":parser_ops",
      ],
  )
  # Python library for structured_graph_builder.py, layered on
  # :graph_builder.
  py_library(
      name = "structured_graph_builder",
      srcs = ["structured_graph_builder.py"],
      deps = [
          ":graph_builder",
      ],
  )
  # Python binary built from parser_trainer.py.
  py_binary(
      name = "parser_trainer",
      srcs = ["parser_trainer.py"],
      deps = [
          ":graph_builder",
          ":structured_graph_builder",
          ":task_spec_py_pb2",
      ],
  )
  # Python binary built from parser_eval.py.
  py_binary(
      name = "parser_eval",
      srcs = ["parser_eval.py"],
      deps = [
          ":graph_builder",
          ":sentence_py_pb2",
          ":structured_graph_builder",
      ],
  )
  # Python binary built from conll2tree.py.
  py_binary(
      name = "conll2tree",
      srcs = ["conll2tree.py"],
      deps = [
          ":graph_builder",
          ":sentence_py_pb2",
      ],
  )
  # Python tests.
  # Small Python test built from lexicon_builder_test.py.
  py_test(
      name = "lexicon_builder_test",
      size = "small",
      srcs = ["lexicon_builder_test.py"],
      deps = [
          ":graph_builder",
          ":sentence_py_pb2",
          ":task_spec_py_pb2",
      ],
  )
  # Small Python test built from text_formats_test.py.
  py_test(
      name = "text_formats_test",
      size = "small",
      srcs = ["text_formats_test.py"],
      deps = [
          ":graph_builder",
          ":sentence_py_pb2",
          ":task_spec_py_pb2",
      ],
  )
  # Medium-sized Python test built from reader_ops_test.py; uses :testdata
  # and carries the "notsan" tag.
  py_test(
      name = "reader_ops_test",
      size = "medium",
      srcs = ["reader_ops_test.py"],
      data = [":testdata"],
      tags = ["notsan"],
      deps = [
          ":dictionary_py_pb2",
          ":graph_builder",
          ":sparse_py_pb2",
      ],
  )
  # Medium-sized Python test built from beam_reader_ops_test.py; uses
  # :testdata and carries the "notsan" tag.
  py_test(
      name = "beam_reader_ops_test",
      size = "medium",
      srcs = ["beam_reader_ops_test.py"],
      data = [":testdata"],
      tags = ["notsan"],
      deps = [
          ":structured_graph_builder",
      ],
  )
  # Medium-sized Python test for :graph_builder; uses :testdata and carries
  # the "notsan" tag.
  py_test(
      name = "graph_builder_test",
      size = "medium",
      srcs = ["graph_builder_test.py"],
      data = [
          ":testdata",
      ],
      tags = ["notsan"],
      deps = [
          ":graph_builder",
          ":sparse_py_pb2",
      ],
  )
  # Large shell test (parser_trainer_test.sh) with the trainer and eval
  # binaries and :testdata available as runtime data; tagged "notsan".
  sh_test(
      name = "parser_trainer_test",
      size = "large",
      srcs = ["parser_trainer_test.sh"],
      data = [
          ":parser_eval",
          ":parser_trainer",
          ":testdata",
      ],
      tags = ["notsan"],
  )