BUILD 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638
  1. # Description:
  2. # A syntactic parser and part-of-speech tagger in TensorFlow.
  3. package(
  4. default_visibility = ["//visibility:private"],
  5. features = ["-layering_check"],
  6. )
  7. licenses(["notice"]) # Apache 2.0
  8. load(
  9. "syntaxnet",
  10. "tf_proto_library",
  11. "tf_proto_library_py",
  12. "tf_gen_op_libs",
  13. "tf_gen_op_wrapper_py",
  14. )
  15. # proto libraries
  16. tf_proto_library(
  17. name = "feature_extractor_proto",
  18. srcs = ["feature_extractor.proto"],
  19. )
  20. tf_proto_library(
  21. name = "sentence_proto",
  22. srcs = ["sentence.proto"],
  23. )
  24. tf_proto_library_py(
  25. name = "sentence_py_pb2",
  26. srcs = ["sentence.proto"],
  27. )
  28. tf_proto_library(
  29. name = "dictionary_proto",
  30. srcs = ["dictionary.proto"],
  31. )
  32. tf_proto_library_py(
  33. name = "dictionary_py_pb2",
  34. srcs = ["dictionary.proto"],
  35. )
  36. tf_proto_library(
  37. name = "kbest_syntax_proto",
  38. srcs = ["kbest_syntax.proto"],
  39. deps = [":sentence_proto"],
  40. )
  41. tf_proto_library(
  42. name = "task_spec_proto",
  43. srcs = ["task_spec.proto"],
  44. )
  45. tf_proto_library_py(
  46. name = "task_spec_py_pb2",
  47. srcs = ["task_spec.proto"],
  48. )
  49. tf_proto_library(
  50. name = "sparse_proto",
  51. srcs = ["sparse.proto"],
  52. )
  53. tf_proto_library_py(
  54. name = "sparse_py_pb2",
  55. srcs = ["sparse.proto"],
  56. )
  57. # cc libraries for feature extraction and parsing
  58. cc_library(
  59. name = "base",
  60. hdrs = ["base.h"],
  61. visibility = ["//visibility:public"],
  62. deps = [
  63. "@re2//:re2",
  64. "@tf//google/protobuf",
  65. "@tf//third_party/eigen3",
  66. ] + select({
  67. "//conditions:default": [
  68. "@tf//tensorflow/core:framework",
  69. "@tf//tensorflow/core:lib",
  70. ],
  71. "@tf//tensorflow:darwin": [
  72. "@tf//tensorflow/core:framework_headers_lib",
  73. ],
  74. }),
  75. )
  76. cc_library(
  77. name = "utils",
  78. srcs = ["utils.cc"],
  79. hdrs = [
  80. "utils.h",
  81. ],
  82. deps = [
  83. ":base",
  84. "//util/utf8:unicodetext",
  85. ],
  86. )
  87. cc_library(
  88. name = "test_main",
  89. testonly = 1,
  90. srcs = ["test_main.cc"],
  91. linkopts = ["-lm"],
  92. deps = [
  93. "@tf//tensorflow/core:lib",
  94. "@tf//tensorflow/core:testlib",
  95. "//external:gtest",
  96. ],
  97. )
  98. cc_library(
  99. name = "document_format",
  100. srcs = ["document_format.cc"],
  101. hdrs = ["document_format.h"],
  102. deps = [
  103. ":registry",
  104. ":sentence_proto",
  105. ":task_context",
  106. ],
  107. )
  108. cc_library(
  109. name = "text_formats",
  110. srcs = ["text_formats.cc"],
  111. deps = [
  112. ":document_format",
  113. ],
  114. alwayslink = 1,
  115. )
  116. cc_library(
  117. name = "fml_parser",
  118. srcs = ["fml_parser.cc"],
  119. hdrs = ["fml_parser.h"],
  120. deps = [
  121. ":feature_extractor_proto",
  122. ":utils",
  123. ],
  124. )
  125. cc_library(
  126. name = "proto_io",
  127. hdrs = ["proto_io.h"],
  128. deps = [
  129. ":feature_extractor_proto",
  130. ":fml_parser",
  131. ":kbest_syntax_proto",
  132. ":sentence_proto",
  133. ":task_context",
  134. ],
  135. )
  136. cc_library(
  137. name = "feature_extractor",
  138. srcs = ["feature_extractor.cc"],
  139. hdrs = [
  140. "feature_extractor.h",
  141. "feature_types.h",
  142. ],
  143. deps = [
  144. ":document_format",
  145. ":feature_extractor_proto",
  146. ":kbest_syntax_proto",
  147. ":proto_io",
  148. ":sentence_proto",
  149. ":task_context",
  150. ":utils",
  151. ":workspace",
  152. ],
  153. )
  154. cc_library(
  155. name = "affix",
  156. srcs = ["affix.cc"],
  157. hdrs = ["affix.h"],
  158. deps = [
  159. ":dictionary_proto",
  160. ":feature_extractor",
  161. ":shared_store",
  162. ":term_frequency_map",
  163. ":utils",
  164. ":workspace",
  165. ],
  166. )
  167. cc_library(
  168. name = "sentence_features",
  169. srcs = ["sentence_features.cc"],
  170. hdrs = ["sentence_features.h"],
  171. deps = [
  172. ":affix",
  173. ":feature_extractor",
  174. ":registry",
  175. ],
  176. )
  177. cc_library(
  178. name = "shared_store",
  179. srcs = ["shared_store.cc"],
  180. hdrs = ["shared_store.h"],
  181. deps = [
  182. ":utils",
  183. ],
  184. )
  185. cc_library(
  186. name = "registry",
  187. srcs = ["registry.cc"],
  188. hdrs = ["registry.h"],
  189. deps = [
  190. ":utils",
  191. ],
  192. )
  193. cc_library(
  194. name = "workspace",
  195. srcs = ["workspace.cc"],
  196. hdrs = ["workspace.h"],
  197. deps = [
  198. ":utils",
  199. ],
  200. )
  201. cc_library(
  202. name = "task_context",
  203. srcs = ["task_context.cc"],
  204. hdrs = ["task_context.h"],
  205. deps = [
  206. ":task_spec_proto",
  207. ":utils",
  208. ],
  209. )
  210. cc_library(
  211. name = "term_frequency_map",
  212. srcs = ["term_frequency_map.cc"],
  213. hdrs = ["term_frequency_map.h"],
  214. visibility = ["//visibility:public"],
  215. deps = [
  216. ":utils",
  217. ],
  218. alwayslink = 1,
  219. )
  220. cc_library(
  221. name = "parser_transitions",
  222. srcs = [
  223. "arc_standard_transitions.cc",
  224. "parser_state.cc",
  225. "parser_transitions.cc",
  226. "tagger_transitions.cc",
  227. ],
  228. hdrs = [
  229. "parser_state.h",
  230. "parser_transitions.h",
  231. ],
  232. deps = [
  233. ":kbest_syntax_proto",
  234. ":registry",
  235. ":shared_store",
  236. ":task_context",
  237. ":term_frequency_map",
  238. ],
  239. alwayslink = 1,
  240. )
  241. cc_library(
  242. name = "populate_test_inputs",
  243. testonly = 1,
  244. srcs = ["populate_test_inputs.cc"],
  245. hdrs = ["populate_test_inputs.h"],
  246. deps = [
  247. ":dictionary_proto",
  248. ":sentence_proto",
  249. ":task_context",
  250. ":term_frequency_map",
  251. ":test_main",
  252. ],
  253. )
  254. cc_library(
  255. name = "parser_features",
  256. srcs = ["parser_features.cc"],
  257. hdrs = ["parser_features.h"],
  258. deps = [
  259. ":affix",
  260. ":feature_extractor",
  261. ":parser_transitions",
  262. ":registry",
  263. ":sentence_features",
  264. ":sentence_proto",
  265. ":task_context",
  266. ":term_frequency_map",
  267. ":workspace",
  268. ],
  269. alwayslink = 1,
  270. )
  271. cc_library(
  272. name = "embedding_feature_extractor",
  273. srcs = ["embedding_feature_extractor.cc"],
  274. hdrs = ["embedding_feature_extractor.h"],
  275. deps = [
  276. ":feature_extractor",
  277. ":parser_features",
  278. ":parser_transitions",
  279. ":sparse_proto",
  280. ":task_context",
  281. ":workspace",
  282. ],
  283. )
  284. cc_library(
  285. name = "sentence_batch",
  286. srcs = ["sentence_batch.cc"],
  287. hdrs = ["sentence_batch.h"],
  288. deps = [
  289. ":embedding_feature_extractor",
  290. ":feature_extractor",
  291. ":parser_features",
  292. ":parser_transitions",
  293. ":sparse_proto",
  294. ":task_context",
  295. ":task_spec_proto",
  296. ":term_frequency_map",
  297. ":workspace",
  298. ],
  299. )
  300. cc_library(
  301. name = "reader_ops",
  302. srcs = [
  303. "beam_reader_ops.cc",
  304. "reader_ops.cc",
  305. ],
  306. deps = [
  307. ":parser_features",
  308. ":parser_transitions",
  309. ":sentence_batch",
  310. ":sentence_proto",
  311. ":task_context",
  312. ":task_spec_proto",
  313. ],
  314. alwayslink = 1,
  315. )
  316. cc_library(
  317. name = "document_filters",
  318. srcs = ["document_filters.cc"],
  319. deps = [
  320. ":document_format",
  321. ":parser_features",
  322. ":parser_transitions",
  323. ":sentence_batch",
  324. ":sentence_proto",
  325. ":task_context",
  326. ":task_spec_proto",
  327. ":text_formats",
  328. ],
  329. alwayslink = 1,
  330. )
  331. cc_library(
  332. name = "lexicon_builder",
  333. srcs = ["lexicon_builder.cc"],
  334. deps = [
  335. ":document_format",
  336. ":parser_features",
  337. ":parser_transitions",
  338. ":sentence_batch",
  339. ":sentence_proto",
  340. ":task_context",
  341. ":task_spec_proto",
  342. ":text_formats",
  343. ],
  344. alwayslink = 1,
  345. )
  346. cc_library(
  347. name = "unpack_sparse_features",
  348. srcs = ["unpack_sparse_features.cc"],
  349. deps = [
  350. ":sparse_proto",
  351. ":utils",
  352. ],
  353. )
  354. cc_library(
  355. name = "parser_ops_cc",
  356. srcs = ["ops/parser_ops.cc"],
  357. deps = [
  358. ":base",
  359. ":document_filters",
  360. ":lexicon_builder",
  361. ":reader_ops",
  362. ":unpack_sparse_features",
  363. ],
  364. alwayslink = 1,
  365. )
  366. cc_binary(
  367. name = "parser_ops.so",
  368. linkopts = select({
  369. "//conditions:default": ["-lm"],
  370. "@tf//tensorflow:darwin": [],
  371. }),
  372. linkshared = 1,
  373. linkstatic = 1,
  374. deps = [
  375. ":parser_ops_cc",
  376. ],
  377. )
  378. # cc tests
  379. filegroup(
  380. name = "testdata",
  381. srcs = [
  382. "testdata/context.pbtxt",
  383. "testdata/document",
  384. "testdata/mini-training-set",
  385. ],
  386. )
  387. cc_test(
  388. name = "shared_store_test",
  389. size = "small",
  390. srcs = ["shared_store_test.cc"],
  391. deps = [
  392. ":shared_store",
  393. ":test_main",
  394. ],
  395. )
  396. cc_test(
  397. name = "sentence_features_test",
  398. size = "medium",
  399. srcs = ["sentence_features_test.cc"],
  400. deps = [
  401. ":feature_extractor",
  402. ":populate_test_inputs",
  403. ":sentence_features",
  404. ":sentence_proto",
  405. ":task_context",
  406. ":task_spec_proto",
  407. ":term_frequency_map",
  408. ":test_main",
  409. ":workspace",
  410. ],
  411. )
  412. cc_test(
  413. name = "arc_standard_transitions_test",
  414. size = "small",
  415. srcs = ["arc_standard_transitions_test.cc"],
  416. data = [":testdata"],
  417. deps = [
  418. ":parser_transitions",
  419. ":populate_test_inputs",
  420. ":test_main",
  421. ],
  422. )
  423. cc_test(
  424. name = "tagger_transitions_test",
  425. size = "small",
  426. srcs = ["tagger_transitions_test.cc"],
  427. data = [":testdata"],
  428. deps = [
  429. ":parser_transitions",
  430. ":populate_test_inputs",
  431. ":test_main",
  432. ],
  433. )
  434. cc_test(
  435. name = "parser_features_test",
  436. size = "small",
  437. srcs = ["parser_features_test.cc"],
  438. deps = [
  439. ":feature_extractor",
  440. ":parser_features",
  441. ":parser_transitions",
  442. ":populate_test_inputs",
  443. ":sentence_proto",
  444. ":task_context",
  445. ":task_spec_proto",
  446. ":term_frequency_map",
  447. ":test_main",
  448. ":workspace",
  449. ],
  450. )
  451. # py graph builder and trainer
  452. tf_gen_op_libs(
  453. op_lib_names = ["parser_ops"],
  454. )
  455. tf_gen_op_wrapper_py(
  456. name = "parser_ops",
  457. deps = [":parser_ops_op_lib"],
  458. )
  459. py_library(
  460. name = "load_parser_ops_py",
  461. srcs = ["load_parser_ops.py"],
  462. data = [":parser_ops.so"],
  463. )
  464. py_library(
  465. name = "graph_builder",
  466. srcs = ["graph_builder.py"],
  467. deps = [
  468. "@tf//tensorflow:tensorflow_py",
  469. "@tf//tensorflow/core:protos_all_py",
  470. ":load_parser_ops_py",
  471. ":parser_ops",
  472. ],
  473. )
  474. py_library(
  475. name = "structured_graph_builder",
  476. srcs = ["structured_graph_builder.py"],
  477. deps = [
  478. ":graph_builder",
  479. ],
  480. )
  481. py_binary(
  482. name = "parser_trainer",
  483. srcs = ["parser_trainer.py"],
  484. deps = [
  485. ":graph_builder",
  486. ":structured_graph_builder",
  487. ":task_spec_py_pb2",
  488. ],
  489. )
  490. py_binary(
  491. name = "parser_eval",
  492. srcs = ["parser_eval.py"],
  493. deps = [
  494. ":graph_builder",
  495. ":sentence_py_pb2",
  496. ":structured_graph_builder",
  497. ],
  498. )
  499. py_binary(
  500. name = "conll2tree",
  501. srcs = ["conll2tree.py"],
  502. deps = [
  503. ":graph_builder",
  504. ":sentence_py_pb2",
  505. ],
  506. )
  507. # py tests
  508. py_test(
  509. name = "lexicon_builder_test",
  510. size = "small",
  511. srcs = ["lexicon_builder_test.py"],
  512. deps = [
  513. ":graph_builder",
  514. ":sentence_py_pb2",
  515. ":task_spec_py_pb2",
  516. ],
  517. )
  518. py_test(
  519. name = "text_formats_test",
  520. size = "small",
  521. srcs = ["text_formats_test.py"],
  522. deps = [
  523. ":graph_builder",
  524. ":sentence_py_pb2",
  525. ":task_spec_py_pb2",
  526. ],
  527. )
  528. py_test(
  529. name = "reader_ops_test",
  530. size = "medium",
  531. srcs = ["reader_ops_test.py"],
  532. data = [":testdata"],
  533. tags = ["notsan"],
  534. deps = [
  535. ":dictionary_py_pb2",
  536. ":graph_builder",
  537. ":sparse_py_pb2",
  538. ],
  539. )
  540. py_test(
  541. name = "beam_reader_ops_test",
  542. size = "medium",
  543. srcs = ["beam_reader_ops_test.py"],
  544. data = [":testdata"],
  545. tags = ["notsan"],
  546. deps = [
  547. ":structured_graph_builder",
  548. ],
  549. )
  550. py_test(
  551. name = "graph_builder_test",
  552. size = "medium",
  553. srcs = ["graph_builder_test.py"],
  554. data = [
  555. ":testdata",
  556. ],
  557. tags = ["notsan"],
  558. deps = [
  559. ":graph_builder",
  560. ":sparse_py_pb2",
  561. ],
  562. )
  563. sh_test(
  564. name = "parser_trainer_test",
  565. size = "medium",
  566. srcs = ["parser_trainer_test.sh"],
  567. data = [
  568. ":parser_eval",
  569. ":parser_trainer",
  570. ":testdata",
  571. ],
  572. tags = ["notsan"],
  573. )