# Description:
#   A syntactic parser and part-of-speech tagger in TensorFlow.
# Package-wide defaults: targets are private unless they set their own
# visibility, and the "layering_check" feature is switched off.
package(
    default_visibility = ["//visibility:private"],
    features = ["-layering_check"],
)

# Apache 2.0
licenses(["notice"])
# Macros used below for proto libraries and TensorFlow op wrappers.
# The extension file is referenced by label (":syntaxnet.bzl") rather than by
# the legacy extensionless relative path ("syntaxnet"), which newer Bazel
# releases no longer accept.
load(
    ":syntaxnet.bzl",
    "tf_gen_op_libs",
    "tf_gen_op_wrapper_py",
    "tf_proto_library",
    "tf_proto_library_py",
)
- # proto libraries
# Proto library generated from feature_extractor.proto.
tf_proto_library(
    name = "feature_extractor_proto",
    srcs = [
        "feature_extractor.proto",
    ],
)
# Proto library generated from sentence.proto.
tf_proto_library(
    name = "sentence_proto",
    srcs = [
        "sentence.proto",
    ],
)
# Python proto library generated from sentence.proto.
tf_proto_library_py(
    name = "sentence_py_pb2",
    srcs = [
        "sentence.proto",
    ],
)
# Proto library generated from dictionary.proto.
tf_proto_library(
    name = "dictionary_proto",
    srcs = [
        "dictionary.proto",
    ],
)
# Python proto library generated from dictionary.proto.
tf_proto_library_py(
    name = "dictionary_py_pb2",
    srcs = [
        "dictionary.proto",
    ],
)
# Proto library generated from kbest_syntax.proto; it depends on the
# sentence proto library.
tf_proto_library(
    name = "kbest_syntax_proto",
    srcs = [
        "kbest_syntax.proto",
    ],
    deps = [
        ":sentence_proto",
    ],
)
# Proto library generated from task_spec.proto.
tf_proto_library(
    name = "task_spec_proto",
    srcs = [
        "task_spec.proto",
    ],
)
# Python proto library generated from task_spec.proto.
tf_proto_library_py(
    name = "task_spec_py_pb2",
    srcs = [
        "task_spec.proto",
    ],
)
# Proto library generated from sparse.proto.
tf_proto_library(
    name = "sparse_proto",
    srcs = [
        "sparse.proto",
    ],
)
# Python proto library generated from sparse.proto.
tf_proto_library_py(
    name = "sparse_py_pb2",
    srcs = [
        "sparse.proto",
    ],
)
- # cc libraries for feature extraction and parsing
# Header-only library exposing base.h; publicly visible.
# The TensorFlow dependencies are chosen per platform through select():
# the darwin configuration uses the headers-only framework target, every
# other configuration uses the framework and lib targets.
cc_library(
    name = "base",
    hdrs = ["base.h"],
    visibility = ["//visibility:public"],
    deps = [
        "@re2//:re2",
        "@tf//google/protobuf",
        "@tf//third_party/eigen3",
    ] + select({
        "//conditions:default": [
            "@tf//tensorflow/core:framework",
            "@tf//tensorflow/core:lib",
        ],
        "@tf//tensorflow:darwin": ["@tf//tensorflow/core:framework_headers_lib"],
    }),
)
# C++ library built from utils.cc / utils.h; depends on :base and the
# unicodetext library.
cc_library(
    name = "utils",
    srcs = ["utils.cc"],
    hdrs = ["utils.h"],
    deps = [
        ":base",
        "//util/utf8:unicodetext",
    ],
)
# Test-only library built from test_main.cc; linked against libm, the
# TensorFlow test library and gtest.
cc_library(
    name = "test_main",
    testonly = True,
    srcs = ["test_main.cc"],
    linkopts = ["-lm"],
    deps = [
        "@tf//tensorflow/core:lib",
        "@tf//tensorflow/core:testlib",
        "//external:gtest",
    ],
)
# C++ library built from document_format.cc / document_format.h.
cc_library(
    name = "document_format",
    srcs = ["document_format.cc"],
    hdrs = ["document_format.h"],
    deps = [
        ":registry",
        ":sentence_proto",
        ":task_context",
    ],
)
# Sources-only C++ library built from text_formats.cc.
# alwayslink forces its object code into every binary that depends on it,
# even though no header exposes its symbols.
cc_library(
    name = "text_formats",
    srcs = ["text_formats.cc"],
    deps = [":document_format"],
    alwayslink = True,
)
# C++ library built from fml_parser.cc / fml_parser.h.
cc_library(
    name = "fml_parser",
    srcs = ["fml_parser.cc"],
    hdrs = ["fml_parser.h"],
    deps = [
        ":feature_extractor_proto",
        ":utils",
    ],
)
# Header-only C++ library exposing proto_io.h.
cc_library(
    name = "proto_io",
    hdrs = ["proto_io.h"],
    deps = [
        ":feature_extractor_proto",
        ":fml_parser",
        ":kbest_syntax_proto",
        ":sentence_proto",
        ":task_context",
    ],
)
# C++ library built from feature_extractor.cc; exports both
# feature_extractor.h and feature_types.h.
cc_library(
    name = "feature_extractor",
    srcs = ["feature_extractor.cc"],
    hdrs = [
        "feature_extractor.h",
        "feature_types.h",
    ],
    deps = [
        ":document_format",
        ":feature_extractor_proto",
        ":kbest_syntax_proto",
        ":proto_io",
        ":sentence_proto",
        ":task_context",
        ":utils",
        ":workspace",
    ],
)
# C++ library built from affix.cc / affix.h.
cc_library(
    name = "affix",
    srcs = ["affix.cc"],
    hdrs = ["affix.h"],
    deps = [
        ":dictionary_proto",
        ":feature_extractor",
        ":shared_store",
        ":term_frequency_map",
        ":utils",
        ":workspace",
    ],
)
# C++ library built from sentence_features.cc / sentence_features.h.
cc_library(
    name = "sentence_features",
    srcs = ["sentence_features.cc"],
    hdrs = ["sentence_features.h"],
    deps = [
        ":affix",
        ":feature_extractor",
        ":registry",
    ],
)
# C++ library built from shared_store.cc / shared_store.h.
cc_library(
    name = "shared_store",
    srcs = ["shared_store.cc"],
    hdrs = ["shared_store.h"],
    deps = [":utils"],
)
# C++ library built from registry.cc / registry.h.
cc_library(
    name = "registry",
    srcs = ["registry.cc"],
    hdrs = ["registry.h"],
    deps = [":utils"],
)
# C++ library built from workspace.cc / workspace.h.
cc_library(
    name = "workspace",
    srcs = ["workspace.cc"],
    hdrs = ["workspace.h"],
    deps = [":utils"],
)
# C++ library built from task_context.cc / task_context.h.
cc_library(
    name = "task_context",
    srcs = ["task_context.cc"],
    hdrs = ["task_context.h"],
    deps = [
        ":task_spec_proto",
        ":utils",
    ],
)
# C++ library built from term_frequency_map.cc / term_frequency_map.h.
# Publicly visible, and always linked in full into dependent binaries.
cc_library(
    name = "term_frequency_map",
    srcs = ["term_frequency_map.cc"],
    hdrs = ["term_frequency_map.h"],
    visibility = ["//visibility:public"],
    deps = [":utils"],
    alwayslink = True,
)
# C++ library bundling the parser state with the arc-standard, tagger and
# generic parser transition sources. Only parser_state.h and
# parser_transitions.h are exported; alwayslink keeps every object file in
# dependent binaries.
cc_library(
    name = "parser_transitions",
    srcs = [
        "arc_standard_transitions.cc",
        "parser_state.cc",
        "parser_transitions.cc",
        "tagger_transitions.cc",
    ],
    hdrs = [
        "parser_state.h",
        "parser_transitions.h",
    ],
    deps = [
        ":kbest_syntax_proto",
        ":registry",
        ":shared_store",
        ":task_context",
        ":term_frequency_map",
    ],
    alwayslink = True,
)
# Test-only C++ library built from populate_test_inputs.cc / .h.
cc_library(
    name = "populate_test_inputs",
    testonly = True,
    srcs = ["populate_test_inputs.cc"],
    hdrs = ["populate_test_inputs.h"],
    deps = [
        ":dictionary_proto",
        ":sentence_proto",
        ":task_context",
        ":term_frequency_map",
        ":test_main",
    ],
)
# C++ library built from parser_features.cc / parser_features.h.
# alwayslink keeps its object file in every dependent binary.
cc_library(
    name = "parser_features",
    srcs = ["parser_features.cc"],
    hdrs = ["parser_features.h"],
    deps = [
        ":affix",
        ":feature_extractor",
        ":parser_transitions",
        ":registry",
        ":sentence_features",
        ":sentence_proto",
        ":task_context",
        ":term_frequency_map",
        ":workspace",
    ],
    alwayslink = True,
)
# C++ library built from embedding_feature_extractor.cc / .h.
cc_library(
    name = "embedding_feature_extractor",
    srcs = ["embedding_feature_extractor.cc"],
    hdrs = ["embedding_feature_extractor.h"],
    deps = [
        ":feature_extractor",
        ":parser_features",
        ":parser_transitions",
        ":sparse_proto",
        ":task_context",
        ":workspace",
    ],
)
# C++ library built from sentence_batch.cc / sentence_batch.h.
cc_library(
    name = "sentence_batch",
    srcs = ["sentence_batch.cc"],
    hdrs = ["sentence_batch.h"],
    deps = [
        ":embedding_feature_extractor",
        ":feature_extractor",
        ":parser_features",
        ":parser_transitions",
        ":sparse_proto",
        ":task_context",
        ":task_spec_proto",
        ":term_frequency_map",
        ":workspace",
    ],
)
# Sources-only C++ library built from beam_reader_ops.cc and reader_ops.cc.
# alwayslink keeps both object files in every dependent binary.
cc_library(
    name = "reader_ops",
    srcs = [
        "beam_reader_ops.cc",
        "reader_ops.cc",
    ],
    deps = [
        ":parser_features",
        ":parser_transitions",
        ":sentence_batch",
        ":sentence_proto",
        ":task_context",
        ":task_spec_proto",
    ],
    alwayslink = True,
)
# Sources-only C++ library built from document_filters.cc.
# alwayslink keeps its object file in every dependent binary.
cc_library(
    name = "document_filters",
    srcs = ["document_filters.cc"],
    deps = [
        ":document_format",
        ":parser_features",
        ":parser_transitions",
        ":sentence_batch",
        ":sentence_proto",
        ":task_context",
        ":task_spec_proto",
        ":text_formats",
    ],
    alwayslink = True,
)
# Sources-only C++ library built from lexicon_builder.cc.
# alwayslink keeps its object file in every dependent binary.
cc_library(
    name = "lexicon_builder",
    srcs = ["lexicon_builder.cc"],
    deps = [
        ":document_format",
        ":parser_features",
        ":parser_transitions",
        ":sentence_batch",
        ":sentence_proto",
        ":task_context",
        ":task_spec_proto",
        ":text_formats",
    ],
    alwayslink = True,
)
# Sources-only C++ library built from unpack_sparse_features.cc.
# It exports no headers, so no other target can reference its symbols
# directly; without alwayslink the linker is free to drop its object file
# from parser_ops.so. alwayslink is set to match the sibling libraries
# aggregated by :parser_ops_cc (reader_ops, document_filters,
# lexicon_builder).
# NOTE(review): presumably this file registers an op/kernel through a static
# initializer -- confirm against unpack_sparse_features.cc.
cc_library(
    name = "unpack_sparse_features",
    srcs = ["unpack_sparse_features.cc"],
    deps = [
        ":sparse_proto",
        ":utils",
    ],
    alwayslink = 1,
)
# C++ library built from ops/parser_ops.cc that also pulls in the reader,
# document-filter, lexicon-builder and sparse-feature libraries.
# alwayslink keeps its object file in every dependent binary.
cc_library(
    name = "parser_ops_cc",
    srcs = ["ops/parser_ops.cc"],
    deps = [
        ":base",
        ":document_filters",
        ":lexicon_builder",
        ":reader_ops",
        ":unpack_sparse_features",
    ],
    alwayslink = True,
)
# Shared object produced from :parser_ops_cc, with its dependencies linked
# statically. "-lm" is passed to the linker everywhere except under the
# darwin configuration, where no extra link options are used.
cc_binary(
    name = "parser_ops.so",
    linkopts = select({
        "//conditions:default": ["-lm"],
        "@tf//tensorflow:darwin": [],
    }),
    linkshared = True,
    linkstatic = True,
    deps = [":parser_ops_cc"],
)
- # cc tests
# Files under testdata/ that tests reference through data = [":testdata"].
filegroup(
    name = "testdata",
    srcs = [
        "testdata/context.pbtxt",
        "testdata/document",
        "testdata/mini-training-set",
    ],
)
# Small C++ test built from shared_store_test.cc.
cc_test(
    name = "shared_store_test",
    size = "small",
    srcs = ["shared_store_test.cc"],
    deps = [
        ":shared_store",
        ":test_main",
    ],
)
# Medium C++ test built from sentence_features_test.cc.
cc_test(
    name = "sentence_features_test",
    size = "medium",
    srcs = ["sentence_features_test.cc"],
    deps = [
        ":feature_extractor",
        ":populate_test_inputs",
        ":sentence_features",
        ":sentence_proto",
        ":task_context",
        ":task_spec_proto",
        ":term_frequency_map",
        ":test_main",
        ":workspace",
    ],
)
# Small C++ test built from arc_standard_transitions_test.cc; the :testdata
# files are available to it at run time.
cc_test(
    name = "arc_standard_transitions_test",
    size = "small",
    srcs = ["arc_standard_transitions_test.cc"],
    data = [
        ":testdata",
    ],
    deps = [
        ":parser_transitions",
        ":populate_test_inputs",
        ":test_main",
    ],
)
# Small C++ test built from tagger_transitions_test.cc; the :testdata files
# are available to it at run time.
cc_test(
    name = "tagger_transitions_test",
    size = "small",
    srcs = ["tagger_transitions_test.cc"],
    data = [
        ":testdata",
    ],
    deps = [
        ":parser_transitions",
        ":populate_test_inputs",
        ":test_main",
    ],
)
# Small C++ test built from parser_features_test.cc.
cc_test(
    name = "parser_features_test",
    size = "small",
    srcs = ["parser_features_test.cc"],
    deps = [
        ":feature_extractor",
        ":parser_features",
        ":parser_transitions",
        ":populate_test_inputs",
        ":sentence_proto",
        ":task_context",
        ":task_spec_proto",
        ":term_frequency_map",
        ":test_main",
        ":workspace",
    ],
)
- # py graph builder and trainer
# Invokes the tf_gen_op_libs macro for the "parser_ops" op library name.
tf_gen_op_libs(
    op_lib_names = [
        "parser_ops",
    ],
)
# Python op wrapper target "parser_ops", generated from :parser_ops_op_lib.
tf_gen_op_wrapper_py(
    name = "parser_ops",
    deps = [
        ":parser_ops_op_lib",
    ],
)
# Python library wrapping load_parser_ops.py; parser_ops.so is shipped with
# it as a runtime data dependency.
py_library(
    name = "load_parser_ops_py",
    srcs = [
        "load_parser_ops.py",
    ],
    data = [
        ":parser_ops.so",
    ],
)
# Python library built from graph_builder.py; depends on TensorFlow, the
# parser-ops loader and the generated parser_ops wrapper.
py_library(
    name = "graph_builder",
    srcs = [
        "graph_builder.py",
    ],
    deps = [
        "@tf//tensorflow:tensorflow_py",
        "@tf//tensorflow/core:protos_all_py",
        ":load_parser_ops_py",
        ":parser_ops",
    ],
)
# Python library built from structured_graph_builder.py, layered on
# :graph_builder.
py_library(
    name = "structured_graph_builder",
    srcs = ["structured_graph_builder.py"],
    deps = [":graph_builder"],
)
# Python binary whose entry point is parser_trainer.py.
py_binary(
    name = "parser_trainer",
    srcs = [
        "parser_trainer.py",
    ],
    deps = [
        ":graph_builder",
        ":structured_graph_builder",
        ":task_spec_py_pb2",
    ],
)
# Python binary whose entry point is parser_eval.py.
py_binary(
    name = "parser_eval",
    srcs = [
        "parser_eval.py",
    ],
    deps = [
        ":graph_builder",
        ":sentence_py_pb2",
        ":structured_graph_builder",
    ],
)
# Python binary whose entry point is conll2tree.py.
py_binary(
    name = "conll2tree",
    srcs = [
        "conll2tree.py",
    ],
    deps = [
        ":graph_builder",
        ":sentence_py_pb2",
    ],
)
- # py tests
# Small Python test built from lexicon_builder_test.py.
py_test(
    name = "lexicon_builder_test",
    size = "small",
    srcs = [
        "lexicon_builder_test.py",
    ],
    deps = [
        ":graph_builder",
        ":sentence_py_pb2",
        ":task_spec_py_pb2",
    ],
)
# Small Python test built from text_formats_test.py.
py_test(
    name = "text_formats_test",
    size = "small",
    srcs = [
        "text_formats_test.py",
    ],
    deps = [
        ":graph_builder",
        ":sentence_py_pb2",
        ":task_spec_py_pb2",
    ],
)
# Medium Python test built from reader_ops_test.py; it reads the :testdata
# files and carries the "notsan" tag.
py_test(
    name = "reader_ops_test",
    size = "medium",
    srcs = [
        "reader_ops_test.py",
    ],
    data = [
        ":testdata",
    ],
    tags = [
        "notsan",
    ],
    deps = [
        ":dictionary_py_pb2",
        ":graph_builder",
        ":sparse_py_pb2",
    ],
)
# Medium Python test built from beam_reader_ops_test.py; it reads the
# :testdata files and carries the "notsan" tag.
py_test(
    name = "beam_reader_ops_test",
    size = "medium",
    srcs = [
        "beam_reader_ops_test.py",
    ],
    data = [
        ":testdata",
    ],
    tags = [
        "notsan",
    ],
    deps = [
        ":structured_graph_builder",
    ],
)
# Medium Python test built from graph_builder_test.py; it reads the
# :testdata files and carries the "notsan" tag.
py_test(
    name = "graph_builder_test",
    size = "medium",
    srcs = ["graph_builder_test.py"],
    data = [":testdata"],
    tags = ["notsan"],
    deps = [
        ":graph_builder",
        ":sparse_py_pb2",
    ],
)
# Medium shell test driven by parser_trainer_test.sh. The parser_eval and
# parser_trainer binaries and the :testdata files are provided as runtime
# data; the test carries the "notsan" tag.
sh_test(
    name = "parser_trainer_test",
    size = "medium",
    srcs = [
        "parser_trainer_test.sh",
    ],
    data = [
        ":parser_eval",
        ":parser_trainer",
        ":testdata",
    ],
    tags = [
        "notsan",
    ],
)
|