flair.datasets.text_text#

class flair.datasets.text_text.ParallelTextCorpus(source_file, target_file, name, use_tokenizer=True, max_tokens_per_doc=-1, max_chars_per_doc=-1, in_memory=True, **corpusargs)View on GitHub#

Bases: Corpus

is_in_memory()View on GitHub#
Return type: bool

class flair.datasets.text_text.OpusParallelCorpus(dataset, l1, l2, use_tokenizer=True, max_tokens_per_doc=-1, max_chars_per_doc=-1, in_memory=True, **corpusargs)View on GitHub#

Bases: ParallelTextCorpus

class flair.datasets.text_text.ParallelTextDataset(path_to_source, path_to_target, max_tokens_per_doc=-1, max_chars_per_doc=-1, use_tokenizer=True, in_memory=True)View on GitHub#

Bases: FlairDataset

is_in_memory()View on GitHub#
Return type: bool

class flair.datasets.text_text.DataPairCorpus(data_folder, columns=[0, 1, 2], train_file=None, test_file=None, dev_file=None, use_tokenizer=True, max_tokens_per_doc=-1, max_chars_per_doc=-1, in_memory=True, label_type=None, autofind_splits=True, sample_missing_splits=True, skip_first_line=False, separator='\t', encoding='utf-8')View on GitHub#

Bases: Corpus

class flair.datasets.text_text.DataPairDataset(path_to_data, columns=[0, 1, 2], max_tokens_per_doc=-1, max_chars_per_doc=-1, use_tokenizer=True, in_memory=True, label_type=None, skip_first_line=False, separator='\t', encoding='utf-8', label=True)View on GitHub#

Bases: FlairDataset

is_in_memory()View on GitHub#
Return type: bool

class flair.datasets.text_text.GLUE_RTE(label_type='entailment', base_path=None, max_tokens_per_doc=-1, max_chars_per_doc=-1, use_tokenizer=True, in_memory=True, sample_missing_splits=True)View on GitHub#

Bases: DataPairCorpus

tsv_from_eval_dataset(folder_path)View on GitHub#
class flair.datasets.text_text.GLUE_MNLI(label_type='entailment', evaluate_on_matched=True, base_path=None, max_tokens_per_doc=-1, max_chars_per_doc=-1, use_tokenizer=True, in_memory=True, sample_missing_splits=True)View on GitHub#

Bases: DataPairCorpus

tsv_from_eval_dataset(folder_path)View on GitHub#
class flair.datasets.text_text.GLUE_MRPC(label_type='paraphrase', base_path=None, max_tokens_per_doc=-1, max_chars_per_doc=-1, use_tokenizer=True, in_memory=True, sample_missing_splits=True)View on GitHub#

Bases: DataPairCorpus

tsv_from_eval_dataset(folder_path)View on GitHub#
class flair.datasets.text_text.GLUE_QNLI(label_type='entailment', base_path=None, max_tokens_per_doc=-1, max_chars_per_doc=-1, use_tokenizer=True, in_memory=True, sample_missing_splits=True)View on GitHub#

Bases: DataPairCorpus

tsv_from_eval_dataset(folder_path)View on GitHub#
class flair.datasets.text_text.GLUE_QQP(label_type='paraphrase', base_path=None, max_tokens_per_doc=-1, max_chars_per_doc=-1, use_tokenizer=True, in_memory=True, sample_missing_splits=True)View on GitHub#

Bases: DataPairCorpus

tsv_from_eval_dataset(folder_path)View on GitHub#
class flair.datasets.text_text.GLUE_WNLI(label_type='entailment', base_path=None, max_tokens_per_doc=-1, max_chars_per_doc=-1, use_tokenizer=True, in_memory=True, sample_missing_splits=True)View on GitHub#

Bases: DataPairCorpus

tsv_from_eval_dataset(folder_path)View on GitHub#
class flair.datasets.text_text.GLUE_STSB(label_type='similarity', base_path=None, max_tokens_per_doc=-1, max_chars_per_doc=-1, use_tokenizer=True, in_memory=True, sample_missing_splits=True)View on GitHub#

Bases: DataPairCorpus

tsv_from_eval_dataset(folder_path)View on GitHub#

Create a TSV file containing the predictions for the eval_dataset.

After calling classifier.predict(corpus.eval_dataset, label_name='similarity'), this function can be used to produce a file called STS-B.tsv that is suitable for submission to the GLUE benchmark.

class flair.datasets.text_text.SUPERGLUE_RTE(base_path=None, max_tokens_per_doc=-1, max_chars_per_doc=-1, use_tokenizer=True, in_memory=True, sample_missing_splits=True)View on GitHub#

Bases: DataPairCorpus

jsonl_from_eval_dataset(folder_path)View on GitHub#
flair.datasets.text_text.rte_jsonl_to_tsv(file_path, label=True, remove=False, encoding='utf-8')View on GitHub#