spaCy/extra/example_data/ner_example_data/ner-token-per-line-conll2003.json

[
  {
    "id":0,
    "paragraphs":[
      {
        "sentences":[
          {
            "tokens":[
              {
                "orth":"When",
                "tag":"WRB",
                "ner":"O"
              },
              {
                "orth":"Sebastian",
                "tag":"NNP",
                "ner":"B-PERSON"
              },
              {
                "orth":"Thrun",
                "tag":"NNP",
                "ner":"L-PERSON"
              },
              {
                "orth":"started",
                "tag":"VBD",
                "ner":"O"
              },
              {
                "orth":"working",
                "tag":"VBG",
                "ner":"O"
              },
              {
                "orth":"on",
                "tag":"IN",
                "ner":"O"
              },
              {
                "orth":"self",
                "tag":"NN",
                "ner":"O"
              },
              {
                "orth":"-",
                "tag":"HYPH",
                "ner":"O"
              },
              {
                "orth":"driving",
                "tag":"VBG",
                "ner":"O"
              },
              {
                "orth":"cars",
                "tag":"NNS",
                "ner":"O"
              },
              {
                "orth":"at",
                "tag":"IN",
                "ner":"O"
              },
              {
                "orth":"Google",
                "tag":"NNP",
                "ner":"U-ORG"
              },
              {
                "orth":"in",
                "tag":"IN",
                "ner":"O"
              },
              {
                "orth":"2007",
                "tag":"CD",
                "ner":"U-DATE"
              },
              {
                "orth":",",
                "tag":",",
                "ner":"O"
              },
              {
                "orth":"few",
                "tag":"JJ",
                "ner":"O"
              },
              {
                "orth":"people",
                "tag":"NNS",
                "ner":"O"
              },
              {
                "orth":"outside",
                "tag":"RB",
                "ner":"O"
              },
              {
                "orth":"of",
                "tag":"IN",
                "ner":"O"
              },
              {
                "orth":"the",
                "tag":"DT",
                "ner":"O"
              },
              {
                "orth":"company",
                "tag":"NN",
                "ner":"O"
              },
              {
                "orth":"took",
                "tag":"VBD",
                "ner":"O"
              },
              {
                "orth":"him",
                "tag":"PRP",
                "ner":"O"
              },
              {
                "orth":"seriously",
                "tag":"RB",
                "ner":"O"
              },
              {
                "orth":".",
                "tag":".",
                "ner":"O"
              }
            ]
          },
          {
            "tokens":[
              {
                "orth":"\u201c",
                "tag":"''",
                "ner":"O"
              },
              {
                "orth":"I",
                "tag":"PRP",
                "ner":"O"
              },
              {
                "orth":"can",
                "tag":"MD",
                "ner":"O"
              },
              {
                "orth":"tell",
                "tag":"VB",
                "ner":"O"
              },
              {
                "orth":"you",
                "tag":"PRP",
                "ner":"O"
              },
              {
                "orth":"very",
                "tag":"RB",
                "ner":"O"
              },
              {
                "orth":"senior",
                "tag":"JJ",
                "ner":"O"
              },
              {
                "orth":"CEOs",
                "tag":"NNS",
                "ner":"O"
              },
              {
                "orth":"of",
                "tag":"IN",
                "ner":"O"
              },
              {
                "orth":"major",
                "tag":"JJ",
                "ner":"O"
              },
              {
                "orth":"American",
                "tag":"JJ",
                "ner":"U-NORP"
              },
              {
                "orth":"car",
                "tag":"NN",
                "ner":"O"
              },
              {
                "orth":"companies",
                "tag":"NNS",
                "ner":"O"
              },
              {
                "orth":"would",
                "tag":"MD",
                "ner":"O"
              },
              {
                "orth":"shake",
                "tag":"VB",
                "ner":"O"
              },
              {
                "orth":"my",
                "tag":"PRP$",
                "ner":"O"
              },
              {
                "orth":"hand",
                "tag":"NN",
                "ner":"O"
              },
              {
                "orth":"and",
                "tag":"CC",
                "ner":"O"
              },
              {
                "orth":"turn",
                "tag":"VB",
                "ner":"O"
              },
              {
                "orth":"away",
                "tag":"RB",
                "ner":"O"
              },
              {
                "orth":"because",
                "tag":"IN",
                "ner":"O"
              },
              {
                "orth":"I",
                "tag":"PRP",
                "ner":"O"
              },
              {
                "orth":"was",
                "tag":"VBD",
                "ner":"O"
              },
              {
                "orth":"n\u2019t",
                "tag":"RB",
                "ner":"O"
              },
              {
                "orth":"worth",
                "tag":"JJ",
                "ner":"O"
              },
              {
                "orth":"talking",
                "tag":"VBG",
                "ner":"O"
              },
              {
                "orth":"to",
                "tag":"IN",
                "ner":"O"
              },
              {
                "orth":",",
                "tag":",",
                "ner":"O"
              },
              {
                "orth":"\u201d",
                "tag":"''",
                "ner":"O"
              },
              {
                "orth":"said",
                "tag":"VBD",
                "ner":"O"
              },
              {
                "orth":"Thrun",
                "tag":"NNP",
                "ner":"U-PERSON"
              },
              {
                "orth":",",
                "tag":",",
                "ner":"O"
              },
              {
                "orth":"in",
                "tag":"IN",
                "ner":"O"
              },
              {
                "orth":"an",
                "tag":"DT",
                "ner":"O"
              },
              {
                "orth":"interview",
                "tag":"NN",
                "ner":"O"
              },
              {
                "orth":"with",
                "tag":"IN",
                "ner":"O"
              },
              {
                "orth":"Recode",
                "tag":"NNP",
                "ner":"U-ORG"
              },
              {
                "orth":"earlier",
                "tag":"RBR",
                "ner":"B-DATE"
              },
              {
                "orth":"this",
                "tag":"DT",
                "ner":"I-DATE"
              },
              {
                "orth":"week",
                "tag":"NN",
                "ner":"L-DATE"
              },
              {
                "orth":".",
                "tag":".",
                "ner":"O"
              }
            ]
          }
        ]
      }
    ]
  }
]
Updates/bugfixes for NER/IOB converters (#4186) * Updates/bugfixes for NER/IOB converters * Converter formats `ner` and `iob` use autodetect to choose a converter if possible * `iob2json` is reverted to handle sentence-per-line data like `word1\|pos1\|ent1 word2\|pos2\|ent2` * Fix bug in `merge_sentences()` so the second sentence in each batch isn't skipped * `conll_ner2json` is made more general so it can handle more formats with whitespace-separated columns * Supports all formats where the first column is the token and the final column is the IOB tag; if present, the second column is the POS tag * As in CoNLL 2003 NER, blank lines separate sentences, `-DOCSTART- -X- O O` separates documents * Add option for segmenting sentences (new flag `-s`) * Parser-based sentence segmentation with a provided model, otherwise with sentencizer (new option `-b` to specify model) * Can group sentences into documents with `n_sents` as long as sentence segmentation is available * Only applies automatic segmentation when there are no existing delimiters in the data * Provide info about settings applied during conversion with warnings and suggestions if settings conflict or might not be not optimal. * Add tests for common formats * Add '(default)' back to docs for -c auto * Add document count back to output * Revert changes to converter output message * Use explicit tabs in convert CLI test data * Adjust/add messages for n_sents=1 default * Add sample NER data to training examples * Update README * Add links in docs to example NER data * Define msg within converters 2019-08-29 13:04:01 +03:00			`[`
			`{`
			`"id":0,`
			`"paragraphs":[`
			`{`
			`"sentences":[`
			`{`
			`"tokens":[`
			`{`
			`"orth":"When",`
			`"tag":"WRB",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"Sebastian",`
			`"tag":"NNP",`
			`"ner":"B-PERSON"`
			`},`
			`{`
			`"orth":"Thrun",`
			`"tag":"NNP",`
			`"ner":"L-PERSON"`
			`},`
			`{`
			`"orth":"started",`
			`"tag":"VBD",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"working",`
			`"tag":"VBG",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"on",`
			`"tag":"IN",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"self",`
			`"tag":"NN",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"-",`
			`"tag":"HYPH",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"driving",`
			`"tag":"VBG",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"cars",`
			`"tag":"NNS",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"at",`
			`"tag":"IN",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"Google",`
			`"tag":"NNP",`
			`"ner":"U-ORG"`
			`},`
			`{`
			`"orth":"in",`
			`"tag":"IN",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"2007",`
			`"tag":"CD",`
			`"ner":"U-DATE"`
			`},`
			`{`
			`"orth":",",`
			`"tag":",",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"few",`
			`"tag":"JJ",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"people",`
			`"tag":"NNS",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"outside",`
			`"tag":"RB",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"of",`
			`"tag":"IN",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"the",`
			`"tag":"DT",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"company",`
			`"tag":"NN",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"took",`
			`"tag":"VBD",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"him",`
			`"tag":"PRP",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"seriously",`
			`"tag":"RB",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":".",`
			`"tag":".",`
			`"ner":"O"`
			`}`
			`]`
			`},`
			`{`
			`"tokens":[`
			`{`
			`"orth":"\u201c",`
			`"tag":"''",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"I",`
			`"tag":"PRP",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"can",`
			`"tag":"MD",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"tell",`
			`"tag":"VB",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"you",`
			`"tag":"PRP",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"very",`
			`"tag":"RB",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"senior",`
			`"tag":"JJ",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"CEOs",`
			`"tag":"NNS",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"of",`
			`"tag":"IN",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"major",`
			`"tag":"JJ",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"American",`
			`"tag":"JJ",`
			`"ner":"U-NORP"`
			`},`
			`{`
			`"orth":"car",`
			`"tag":"NN",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"companies",`
			`"tag":"NNS",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"would",`
			`"tag":"MD",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"shake",`
			`"tag":"VB",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"my",`
			`"tag":"PRP$",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"hand",`
			`"tag":"NN",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"and",`
			`"tag":"CC",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"turn",`
			`"tag":"VB",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"away",`
			`"tag":"RB",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"because",`
			`"tag":"IN",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"I",`
			`"tag":"PRP",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"was",`
			`"tag":"VBD",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"n\u2019t",`
			`"tag":"RB",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"worth",`
			`"tag":"JJ",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"talking",`
			`"tag":"VBG",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"to",`
			`"tag":"IN",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":",",`
			`"tag":",",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"\u201d",`
			`"tag":"''",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"said",`
			`"tag":"VBD",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"Thrun",`
			`"tag":"NNP",`
			`"ner":"U-PERSON"`
			`},`
			`{`
			`"orth":",",`
			`"tag":",",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"in",`
			`"tag":"IN",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"an",`
			`"tag":"DT",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"interview",`
			`"tag":"NN",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"with",`
			`"tag":"IN",`
			`"ner":"O"`
			`},`
			`{`
			`"orth":"Recode",`
			`"tag":"NNP",`
			`"ner":"U-ORG"`
			`},`
			`{`
			`"orth":"earlier",`
			`"tag":"RBR",`
			`"ner":"B-DATE"`
			`},`
			`{`
			`"orth":"this",`
			`"tag":"DT",`
			`"ner":"I-DATE"`
			`},`
			`{`
			`"orth":"week",`
			`"tag":"NN",`
			`"ner":"L-DATE"`
			`},`
			`{`
			`"orth":".",`
			`"tag":".",`
			`"ner":"O"`
			`}`
			`]`
			`}`
			`]`
			`}`
			`]`
			`}`
			`]`