spaCy

explosion/spaCy

Fork 0

mirror of https://github.com/explosion/spaCy.git synced 2025-09-19 18:42:37 +03:00

Commit Graph

Select branches

Hide Pull Requests

batching-doc

bugfix/fix-doc-copy

bugfix/fix-morph-memory-zone

bugfix/missing-tags

bugfix/windows-crash

chinese

cibuildwheel

compat/lemmatizer-factory-defaults

cython3

debug-nonproj

debug-trf

develop

docs-docker

docs-yaml-multiline

docs/llm_develop

docs/llm_main

example/keras-parikh-entailment

feat/add-pipe-instance

feat/prototype-el-with-custom-kb

feature/component-requirements

feature/convert-old-conll

feature/coref

feature/deep-learning-wrappers

feature/factory-context

feature/improve-displacy

feature/improve-pretrain

feature/linear-parser

feature/memory-zones

feature/morphology

feature/ngram-bow

feature/punct-augmenter

feature/radicli

feature/refactor-linker

feature/resume-training

feature/span-categorizer

feature/support-glove-vectors

fix-serialize

fix-windows-ci

fix-windows-crash

fix/enum-python-types

july16

kamikaze-cython3-upd

master

nightly.spacy.io

parser/early-update-beam

refactor/move-registrations

refactor/parser-gpu

revert-4530-iss4529

sense_unsupervised

spacy.io

tests/skip-multiproc-windows

tmp-ray

tmp/fix-tagger-begin-train

tmp/new-config

tmp/oracle-cuts

tmp/ragged-tok2vec

tmp/vocab

v1.x

v2.1.x

v2.2.x

v2.3.x

v2.spacy.io

v2.x

v3.0.x

v3.1.x

v3.2.x

v3.3.x

v3.4.x

v3.5.x

v3.6.x

v3.7.x

v3.8.x

v4

v5

website-universe-add-tmtk

#10002

#1001

#10019

#1002

#10026

#10037

#10038

#10043

#10045

#10048

#1005

#1005

#10051

#10052

#10067

#10068

#10069

#10070

#10071

#10072

#10073

#10075

#10079

#10090

#10091

#10092

#10093

#10098

#10099

#101

#10100

#10102

#10103

#10106

#10109

#10131

#1014

#10140

#10141

#10143

#10146

#10154

#10163

#10164

#10166

#10167

#10168

#10170

#10175

#10184

#10185

#10187

#10188

#10189

#10192

#10197

#10201

#10209

#10214

#10215

#10216

#1022

#10220

#10223

#10225

#10229

#10231

#10235

#10239

#1024

#1024

#10240

#10241

#10244

#10245

#10247

#1025

#10250

#10254

#10256

#10260

#10261

#10262

#10280

#10282

#10294

#10295

#10302

#10304

#10305

#10309

#10312

#10313

#10319

#10322

#10324

#10332

#10333

#10335

#10344

#10345

#10346

#10347

#10348

#1035

#10351

#10355

#10356

#10357

#10371

#10377

#10384

#10387

#10388

#10389

#1039

#10392

#10393

#10394

#10399

#104

#10400

#10413

#10431

#10432

#10446

#10451

#10452

#10457

#10460

#10463

#10464

#10468

#1047

#10471

#10476

#10479

#10484

#10485

#10486

#10487

#10488

#10489

#10490

#10493

#10499

#10502

#10509

#10516

#10518

#10519

#10521

#10522

#10524

#10534

#10536

#10537

#10538

#10539

#10540

#10546

#10547

#1055

#10550

#10551

#10556

#10560

#10563

#10563

#10572

#10573

#10576

#10577

#10580

#10581

#10582

#10592

#10593

#10596

#10598

#10600

#10614

#10616

#10617

#10620

#10622

#10628

#10629

#10633

#10639

#10642

#10645

#10650

#10651

#10653

#10655

#10659

#10665

#10668

#10669

#10673

#10676

#10677

#10681

#10684

#10687

#10688

#10689

#107

#1070

#10705

#10707

#10712

#10714

#10715

#10718

#10723

#10729

#10734

#1074

#10741

#10746

#10747

#10748

#10750

#10751

#1076

#10762

#10763

#1077

#10771

#10772

#10773

#10774

#10775

#10777

#10779

#10780

#10782

#10784

#10786

#10788

#10789

#1079

#10793

#10795

#10796

#10797

#10798

#1081

#10812

#10816

#1082

#10821

#10826

#10828

#10829

#10830

#10831

#10833

#10835

#10836

#10837

#10841

#10842

#10843

#10844

#10847

#10848

#10849

#10857

#10858

#10860

#10862

#10863

#10864

#10867

#10873

#10874

#10875

#10877

#10878

#10880

#10897

#10898

#10899

#10901

#10904

#10908

#10910

#10911

#10916

#10917

#10919

#10919

#10921

#10925

#10927

#1093

#10935

#10936

#10938

#10940

#10943

#10944

#10945

#10948

#10950

#10951

#10952

#10955

#10957

#1096

#10960

#10964

#10965

#10966

#10970

#10976

#10977

#10978

#10981

#10991

#10992

#10993

#10994

#10997

#110

#11002

#11004

#11005

#1101

#11011

#11012

#11013

#11014

#11015

#11016

#11017

#11018

#11021

#11022

#11024

#11026

#1103

#1103

#11034

#11035

#11042

#11043

#11052

#11054

#11055

#11056

#11057

#11058

#11062

#11063

#11064

#11067

#11068

#11069

#11070

#11073

#11074

#11076

#11077

#11078

#11081

#11082

#11087

#11088

#11089

#1109

#11090

#11092

#11095

#11096

#11097

#11099

#111

#11100

#11101

#11102

#11111

#11112

#11119

#11121

#11123

#11124

#11125

#11127

#11130

#1114

#1115

#11159

#11162

#11171

#11175

#1118

#11184

#11185

#11188

#1119

#11192

#11193

#11194

#1120

#11200

#11209

#11210

#11211

#11214

#11215

#11221

#11222

#11226

#11228

#1124

#11242

#1125

#11255

#11258

#1126

#11261

#11263

#11264

#11268

#1127

#11270

#11275

#11276

#11279

#1128

#11280

#11283

#11284

#11286

#11291

#11292

#11293

#11294

#11312

#11317

#11318

#11319

#11320

#11326

#11328

#11331

#11332

#11333

#11334

#11335

#11336

#11337

#11343

#11344

#11345

#11349

#11352

#11353

#11359

#11364

#11365

#11366

#11368

#11370

#11371

#11375

#11376

#11377

#11378

#11380

#11381

#11382

#11383

#11384

#11385

#11390

#11406

#11410

#11418

#11419

#1142

#11424

#11426

#11427

#11429

#11430

#11432

#11435

#11437

#11442

#11444

#11448

#11453

#11457

#11458

#11459

#1146

#11460

#11463

#11464

#11465

#11468

#1147

#1147

#11489

#11491

#11500

#11504

#11506

#11508

#11511

#11512

#11513

#1152

#1153

#11530

#11538

#11540

#11542

#11546

#11551

#11552

#11555

#11557

#11563

#11569

#1157

#11570

#11571

#11572

#11573

#11580

#11583

#11592

#11593

#1161

#11610

#11611

#11612

#11615

#11618

#11619

#11627

#11631

#11635

#11640

#11649

#11661

#11665

#11666

#11667

#11668

#11669

#1167

#11671

#11672

#11674

#11677

#11678

#11679

#1168

#11686

#11687

#11694

#11696

#11698

#117

#11700

#11701

#11702

#11703

#11705

#1171

#11711

#11713

#11714

#11717

#11720

#11722

#11732

#11735

#11736

#11737

#11738

#11739

#11741

#11744

#11745

#11746

#11747

#11749

#11750

#11751

#11753

#11762

#11763

#11764

#11765

#11766

#11768

#1177

#11774

#11777

#11778

#11779

#11780

#11781

#11792

#11793

#11801

#11802

#11803

#11806

#1181

#11810

#11811

#11819

#11820

#11825

#11826

#11828

#11834

#11837

#11845

#11846

#1185

#11855

#1186

#11860

#11864

#11867

#11869

#11871

#11879

#11884

#11885

#11886

#11887

#11888

#11889

#11892

#11899

#11900

#11901

#11902

#11903

#11908

#11910

#11913

#11914

#11916

#11918

#11919

#11921

#11924

#11927

#11928

#11929

#11930

#11932

#11933

#11934

#11935

#11937

#11940

#11943

#11945

#11947

#11948

#1195

#11955

#11956

#11958

#11959

#11963

#11964

#11965

#11966

#11967

#1197

#11971

#11972

#11973

#11974

#11977

#11978

#11979

#11980

#11981

#11982

#11994

#11995

#11996

#11997

#12005

#12006

#12008

#12009

#12010

#12013

#12015

#12016

#12017

#12019

#12020

#12020

#12027

#12029

#12035

#12036

#12038

#12039

#12040

#12045

#12047

#12049

#12050

#12051

#12057

#12058

#12059

#12073

#12077

#12078

#12086

#12088

#12089

#12092

#12094

#12095

#12096

#12098

#1210

#1210

#12100

#12101

#12102

#12107

#12108

#12113

#12116

#12119

#12120

#12121

#12122

#12125

#12126

#12127

#12128

#12129

#12131

#12132

#12136

#12137

#12145

#12148

#12149

#12150

#12151

#12153

#12155

#12157

#12158

#12159

#12160

#12161

#12162

#12163

#12164

#12165

#12166

#12169

#12171

#12172

#12173

#12175

#12176

#12177

#12178

#12179

#12181

#12182

#12183

#12184

#12185

#12186

#12187

#12188

#12189

#1219

#1219

#12190

#12192

#12193

#12194

#12196

#1220

#1220

#12201

#12202

#12208

#12209

#12210

#12213

#12214

#12215

#12218

#12219

#12220

#12227

#12234

#12242

#12243

#12244

#12245

#12250

#12251

#12252

#12257

#12258

#12262

#12268

#12272

#12283

#12285

#12287

#12288

#12290

#12292

#12293

#12303

#12314

#12315

#12318

#12320

#12328

#12334

#12336

#12341

#12343

#12344

#12345

#12347

#12348

#12349

#12350

#12351

#12352

#12356

#12365

#12366

#12367

#12368

#12369

#12371

#12372

#12375

#12377

#12380

#12384

#12385

#12393

#12394

#12395

#12398

#12403

#12405

#12406

#12408

#12409

#12418

#12419

#12422

#12427

#12429

#12435

#12436

#12437

#12438

#12439

#1244

#12440

#12442

#1245

#12450

#12452

#12456

#12459

#1246

#12464

#12465

#12465

#12466

#12469

#12470

#12471

#12472

#12473

#12477

#12484

#12486

#12490

#12491

#12492

#12493

#12494

#12495

#12498

#12499

#12506

#12507

#12508

#12511

#1252

#12528

#12531

#12538

#12540

#12542

#12543

#12545

#12553

#12554

#12554

#12557

#12559

#12560

#12567

#12568

#12569

#12572

#12575

#12577

#12582

#12586

#12592

#12595

#12597

#126

#12601

#12602

#12604

#12607

#12608

#12609

#12615

#12616

#12617

#1262

#12620

#12621

#12623

#12624

#12625

#12626

#12628

#12629

#12630

#12631

#12633

#12636

#12642

#12643

#1265

#1265

#12652

#12658

#12660

#12661

#12671

#12674

#12676

#12677

#12679

#12684

#12689

#12692

#12693

#12694

#127

#127

#12700

#12701

#12703

#12708

#12709

#12710

#12710

#12714

#12715

#12717

#12718

#1272

#12721

#12727

#12730

#12733

#12735

#12737

#12742

#12743

#12744

#12748

#12749

#12752

#12753

#12754

#12757

#1276

#12767

#12769

#12770

#12781

#12782

#12783

#12784

#12785

#1279

#12791

#12792

#12793

#12794

#12795

#12797

#12798

#12799

#12800

#12801

#12803

#12805

#12817

#12819

#12820

#12820

#12823

#12826

#12827

#12828

#12829

#12832

#12835

#12836

#12839

#12840

#12842

#12844

#12853

#12857

#12860

#1287

#12870

#12875

#12877

#12879

#1288

#12881

#12882

#12883

#12888

#12889

#12890

#12892

#12893

#12894

#12895

#12896

#12897

#12899

#129

#12901

#12902

#12903

#12904

#12905

#12908

#12909

#12918

#12928

#12939

#1294

#12944

#12945

#12947

#12948

#12949

#12950

#12952

#12955

#12961

#12962

#12963

#12965

#12966

#12967

#12968

#12969

#12979

#1298

#12980

#12981

#12988

#12993

#12994

#12995

#12999

#13003

#13009

#13011

#13013

#13014

#13017

#13018

#13025

#13027

#13028

#13029

#13034

#13035

#13037

#13040

#13042

#13043

#13044

#13045

#13046

#13050

#13051

#13053

#13058

#13063

#13066

#13068

#13071

#13078

#13081

#13082

#13086

#13093

#13095

#13103

#13104

#13106

#13107

#13108

#13109

#13110

#13116

#13119

#13133

#13149

#13164

#13167

#13169

#13173

#13174

#13174

#13180

#13181

#13183

#13187

#13191

#13197

#13201

#13203

#13214

#13215

#1323

#13240

#13247

#13249

#13250

#13251

#13253

#13254

#13255

#13259

#13269

#13270

#13271

#13272

#13273

#13282

#13284

#13286

#13287

#13288

#13288

#13289

#13290

#13291

#13292

#13299

#133

#13301

#13302

#13304

#13313

#13317

#13318

#13318

#13321

#13327

#1333

#13337

#13337

#13366

#13378

#13378

#13379

#13390

#13391

#13395

#1340

#13400

#13409

#13413

#13414

#13415

#13416

#13417

#13418

#1342

#13424

#13425

#13429

#1343

#13431

#13432

#13433

#13436

#13439

#13440

#13441

#13448

#13452

#13458

#13463

#13463

#13465

#13465

#13466

#13469

#13470

#13471

#13472

#13490

#13493

#135

#1350

#13502

#1351

#13510

#13514

#13515

#13516

#1352

#1355

#13561

#13570

#13587

#136

#13603

#13613

#13613

#13618

#13618

#13619

#1362

#13621

#13627

#13631

#13632

#1364

#13641

#13643

#13644

#13646

#1365

#13654

#13655

#13656

#13657

#13660

#13660

#13665

#1367

#13694

#13694

#137

#13702

#13713

#13716

#13721

#13721

#13727

#13737

#13737

#13740

#13743

#13749

#13756

#13756

#13759

#13760

#13762

#13762

#13764

#13765

#13766

#13766

#13768

#13781

#13784

#13787

#13787

#13788

#138

#13800

#13807

#13811

#13813

#13816

#13821

#13821

#13822

#13822

#13823

#13825

#13825

#13827

#13827

#13832

#13832

#13836

#13836

#13840

#13840

#13842

#13842

#13844

#13844

#13848

#13848

#1385

#13851

#13854

#13854

#13859

#13859

#1386

#13865

#13865

#13869

#13869

#13870

#13870

#1389

#1390

#1391

#1392

#1393

#1396

#1397

#140

#140

#1400

#1401

#1402

#1407

#1408

#1411

#1413

#1414

#1415

#1416

#1418

#1420

#1422

#1422

#1423

#1424

#1425

#1428

#1429

#143

#1433

#1433

#1435

#1437

#1438

#1440

#1441

#1442

#1443

#1447

#1448

#1449

#145

#1454

#1461

#1462

#1464

#1466

#1467

#1468

#147

#1473

#1475

#1479

#1479

#148

#1481

#1486

#149

#1492

#1493

#1497

#150

#1501

#1501

#1502

#1509

#151

#1511

#1524

#1526

#153

#1542

#1552

#1563

#1564

#1565

#1566

#1569

#1570

#1576

#1583

#159

#159

#1594

#1602

#1604

#1607

#1607

#1608

#161

#1611

#1614

#1619

#1619

#1620

#1621

#1624

#1631

#1637

#1644

#1645

#1646

#1647

#1650

#1653

#1664

#1665

#1667

#1668

#167

#167

#1672

#1674

#168

#168

#1683

#1687

#1688

#1689

#1705

#1708

#1710

#1715

#1719

#1720

#1724

#173

#1731

#1732

#1735

#1736

#1736

#1742

#1747

#1749

#1751

#1775

#1788

#1789

#1790

#1792

#1793

#1795

#180

#1801

#1808

#181

#1828

#1829

#1830

#1833

#1833

#1835

#1835

#1836

#1841

#1842

#1845

#185

#1857

#1857

#186

#1870

#1876

#1879

#1882

#189

#1891

#1893

#1896

#190

#1905

#1913

#1914

#1916

#1930

#1932

#1932

#1935

#1940

#1948

#195

#1956

#1957

#1961

#1968

#1979

#198

#1982

#1987

#1988

#1999

#2

#200

#2000

#2001

#2002

#2012

#2019

#202

#2025

#2026

#2026

#2036

#2037

#2037

#2040

#2065

#207

#2080

#209

#2095

#21

#210

#2102

#2104

#2108

#2109

#2109

#2110

#2116

#2123

#2126

#2127

#2128

#2128

#2131

#2135

#2137

#2138

#2139

#2140

#2141

#2142

#2146

#215

#2152

#2155

#2158

#2159

#2163

#2167

#2168

#2168

#2170

#2172

#2184

#2195

#2197

#2207

#2207

#2212

#2214

#2214

#222

#2221

#2221

#223

#2230

#2233

#2236

#224

#2241

#2247

#2252

#2255

#2258

#2258

#2272

#2274

#2275

#2282

#2286

#2289

#2295

#2297

#2301

#2301

#2302

#2305

#2307

#2308

#2310

#2314

#2315

#2316

#2319

#2324

#234

#2340

#2344

#2344

#2346

#2348

#2351

#2354

#2357

#2359

#2372

#2374

#2378

#2392

#240

#240

#2415

#2416

#2418

#2422

#2423

#2425

#2426

#2426

#2429

#2430

#2433

#2436

#2444

#2454

#2459

#2478

#248

#249

#2495

#2497

#2497

#2500

#2500

#2506

#2513

#2521

#2525

#2526

#2529

#2535

#2537

#2543

#2547

#2549

#255

#2558

#2560

#2565

#2568

#2572

#2579

#2584

#2587

#2590

#2591

#2594

#2596

#2597

#2598

#2598

#2599

#2604

#2613

#2615

#2616

#2618

#2620

#2620

#2621

#2623

#2629

#263

#2634

#2639

#265

#2658

#2664

#2680

#2680

#2681

#2687

#2687

#2688

#2696

#2709

#2730

#2731

#2732

#2737

#2738

#275

#2750

#2751

#2752

#2759

#2763

#278

#2786

#2788

#2790

#2795

#2797

#2799

#280

#2802

#2803

#2805

#2806

#2807

#2808

#2814

#2818

#282

#2830

#2836

#284

#2842

#2845

#2846

#2856

#286

#2865

#2866

#2867

#287

#2876

#2880

#2881

#2885

#2886

#2899

#2905

#2906

#2909

#2919

#2922

#2925

#2927

#2931

#2932

#2940

#2943

#2949

#296

#2964

#2965

#2966

#2967

#2968

#2971

#2972

#2974

#2977

#2979

#2980

#2981

#2983

#2985

#2988

#2992

#2998

#2999

#3

#3003

#3004

#3006

#3007

#3017

#3021

#3022

#3023

#3024

#3029

#3030

#3031

#3032

#3035

#3038

#3044

#3045

#3046

#305

#306

#3062

#3065

#307

#3073

#3074

#3075

#3076

#3078

#3079

#3080

#3083

#3084

#3085

#3089

#3096

#3099

#3100

#3101

#3103

#3104

#3105

#3106

#3107

#3109

#3110

#3118

#3119

#312

#3124

#3126

#3131

#3139

#3144

#3153

#3154

#3155

#3156

#3158

#3159

#3164

#3167

#3173

#3174

#3179

#3180

#3185

#3186

#3194

#3198

#3200

#3204

#3206

#3210

#3213

#3217

#3218

#3220

#3223

#3225

#3227

#3228

#3235

#3236

#3241

#3243

#3244

#3246

#3252

#3253

#3255

#3264

#3266

#3267

#3273

#3276

#3280

#3281

#3282

#3283

#3284

#3285

#3286

#329

#3293

#3294

#33

#33

#3301

#3308

#3318

#3324

#3325

#3329

#3332

#3340

#3341

#3362

#3364

#3369

#3370

#3371

#3374

#3378

#3379

#3381

#3383

#3385

#3386

#3387

#3388

#3389

#3390

#3391

#3392

#3393

#34

#3405

#3408

#3409

#3413

#3414

#3415

#3416

#3417

#3419

#3427

#3434

#3440

#3441

#3455

#3459

#346

#3460

#3462

#3465

#3470

#3471

#3472

#3480

#3491

#3492

#3495

#3498

#3499

#3500

#3510

#3511

#3512

#3513

#3514

#3515

#3519

#3529

#3530

#3532

#3535

#3538

#3539

#3542

#3543

#3545

#3548

#3550

#3557

#3559

#3565

#357

#3570

#3573

#3575

#3576

#3577

#358

#3583

#3586

#3589

#359

#3591

#3599

#3601

#3610

#3612

#3613

#3614

#3627

#363

#363

#3630

#3641

#3643

#3646

#3649

#3651

#3652

#3653

#3655

#3656

#3657

#3658

#3660

#3661

#3662

#3685

#3686

#3688

#3689

#3693

#3704

#3705

#3711

#3719

#3721

#3729

#3741

#3755

#3757

#3767

#3778

#3784

#3786

#3790

#3797

#3800

#3806

#3807

#3808

#3809

#3810

#3814

#3815

#3819

#3835

#3836

#3838

#384

#3843

#385

#3855

#386

#3860

#3864

#3865

#3873

#3894

#3895

#3899

#3900

#3901

#3902

#3911

#3914

#3915

#3916

#3918

#3919

#3925

#3927

#3928

#3929

#3939

#3941

#3945

#3946

#3948

#3949

#3950

#3953

#3957

#3964

#3969

#3980

#3982

#3992

#3999

#4

#4

#4001

#4003

#4012

#4013

#4014

#4021

#4022

#4023

#4031

#4033

#4035

#4049

#4050

#4052

#4057

#4062

#4064

#4065

#4066

#4067

#4072

#4075

#4078

#4079

#4080

#4081

#4082

#4084

#4089

#4090

#4091

#4092

#4097

#4101

#4103

#4105

#4110

#4113

#4114

#4117

#4123

#4129

#4130

#4132

#4139

#4140

#4141

#4143

#4144

#4147

#4150

#4151

#4153

#4159

#4162

#4163

#4166

#4167

#4169

#4171

#4174

#4178

#4179

#4186

#4187

#4188

#4189

#4191

#4192

#4202

#4205

#4207

#4208

#4210

#4217

#4219

#422

#4222

#4226

#4232

#4235

#4237

#4239

#4245

#4246

#4247

#4248

#4249

#4251

#4252

#4257

#4258

#4259

#4263

#4265

#4266

#4268

#4275

#4276

#4279

#4282

#4284

#4285

#4288

#4289

#4290

#4291

#4292

#4294

#4300

#4303

#4305

#4307

#4309

#4314

#4315

#4316

#4317

#4320

#4321

#4322

#4325

#4326

#4327

#4330

#4331

#4334

#4335

#4336

#4339

#4343

#4344

#4345

#4346

#4351

#4353

#4359

#4360

#4361

#4364

#4368

#4371

#4374

#4375

#4378

#4381

#4383

#4387

#4388

#4392

#4393

#4394

#4395

#4399

#4403

#4412

#4414

#4415

#4420

#4421

#4422

#4423

#4424

#4425

#4429

#4437

#4444

#4446

#4447

#4449

#4454

#4455

#4456

#4458

#4459

#4464

#4466

#4467

#4468

#447

#4471

#4472

#4479

#4483

#4484

#4486

#4487

#4488

#4489

#4492

#4498

#4499

#4501

#4507

#4509

#4510

#4513

#4516

#4517

#4518

#4520

#4521

#4522

#4524

#4526

#4527

#4530

#4533

#4534

#4535

#4536

#4537

#4539

#454

#4540

#4541

#4542

#4543

#4549

#4551

#4553

#4555

#4557

#4560

#4561

#4563

#4564

#4579

#4580

#4582

#4583

#4584

#4587

#4596

#4599

#4600

#4601

#4606

#4608

#4612

#4614

#4618

#4619

#4621

#4624

#4632

#4636

#4639

#4640

#4641

#4648

#465

#4656

#4657

#4660

#4662

#4664

#4666

#4670

#4671

#4672

#4675

#4676

#4679

#4680

#4685

#4686

#4690

#4691

#4696

#4697

#4702

#4708

#4710

#4711

#4713

#4715

#4716

#4721

#4722

#4726

#4728

#4729

#4734

#4738

#4749

#4750

#4761

#4762

#4763

#4767

#4774

#4778

#4779

#4783

#4789

#4793

#4794

#4796

#4799

#4801

#4804

#4811

#4826

#4827

#4828

#4831

#4834

#4835

#4836

#4839

#4840

#4842

#4843

#4844

#4846

#4851

#4852

#4853

#4857

#4875

#4877

#4878

#4881

#4882

#4883

#4884

#4889

#4890

#4891

#4892

#4900

#4904

#4909

#4911

#4914

#4920

#4925

#4927

#4931

#4932

#4933

#4938

#4940

#4941

#4942

#4943

#4947

#4950

#4952

#4953

#4957

#4960

#4967

#4968

#4969

#4976

#4977

#4985

#4988

#4989

#4990

#4995

#4997

#4998

#5

#5000

#5001

#5004

#5005

#5006

#5007

#5008

#5009

#501

#5011

#5017

#5019

#5021

#5022

#5024

#5026

#5027

#5028

#5030

#5032

#5036

#5038

#5039

#5040

#5041

#5049

#5058

#5060

#5061

#5062

#5063

#5064

#5065

#5067

#5070

#5071

#5077

#5078

#5079

#5080

#5081

#5085

#5091

#5093

#5097

#5099

#5100

#5101

#5102

#5105

#5106

#5107

#5108

#5109

#5110

#5113

#5114

#5116

#5121

#5125

#5127

#5130

#5143

#5146

#5147

#5148

#5150

#5153

#5155

#5157

#5159

#5161

#5162

#5166

#5167

#5168

#5174

#5177

#5185

#5186

#5187

#5190

#5191

#5194

#5196

#5197

#5198

#5199

#5201

#5202

#5203

#5204

#5205

#5206

#5207

#5209

#5210

#5211

#5213

#5216

#5218

#5223

#5227

#5228

#5234

#5236

#5238

#5244

#5246

#5250

#5251

#5252

#5253

#5257

#5258

#5259

#526

#5263

#5264

#5265

#5266

#5267

#5271

#5278

#528

#5282

#5284

#5289

#529

#529

#5293

#5303

#5308

#5312

#5315

#5316

#5317

#5319

#5325

#5328

#5330

#5331

#5334

#5335

#5342

#5344

#5346

#5348

#5352

#5355

#5357

#5358

#5359

#5360

#5361

#5362

#5365

#5367

#5370

#5371

#5374

#5375

#5376

#5377

#5378

#5379

#538

#5381

#5386

#5387

#5395

#5396

#540

#5401

#5404

#5413

#5414

#5418

#5419

#5425

#5428

#5429

#5430

#5432

#5436

#5437

#5439

#5441

#5446

#5449

#545

#5452

#5456

#5457

#5460

#5461

#5462

#5463

#5464

#5465

#5466

#5467

#5468

#5470

#5473

#5474

#5475

#5476

#5477

#5478

#5479

#5480

#5481

#5482

#5485

#5486

#5488

#5489

#5490

#5491

#5492

#5495

#5496

#5497

#5498

#5499

#55

#5502

#5503

#5506

#5508

#551

#5512

#5514

#5516

#5517

#5518

#5521

#5525

#5526

#5527

#5531

#5533

#5534

#5535

#5536

#5542

#5543

#5544

#5545

#5546

#5547

#5548

#5553

#5554

#5555

#5558

#5560

#5561

#5562

#5563

#5564

#5566

#5567

#5569

#5570

#5572

#5573

#5574

#5575

#5580

#5582

#5583

#5588

#5589

#5592

#5593

#5594

#5595

#5599

#5606

#5608

#561

#5611

#5612

#5613

#5614

#5615

#5616

#5617

#5618

#5619

#5622

#5624

#5626

#5627

#5631

#5632

#5634

#5635

#5636

#5640

#5641

#5652

#5653

#5656

#5661

#5663

#5664

#5665

#5666

#5667

#5669

#567

#567

#5674

#5675

#5676

#5679

#5680

#5681

#5684

#5685

#5686

#5687

#5688

#569

#5690

#5691

#5693

#5694

#5696

#5697

#5699

#570

#5700

#5701

#5703

#5704

#5705

#5706

#5707

#5710

#5711

#5714

#5715

#5716

#5717

#5718

#5719

#572

#5720

#5722

#5723

#5724

#5726

#5730

#5731

#5732

#5733

#5734

#5735

#5736

#5741

#5747

#5748

#5749

#5751

#5752

#5755

#5756

#5757

#5759

#5761

#5762

#5764

#5766

#5767

#5768

#5770

#5771

#5772

#5773

#5774

#5776

#5777

#5781

#5784

#5787

#5788

#5791

#5792

#5793

#5794

#5796

#5798

#5801

#5803

#5804

#5806

#5807

#5808

#5809

#5810

#5812

#5813

#5814

#5819

#582

#5820

#5821

#5822

#5823

#5824

#5826

#5828

#5829

#5830

#5831

#5833

#5834

#5835

#5836

#5837

#5839

#584

#5841

#5842

#5843

#5844

#5845

#5846

#5847

#5848

#5849

#585

#5851

#5854

#5855

#5857

#5858

#5860

#5861

#5865

#5866

#5867

#5868

#5871

#5872

#5873

#5875

#5876

#5878

#5879

#5880

#5881

#5882

#5883

#5885

#5889

#5890

#5891

#5892

#5893

#5894

#5895

#5902

#5904

#5906

#5907

#5908

#5909

#5910

#5912

#5914

#5916

#5920

#5925

#5928

#5930

#5931

#5933

#5936

#5937

#5938

#5939

#5940

#5941

#5942

#5943

#5946

#5948

#5949

#5951

#5952

#5953

#5954

#5955

#5956

#5959

#5960

#5962

#5963

#5965

#5966

#5968

#5969

#5970

#5971

#5972

#5973

#5974

#5975

#5977

#5978

#5979

#5980

#5984

#5985

#5986

#5987

#5989

#5990

#5991

#5992

#5993

#5994

#5995

#5996

#5998

#6

#6000

#6001

#6002

#6003

#6004

#6005

#6007

#6008

#601

#6010

#6011

#6012

#6013

#6016

#6017

#6018

#6020

#6023

#6024

#6028

#6029

#6034

#6035

#6036

#6037

#6039

#604

#6040

#6041

#6042

#6043

#6044

#6045

#6046

#6047

#6048

#6049

#6051

#6052

#6053

#6055

#6057

#6058

#6062

#6063

#6064

#6067

#6068

#6069

#6072

#6074

#6076

#6078

#6083

#6084

#6085

#6086

#6087

#6089

#6090

#6091

#6092

#6094

#6096

#6098

#6099

#6100

#6102

#6103

#6104

#6105

#6106

#6107

#6109

#6110

#6111

#6112

#6113

#6114

#6116

#6118

#6123

#6124

#6125

#6127

#6128

#6130

#6131

#6132

#6133

#6134

#6135

#6137

#6138

#6139

#614

#6141

#6142

#6143

#6145

#6146

#6149

#6150

#6151

#6152

#6153

#6154

#6155

#6156

#6159

#616

#6160

#6161

#6162

#6163

#6164

#6165

#6167

#6168

#6170

#6172

#6173

#6174

#6175

#6176

#6178

#6179

#6180

#6181

#6182

#6183

#6184

#6185

#6186

#6187

#6188

#6190

#6191

#6192

#6193

#6194

#6195

#6196

#6197

#6198

#6199

#620

#6200

#6201

#6202

#6203

#6204

#6205

#6206

#6208

#6209

#6210

#6211

#6212

#6213

#6216

#6218

#6219

#622

#6221

#6222

#6224

#6227

#6229

#6230

#6231

#6232

#6234

#6235

#6236

#6238

#6239

#6242

#6245

#6246

#6247

#6248

#6249

#625

#6252

#6253

#6255

#6256

#6257

#6259

#6262

#6263

#6264

#6268

#627

#6271

#6272

#6273

#6274

#6275

#6276

#6279

#628

#6284

#6285

#6286

#629

#6293

#6297

#630

#6302

#6305

#6308

#6310

#6311

#6312

#6313

#6315

#6317

#6322

#6326

#633

#6333

#6336

#6338

#634

#6343

#6344

#6345

#6346

#6353

#6354

#6363

#6369

#6371

#6378

#6379

#638

#6384

#6386

#6391

#6394

#6395

#6396

#6399

#640

#6404

#6407

#6408

#6409

#6411

#6412

#6413

#6414

#6417

#6418

#6419

#642

#6421

#6422

#6428

#643

#6431

#6433

#6434

#6437

#6438

#644

#6440

#6441

#6442

#6444

#6445

#6446

#645

#6450

#6451

#6455

#6457

#646

#6460

#6463

#6464

#6465

#647

#6470

#6474

#6475

#6476

#6478

#6481

#6502

#6503

#6509

#6512

#6515

#6517

#6520

#6521

#6522

#6523

#6524

#6528

#6530

#6531

#6532

#654

#6542

#6543

#6545

#6547

#6566

#6571

#6572

#6575

#6576

#6577

#6578

#658

#6580

#6581

#6582

#6583

#6587

#6591

#6592

#6595

#6596

#66

#66

#660

#6605

#6611

#6614

#6620

#6621

#6625

#6629

#663

#663

#6634

#6635

#6636

#6643

#6645

#6646

#6647

#6651

#6653

#6654

#666

#6667

#6668

#6671

#6684

#6689

#6691

#6693

#6694

#6696

#6698

#6702

#6704

#6705

#6711

#6712

#6715

#6720

#6725

#6726

#6727

#6729

#6731

#6736

#6738

#6743

#6744

#6745

#6746

#6747

#675

#6757

#6758

#6759

#676

#6760

#6761

#6765

#6766

#6767

#677

#6771

#6772

#6781

#679

#6793

#6794

#6796

#6797

#680

#6800

#6802

#6804

#6807

#6808

#6809

#6810

#6811

#6816

#6817

#6818

#6819

#6820

#6822

#6824

#6825

#6827

#6828

#6832

#6833

#6834

#6835

#6840

#6852

#6853

#6855

#6856

#6857

#6858

#6859

#6860

#6861

#6862

#6864

#6869

#6870

#6878

#688

#6883

#6884

#6889

#6896

#690

#6909

#6911

#6916

#6917

#6918

#692

#6924

#6928

#694

#6945

#695

#6951

#6956

#6965

#697

#6974

#6975

#6976

#698

#6983

#6996

#700

#7000

#7011

#702

#7025

#7026

#703

#7039

#7046

#7047

#705

#7051

#7057

#7058

#707

#707

#7071

#7072

#7073

#7074

#7075

#7084

#7096

#7100

#7104

#7111

#7114

#7115

#7122

#7126

#7127

#7129

#713

#7157

#716

#7176

#7182

#7185

#7197

#7204

#7206

#7207

#7208

#7209

#7211

#7215

#7220

#7222

#7223

#7225

#7234

#7237

#7243

#7244

#7246

#7247

#7250

#7251

#7255

#7257

#726

#7261

#7264

#7272

#7293

#73

#73

#7308

#7319

#732

#7321

#7325

#7336

#7342

#7353

#7376

#738

#7389

#7394

#7407

#7408

#7409

#7417

#7418

#7419

#742

#7421

#7424

#7429

#7431

#7440

#7441

#7451

#746

#747

#7471

#7472

#7473

#7480

#7483

#7488

#7489

#749

#7490

#7491

#7492

#7495

#7497

#750

#7504

#7527

#7528

#7537

#7541

#7557

#7574

#7583

#7601

#7602

#7603

#7605

#7606

#7615

#7620

#7621

#7626

#7629

#7630

#7631

#7634

#7641

#7642

#7647

#7650

#7655

#7674

#769

#7690

#7693

#7697

#772

#7728

#7738

#7749

#7750

#7754

#7755

#7762

#7767

#7783

#7799

#7805

#7807

#7818

#782

#7826

#7834

#7836

#7839

#7845

#7847

#7851

#7854

#7862

#7865

#788

#788

#7886

#7897

#7907

#7911

#7914

#7919

#7925

#793

#7930

#7931

#7936

#794

#7941

#7944

#7951

#7958

#796

#796

#7961

#7963

#7965

#7973

#7982

#7984

#7988

#7991

#7992

#7993

#8

#8

#8004

#8005

#8009

#8012

#802

#8021

#8022

#8028

#804

#8054

#8055

#806

#8062

#8066

#8069

#807

#8072

#8079

#8089

#8090

#8096

#8099

#8105

#8106

#811

#8112

#8113

#8114

#8115

#8116

#8117

#8127

#8129

#8132

#814

#8142

#8143

#8158

#8159

#8169

#818

#8200

#8201

#8207

#8208

#821

#8212

#8239

#8240

#8244

#8245

#8246

#8256

#8261

#8265

#8271

#8279

#8282

#8285

#8287

#8289

#8290

#8297

#8298

#8299

#8308

#8327

#833

#8335

#8337

#8353

#8358

#836

#8371

#8378

#8379

#8384

#8388

#839

#8391

#8396

#8397

#8403

#8405

#8406

#8408

#8409

#841

#842

#8421

#8422

#8423

#8424

#8426

#8427

#8439

#8441

#8442

#8449

#8452

#8455

#8465

#8466

#8467

#8477

#8486

#8487

#8493

#85

#8504

#8505

#8507

#8512

#8514

#8522

#8523

#8527

#8529

#8547

#8551

#8559

#8572

#8573

#8578

#8579

#8580

#8581

#8584

#8586

#8590

#8592

#8593

#8597

#86

#8606

#8609

#8610

#8614

#8619

#8620

#8623

#8624

#8631

#8634

#8637

#8639

#8640

#865

#8655

#8656

#8657

#866

#8663

#8665

#8674

#869

#8698

#8699

#8702

#8703

#871

#871

#872

#8725

#8728

#8729

#873

#8731

#8735

#8739

#874

#8745

#8754

#8755

#8758

#876

#8765

#8766

#8768

#8769

#877

#877

#8773

#8775

#8776

#8784

#8785

#8786

#8787

#879

#8794

#8795

#8801

#8806

#8807

#8808

#8810

#8813

#8814

#8815

#8816

#8824

#883

#8830

#8832

#8840

#8841

#8844

#885

#8855

#8860

#8862

#8872

#8882

#8885

#8895

#8900

#8909

#8910

#8911

#8912

#8921

#8929

#8938

#8948

#8950

#8951

#8954

#8956

#8964

#8970

#8972

#8975

#8976

#8977

#8980

#8983

#899

#8992

#8993

#900

#9001

#9003

#9007

#9008

#9009

#901

#902

#9023

#9024

#9032

#9033

#9041

#9050

#9059

#9061

#9064

#9065

#9069

#9074

#9081

#9084

#9089

#9097

#9102

#9115

#9124

#9130

#9133

#914

#9142

#9143

#9148

#9150

#9155

#9163

#9167

#917

#9175

#9180

#9181

#9199

#9200

#9202

#921

#9211

#9218

#9223

#9226

#9234

#9237

#9244

#9247

#9248

#9254

#9269

#9270

#9278

#9284

#9296

#9297

#9300

#9305

#9321

#9329

#9335

#9336

#9337

#9342

#9346

#935

#9350

#936

#9361

#9384

#9388

#9390

#9398

#9399

#9400

#9401

#9405

#9406

#9407

#941

#9412

#942

#9420

#9422

#9423

#9425

#9426

#943

#9431

#9438

#9443

#9446

#9447

#9448

#9449

#9450

#9458

#9459

#9460

#9461

#9462

#9463

#9464

#9465

#9469

#9471

#9474

#9475

#9476

#9477

#9481

#9486

#9492

#9496

#9498

#9499

#9500

#9501

#9502

#9505

#9506

#9515

#9516

#9518

#9519

#952

#952

#9520

#9524

#9525

#9526

#9530

#9537

#9539

#9540

#9541

#9544

#9545

#9546

#9549

#955

#9557

#9559

#9560

#9563

#9564

#9565

#9566

#9572

#9573

#9574

#9582

#9583

#9589

#9592

#9593

#9594

#9596

#9597

#960

#9603

#9608

#9610

#9612

#9620

#9622

#9629

#9630

#9631

#9633

#9638

#9639

#9649

#9650

#9654

#9655

#9657

#9658

#9659

#966

#9662

#9664

#9667

#9669

#9670

#9673

#9674

#9678

#9679

#9685

#9688

#9691

#9694

#9697

#9698

#9699

#9701

#9703

#9707

#9708

#9712

#9722

#9731

#9735

#9737

#9738

#9742

#9748

#9755

#9763

#9764

#9773

#9777

#979

#980

#9800

#9802

#982

#9820

#9821

#9823

#9835

#9838

#9859

#9861

#9868

#9869

#9873

#9878

#9879

#9880

#9891

#99

#9903

#9905

#9911

#9912

#9917

#9918

#9930

#9941

#9942

#9951

#9952

#9956

#996

#9960

#9961

#9963

#9972

#9979

#9981

#9987

#9989

0.100

0.100.1

0.100.2

0.100.3

0.100.4

0.100.5

0.100.6

0.100.7

0.101.0

0.93

0.94

0.95

0.96

0.97

0.98

0.99

4.0.0.dev3

checkpoint/split-feature-flag

checkpoint/working-without-split

prerelease-v3.7.6a

prerelease-v3.7.8

prerelease-v3.8.0.dev0

release-v3.7.6

release-v3.7.7

release-v3.7.8

release-v3.8.0

release-v3.8.1

release-v3.8.2

release-v3.8.3

release-v3.8.4

release-v3.8.5

release-v3.8.6

release-v3.8.7

v1.0.0

v1.1.0

v1.10.0

v1.10.1

v1.2.0

v1.3.0

v1.4.0

v1.5.0

v1.6.0

v1.7.0

v1.7.1

v1.7.2

v1.7.3

v1.7.4

v1.7.5

v1.8.0

v1.8.1

v1.8.2

v1.8.3

v1.9.0

v2.0.0

v2.0.0-alpha

v2.0.0rc1

v2.0.0rc2

v2.0.1

v2.0.10

v2.0.11

v2.0.12

v2.0.13

v2.0.14

v2.0.15

v2.0.16

v2.0.17

v2.0.18

v2.0.2

v2.0.3

v2.0.4

v2.0.5

v2.0.6

v2.0.7

v2.0.8

v2.0.9

v2.1.0

v2.1.0a0

v2.1.0a1

v2.1.0a10

v2.1.0a11

v2.1.0a12

v2.1.0a13

v2.1.0a3

v2.1.0a4

v2.1.0a5

v2.1.0a6

v2.1.0a7

v2.1.0a8

v2.1.0a9

v2.1.1

v2.1.2

v2.1.3

v2.1.4

v2.1.5

v2.1.6

v2.1.7

v2.1.8

v2.1.9

v2.2.0

v2.2.1

v2.2.2

v2.2.3

v2.2.4

v2.3.0

v2.3.1

v2.3.2

v2.3.3

v2.3.4

v2.3.5

v2.3.6

v2.3.7

v2.3.8

v2.3.9

v3.0.0

v3.0.0rc1

v3.0.0rc2

v3.0.0rc3

v3.0.0rc4

v3.0.0rc5

v3.0.1

v3.0.2

v3.0.3

v3.0.4

v3.0.5

v3.0.6

v3.0.7

v3.0.8

v3.0.9

v3.1.0

v3.1.1

v3.1.2

v3.1.3

v3.1.4

v3.1.5

v3.1.6

v3.1.7

v3.2.0

v3.2.1

v3.2.2

v3.2.3

v3.2.4

v3.2.5

v3.2.6

v3.3.0

v3.3.0.dev0

v3.3.1

v3.3.2

v3.3.3

v3.4.0

v3.4.1

v3.4.2

v3.4.3

v3.4.4

v3.5.0

v3.5.1

v3.5.2

v3.5.3

v3.5.4

v3.6.0

v3.6.0.dev0

v3.6.0.dev1

v3.6.1

v3.7.0

v3.7.0.dev0

v3.7.1

v3.7.2

v3.7.3

v3.7.4

v3.7.5

v4.0.0.dev0

v4.0.0.dev1

v4.0.0.dev10

v4.0.0.dev2

v4.0.0.dev3

v4.0.0.dev4

v4.0.0.dev5

v4.0.0.dev6

v4.0.0.dev7

v4.0.0.dev8

v4.0.0.dev9

ae52f9f38c * Remove vocab10k from tokens Matthew Honnibal 2014-11-03 00:23:20 +1100
11915e5238 * Update tests Matthew Honnibal 2014-11-03 00:23:04 +1100
75329e9ef8 * Add Co. abbreviation to tokenization rules Matthew Honnibal 2014-11-03 00:16:20 +1100
32fb50dc35 * Remove non_sparse method --- features wanting this can do it easily enough. Matthew Honnibal 2014-11-03 00:15:47 +1100
b5ae1471db * Fiddle with POS tag features Matthew Honnibal 2014-11-03 00:15:03 +1100
70ea862703 * Remove vocab10k field, and add flags for gazetteers Matthew Honnibal 2014-11-03 00:13:51 +1100
f1c3e17c80 * Work on intro copy Matthew Honnibal 2014-11-03 00:13:19 +1100
fa91506073 * Add '' double quote to suffixes file Matthew Honnibal 2014-11-03 00:12:59 +1100
493d5ffb50 * Add test for '' in punct Matthew Honnibal 2014-11-02 21:24:09 +1100
711ed0f636 * Whitespace Matthew Honnibal 2014-11-02 14:22:32 +1100
fcd9490d56 * Add pos_tag method to Language Matthew Honnibal 2014-11-02 14:21:43 +1100
99b5cefa88 * Add tests for emoticon tokenization Matthew Honnibal 2014-11-02 13:22:14 +1100
23131f21bb * Add tests for like_url Matthew Honnibal 2014-11-02 13:21:57 +1100
dc6c3c0f56 * Add tests for like_number Matthew Honnibal 2014-11-02 13:21:39 +1100
829bb2bdbe * Add mappings to Twitter POS tag corpus Matthew Honnibal 2014-11-02 13:21:19 +1100
437cd2217d * Fix strings i/o, removing use of ujson library in favour of plain text file. Allows better control of codecs. Matthew Honnibal 2014-11-02 13:20:37 +1100
3352e89e21 * Use LIKE_URL and LIKE_NUMBER flag features. Seems to improve accuracy on onto web Matthew Honnibal 2014-11-02 13:19:54 +1100
8335706321 * Add LIKE_URL and LIKE_NUMBER flag features Matthew Honnibal 2014-11-02 13:19:05 +1100
c414d0eebe * Add tests for is_number Matthew Honnibal 2014-11-01 19:13:40 +1100
5484fbea69 * Implement is_number Matthew Honnibal 2014-11-01 19:13:24 +1100
f685218e21 * Add is_urlish function Matthew Honnibal 2014-11-01 17:39:34 +1100
11e42fd070 * Add emoticons to tokenization Matthew Honnibal 2014-11-01 15:14:46 +1100
39743323ea * Add i'ma to tokenization rules Matthew Honnibal 2014-10-31 17:45:44 +1100
09a3e54176 * Delete print statements from stringstore Matthew Honnibal 2014-10-31 17:45:26 +1100
b186a66bae * Rename Token.lex_pos to Token.postype, and Token.lex_supersense to Token.sensetype Matthew Honnibal 2014-10-31 17:44:39 +1100
a8ca078b24 * Restore lexemes field to lexicon Matthew Honnibal 2014-10-31 17:43:25 +1100
6c807aa45f * Restore id attribute to lexeme, and rename pos field to postype, to store clustered tag dictionaries Matthew Honnibal 2014-10-31 17:43:00 +1100
aaf6953fe0 * Add count_tags functionto pos.pyx, which should probably live in another file. Feature set achieves 97.9 on wsj19-21, 95.85 on onto web. Matthew Honnibal 2014-10-31 17:42:15 +1100
f67cb9a5a3 * Add count_tags functionto pos.pyx, which should probably live in another file. Feature set achieves 97.9 on wsj19-21, 95.85 on onto web. Matthew Honnibal 2014-10-31 17:42:04 +1100
63114820cf * Upd tests for tighter interface Matthew Honnibal 2014-10-30 18:15:30 +1100
ea8f1e7053 * Tighten interfaces Matthew Honnibal 2014-10-30 18:14:42 +1100
ea85bf3a0a * Tighten the interface to Language Matthew Honnibal 2014-10-30 18:01:27 +1100
c6fcd03692 * Small efficiency tweak to lexeme init Matthew Honnibal 2014-10-30 17:56:11 +1100
87c2418a89 * Fiddle with data types on Lexeme, to compress them to a much smaller size. Matthew Honnibal 2014-10-30 15:42:15 +1100
ac88893232 * Fix Token after lexeme changes Matthew Honnibal 2014-10-30 15:30:52 +1100
e6b87766fe * Remove lexemes vector from Lexicon, and the id and hash attributes from Lexeme Matthew Honnibal 2014-10-30 15:21:38 +1100
889b7b48b4 * Fix POS tagger, so that it loads correctly. Lexemes are being read in. Matthew Honnibal 2014-10-30 13:38:55 +1100
67c8c8019f * Update lexeme serialization, using a binary file format Matthew Honnibal 2014-10-30 01:01:00 +1100
13909a2e24 * Rewriting Lexeme serialization. Matthew Honnibal 2014-10-29 23:19:38 +1100
234d49bf4d * Seems to be working after refactor. Need to wire up more POS tag features, and wire up save/load of POS tags. Matthew Honnibal 2014-10-24 02:23:42 +1100
08ce602243 * Large refactor, particularly to Python API Matthew Honnibal 2014-10-24 00:59:17 +1100
168b2b8cb2 * Add tests for string intern Matthew Honnibal 2014-10-23 20:47:06 +1100
7baef5b7ff * Fix padding on tokens Matthew Honnibal 2014-10-23 04:01:17 +1100
96b835a3d4 * Upd for refactored Tokens class. Now gets 95.74, 185ms training on swbd_wsj_ewtb, eval on onto_web, Google POS tags. Matthew Honnibal 2014-10-23 03:20:02 +1100
e5e951ae67 * Remove the feature array stuff from Tokens class, and replace vector with array-based implementation, with padding. Matthew Honnibal 2014-10-23 01:57:59 +1100
ea1d4a81eb * Refactoring get_atoms, improving tokens API Matthew Honnibal 2014-10-22 13:10:56 +1100
ad49e2482e * Tagger now gets 97pc on wsj, parsing 19-21 in 500ms. Gets 92.7 on web text. Matthew Honnibal 2014-10-22 12:57:06 +1100
0a0e41f6c8 * Add prefix and suffix features Matthew Honnibal 2014-10-22 12:56:09 +1100
7018b53d3a * Improve array features in tokens Matthew Honnibal 2014-10-22 12:55:42 +1100
43d5964e13 * Add function to read detokenization rules Matthew Honnibal 2014-10-22 12:54:59 +1100
077885637d * Add test for reading in POS tags Matthew Honnibal 2014-10-22 10:18:43 +1100
224bdae996 * Add POS utilities Matthew Honnibal 2014-10-22 10:17:57 +1100
5ebe14f353 * Add greedy pos tagger Matthew Honnibal 2014-10-22 10:17:26 +1100
12742f4f83 * Add detokenize method and test Matthew Honnibal 2014-10-18 18:02:05 +1100
df110476d5 * Update docs Matthew Honnibal 2014-10-15 21:50:34 +1100
849de654e7 * Add file for infix patterns Matthew Honnibal 2014-10-14 20:26:43 +1100
31aad7c08a * Test hyphenation etc Matthew Honnibal 2014-10-14 20:26:16 +1100
99f5e59286 * Have tokenizer emit tokens for whitespace other than single spaces Matthew Honnibal 2014-10-14 20:25:57 +1100
43743a5d63 * Work on efficiency Matthew Honnibal 2014-10-14 18:22:41 +1100
6fb42c4919 * Add offsets to Tokens class. Some changes to interfaces, and reorganization of spacy.Lang Matthew Honnibal 2014-10-14 15:47:06 +1100
2805068ca8 * Have tokens track tuples that record the start offset and pos tag as well as a lexeme pointer Matthew Honnibal 2014-10-14 15:21:03 +1100
65d3ead4fd * Rename LexStr_casefix to LexStr_norm and LexInt_i to LexInt_id Matthew Honnibal 2014-10-14 15:19:07 +1100
5abb194553 * Add semi-colon to suffix punct Matthew Honnibal 2014-10-14 10:43:45 +1100
868e558037 * Preparations in place to handle hyphenation etc Matthew Honnibal 2014-10-10 20:23:23 +1100
ff79dbac2e * More slight cleaning for lang.pyx Matthew Honnibal 2014-10-10 20:11:22 +1100
3d82ed1e5e * More slight cleaning for lang.pyx Matthew Honnibal 2014-10-10 19:50:07 +1100
02e948e7d5 * Remove counts stuff from Language class Matthew Honnibal 2014-10-10 19:25:01 +1100
71ee921055 * Slight cleaning of tokenizer code Matthew Honnibal 2014-10-10 19:17:22 +1100
59b41a9fd3 * Switch to new data model, tests passing Matthew Honnibal 2014-10-10 08:11:31 +1100
1b0e01d3d8 * Revising data model of lexeme. Compiles. Matthew Honnibal 2014-10-09 19:53:30 +1100
e40caae51f * Update Lexicon class to expect a list of lexeme dict descriptions Matthew Honnibal 2014-10-09 14:51:35 +1100
51d75b244b * Add serialize/deserialize functions for lexeme, transport to/from python dict. Matthew Honnibal 2014-10-09 14:10:46 +1100
d73d89a2de * Add i attribute to lexeme, giving lexemes sequential IDs. Matthew Honnibal 2014-10-09 13:50:05 +1100
0c6402ab73 * Upd docs Matthew Honnibal 2014-09-26 18:40:18 +0200
096ef2b199 * Rename external hashing lib, from trustyc to preshed Matthew Honnibal 2014-09-26 18:40:03 +0200
11a346fd5e * Remove hashing modules, which are now taken over by external lib Matthew Honnibal 2014-09-26 18:39:40 +0200
bfab6403bc * Re-add docs, sorting out mess from gh-pages Matthew Honnibal 2014-09-25 18:42:20 +0200
aba4a7c7ea * Remove ptb3 file from setup Matthew Honnibal 2014-09-25 18:41:25 +0200
bc460de171 * Add extra tests Matthew Honnibal 2014-09-25 18:29:42 +0200
93505276ed * Add German tokenizer files Matthew Honnibal 2014-09-25 18:29:13 +0200
2e44fa7179 * Add util.py Matthew Honnibal 2014-09-25 18:26:22 +0200
c4cd3bc57a * Add prefix and suffix data files Matthew Honnibal 2014-09-25 18:24:52 +0200
2d4e5ceafd * Remove old docs stuff Matthew Honnibal 2014-09-25 18:24:05 +0200
b15619e170 * Use PointerHash instead of locally provided _hashing module Matthew Honnibal 2014-09-25 18:22:52 +0200
ed446c67ad * Add typedefs file Matthew Honnibal 2014-09-17 23:10:32 +0200
316a57c4be * Remove own memory classes, which have now been broken out into their own package Matthew Honnibal 2014-09-17 23:10:07 +0200
ac522e2553 * Switch from own memory class to cymem, in pip Matthew Honnibal 2014-09-17 23:09:24 +0200
6266cac593 * Switch to using a Python ref counted gateway to malloc/free, to prevent memory leaks Matthew Honnibal 2014-09-17 20:02:26 +0200
5a20dfc03e * Add memory management code Matthew Honnibal 2014-09-17 20:02:06 +0200
0152831c89 * Refactor tokenization, enable cache, and ensure we look up specials correctly even when there's confusing punctuation surrounding the token. Matthew Honnibal 2014-09-16 18:01:46 +0200
143e51ec73 * Refactor tokenization, splitting it into a clearer life-cycle. Matthew Honnibal 2014-09-16 13:16:02 +0200
c396581a0b * Fiddle with the way strings are interned in lexeme Matthew Honnibal 2014-09-15 06:34:45 +0200
0bb547ab98 * Fix memory error in cache, where entry wasn't being null-terminated. Various other changes, some good for performance Matthew Honnibal 2014-09-15 06:33:53 +0200
7959141d36 * Add a few abbreviations, to get tests to pass Matthew Honnibal 2014-09-15 06:32:18 +0200
db191361ee * Add new tests for fancier tokenization cases Matthew Honnibal 2014-09-15 06:31:58 +0200
6fc06bfe2f * Hack a hard-cased unit in to get a test to pass Matthew Honnibal 2014-09-15 06:31:35 +0200
d235299260 * Few nips and tucks to hash table Matthew Honnibal 2014-09-15 05:03:44 +0200
e68a431e5e * Pass only the tokens vector to _tokenize, instead of the whole python object. Matthew Honnibal 2014-09-15 04:01:38 +0200
08cef75ffd * Switch to using a heap-allocated vector in tokens Matthew Honnibal 2014-09-15 03:46:14 +0200
f77b7098c0 * Upd Tokens to use vector, with bounds checking. Matthew Honnibal 2014-09-15 03:22:40 +0200