Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
3719282
Added package.json for npm
konsumer Sep 19, 2012
dce04d1
Better formatting
konsumer Sep 19, 2012
7b4b799
Added UMD for POSTagger
konsumer Sep 19, 2012
6c05f45
Wrapped lexicons in UMD
konsumer Sep 19, 2012
b086945
Wrapped lexer in UMD
konsumer Sep 19, 2012
894d2e5
made structure neater
konsumer Sep 19, 2012
00003ac
moved to vows testing
konsumer Sep 19, 2012
5ac6215
forgot to remove util
konsumer Sep 19, 2012
0cf333d
for Mac's DS_Store
konsumer Sep 19, 2012
6a7cf07
re-structured demo
konsumer Sep 19, 2012
e43364d
simple comment
konsumer Sep 19, 2012
52c949e
added modules to ignore
konsumer Sep 19, 2012
5422ccc
added demo-server & require demo
konsumer Sep 19, 2012
b53dea7
improved ocumentation with markdown, and added the changes I made
konsumer Sep 19, 2012
0cadb08
didn't make sense, before.
konsumer Sep 19, 2012
48df5fd
Commented demo-server
konsumer Sep 19, 2012
04d4d3d
simpler testing feedback
konsumer Sep 19, 2012
df5ef03
No capitalization
konsumer Sep 19, 2012
ecb68ba
forgot 1 capitalization
konsumer Sep 19, 2012
c4afca8
I did that, too.
konsumer Sep 19, 2012
85e7d15
added syntax highlighting
konsumer Sep 19, 2012
efc7a7c
added version introspection to node module
konsumer Sep 19, 2012
26a881d
followed jshint's advice
konsumer Sep 19, 2012
7b2c244
added build system
konsumer Sep 19, 2012
088588b
Merge branch 'master' of github.com:konsumer/jspos
konsumer Sep 19, 2012
8a79092
simpler build instructions
konsumer Sep 19, 2012
91d3201
package doesn't work on a lot of versions
konsumer Sep 19, 2012
2f62e2d
saw errors in parsing, made test. doesn't pass.
konsumer Sep 19, 2012
ca80f76
started work on eliza demo, halting till I figure out parsing problems.
konsumer Sep 19, 2012
824b820
more atomic tests
konsumer Sep 19, 2012
d7076a5
forgot a console.log
konsumer Sep 19, 2012
09a015f
the name is jsPOS\!
konsumer Sep 20, 2012
727df22
update to demos
konsumer Sep 20, 2012
ec10270
deadend code
konsumer Sep 20, 2012
d262e9d
forgot a caps
konsumer Sep 20, 2012
44c1b40
I think tehre was a unicode prob, somewhere
konsumer Sep 20, 2012
4661e69
another possible unicode prob
konsumer Sep 20, 2012
1f6f19f
build test from docs.
konsumer Sep 20, 2012
f255f25
note for later
konsumer Sep 20, 2012
b66e87f
seperated tests into different files
konsumer Sep 21, 2012
68262c8
re-arranged tests, use prettyPrint comparison, instead of JSON.stringify
konsumer Sep 21, 2012
d20e3f3
forgot to rename test export
konsumer Sep 21, 2012
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.DS_Store
node_modules
jspos.min.js
135 changes: 135 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# jsPOS

jsPOS is a Javascript port of [Mark Watson's FastTag Part of Speech Tagger](http://www.markwatson.com/opensource/) which was itself based on Eric Brill's trained rule set and English lexicon. jsPOS also includes a basic lexer that can be used to extract words and other tokens from text strings.


## FILES

* `lexicon.js` - Javascript version of Eric Brill's English lexicon
* `lexer.js` - Lexer to break a sentence into taggable tokens (e.g. words)
* `POSTagger.js` - the Part of Speech tagger.


## USAGE

See demo in **demos/simple/**.

```javascript
var words = new Lexer().lex("This is some sample text. This text can contain multiple sentences.");
var taggedWords = new POSTagger().tag(words);
for (var i = 0; i < taggedWords.length; i++) {
var taggedWord = taggedWords[i];
var word = taggedWord[0];
var tag = taggedWord[1];
}
```

### node.js

See demo using [vows](http://vowsjs.org/) testing in **test/**

You can install jspos in your project (once jsPOS is published to npm) with this command: `npm install jspos`. You can use it in your code, like this:

```javascript
var jspos = require('jspos'),
lexer = new jspos.Lexer(),
tagger = new jspos.POSTagger();

var tags = tagger.tag(lexer.lex("This is some text. It can be whatever length. Cool!"));
console.log(tags);
```

### require.js

See demo in **demos/simple_with_require/**.

```javascript
define(['jspos/lib/post_tagger', 'jspos/lib/lexer'], function(POSTagger, Lexer){
var lexer = new Lexer(),
tagger = new POSTagger();

var tags = tagger.tag(lexer.lex("This is some text. It can be whatever length. Cool!"));
console.log(tags)
});
```

### developer tools

There are a few tools linked to npm, via **package.json**:

* `npm install` - install all dependencies
* `npm test` - test the library
* `npm start` - start the demo webserver
* `node build.js` - creates a minified version of the library (for the web) in jspos.min.js


## TAGS

CC Coord Conjuncn and,but,or
CD Cardinal number one,two
DT Determiner the,some
EX Existential there there
FW Foreign Word mon dieu
IN Preposition of,in,by
JJ Adjective big
JJR Adj., comparative bigger
JJS Adj., superlative biggest
LS List item marker 1,One
MD Modal can,should
NN Noun, sing. or mass dog
NNP Proper noun, sing. Edinburgh
NNPS Proper noun, plural Smiths
NNS Noun, plural dogs
POS Possessive ending 's
PDT Predeterminer all,both
PP$ Possessive pronoun my,one's
PRP Personal pronoun I,you,she
RB Adverb quickly
RBR Adverb, comparative faster
RBS Adverb, superlative fastest
RP Particle up,off
SYM Symbol +,%,&
TO "to" to
UH Interjection oh,oops
VB verb, base form eat
VBD verb, past tense ate
VBG verb, gerund eating
VBN verb, past part eaten
VBP Verb, present eat
VBZ Verb, present eats
WDT Wh-determiner which,that
WP Wh pronoun who,what
WP$ Possessive-Wh whose
WRB Wh-adverb how,where
, Comma ,
. Sent-final punct .,!,?
: Mid-sent punct. :,;
$ Dollar sign $
# Pound sign #
" quote "
( Left paren (
) Right paren )



## LICENSE

jsPOS is licensed under the GNU LGPLv3


## ACKNOWLEDGEMENTS

Thanks to Mark Watson for writing FastTag, which served as the basis for jsPOS.


## AUTHOR

[Percy Wegmann](http://www.percywegmann.com/)

The original of this code is available [on google code](http://code.google.com/p/jspos/)

Kieren Diment <zarquon@cpan.org> added the demo.html and main.js files.

David Konsumer <konsumer@jetboystudio.com> updated demos, documentation & added npm/node/require.js support

The next step is to add noun phrase extraction routines and other utility functions (see the Perl Module [Lingua::EN::Tagger](http://search.cpan.org/perldoc?Lingua::EN::Tagger) ).
89 changes: 0 additions & 89 deletions README.txt

This file was deleted.

25 changes: 25 additions & 0 deletions build.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/**
 * Simple javascript minifier.
 *
 * Concatenates lib/lexicon.js, lib/lexer.js and lib/POSTagger.js (in that
 * order), passes the combined source through uglify-js and writes the result
 * to jspos.min.js in the same directory as this script.
 *
 * run with node build.js or npm run-script build
 */

// All three modules belong to ONE var statement. A stray `;` after the `path`
// require would end the statement and turn `fs` into an implicit global.
var uglify = require('uglify-js'),
    path = require('path'),
    fs = require('fs');

var fname = 'jspos.min.js';

// Library sources, in the order they are concatenated.
var sources = ['lexicon.js', 'lexer.js', 'POSTagger.js'];

var orig_code = "";
for (var i = 0; i < sources.length; i++) {
    orig_code += fs.readFileSync(path.join(__dirname, 'lib', sources[i]), 'utf8');
}

var uglify_options = {
    strict_semicolons: true,
    mangle_options: {except: ['$super']},
    gen_options: {ascii_only: true}
};
var minified_code = uglify(orig_code, uglify_options);

fs.writeFileSync(path.join(__dirname, fname), minified_code, 'utf8');
console.log("Wrote minified file to " + fname);
32 changes: 32 additions & 0 deletions demo/eliza/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<html>
<head>
  <title>Eliza Demo</title>
  <style type="text/css">
    body, input, textarea {
      font-size: 20px;
    }

    textarea, input {
      width: 100%;
    }

    textarea {
      height: 500px;
      display:block;
    }
  </style>
</head>

<body>
  <!-- #output holds the conversation transcript; #input is where the user types. -->
  <form id="eliza">
    <textarea id="output" disabled="disabled">This is an Eliza simulation using jspos. Press [Enter] to say something.</textarea>
    <input id="input">
  </form>

  <!--
    Scripts live inside <body> (markup after </body> is invalid HTML).
    main.js must stay last: it uses $, Lexer and POSTagger from the scripts above.
  -->
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.8.1/jquery.min.js" type="text/javascript"></script>
  <script src="../../lib/lexer.js" type="text/javascript"></script>
  <script src="../../lib/lexicon.js" type="text/javascript"></script>
  <script src="../../lib/POSTagger.js" type="text/javascript"></script>
  <script src="main.js" type="text/javascript"></script>
</body>
</html>
101 changes: 101 additions & 0 deletions demo/eliza/main.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/**
 * from prototype
 *
 * Array#filter(iterator[, context]) -> Array
 * - iterator (Function): An iterator function to use to test the
 *   elements.
 * - context (Object): An optional object to use as `this` within
 *   calls to the iterator.
 *
 * Returns a new array containing all the items in this array for which
 * `iterator` returned a truthy value. Indexes that are not present in the
 * array (holes) are skipped, and the length is read once before iterating.
 *
 * Throws a `TypeError` when `iterator` is not a function, even if the array
 * is empty, as the ES5 algorithm requires.
 *
 * `Array#filter` acts as an ECMAScript 5 [polyfill](http://remysharp.com/2010/10/08/what-is-a-polyfill/).
 * It is only defined if not already present in the user's browser, and it
 * is meant to behave like the native version as much as possible. Consult
 * the [ES5 specification](http://es5.github.com/#x15.4.4.20) for more
 * information.
 **/
if (!Array.prototype.filter) {
  Array.prototype.filter = function(iterator) {
    // Reject a missing receiver or a non-callable iterator up front, so the
    // failure does not depend on the array having at least one element.
    // (The `this == null` half is only observable in strict-mode code.)
    if (this == null || typeof iterator !== 'function') {
      throw new TypeError();
    }

    var object = Object(this);
    var results = [], context = arguments[1], value;

    for (var i = 0, length = object.length >>> 0; i < length; i++) {
      // `in` check skips holes in sparse arrays.
      if (i in object) {
        // Read the value before calling the iterator, so a mutation made by
        // the iterator does not change what gets pushed.
        value = object[i];
        if (iterator.call(context, value, i, object)) {
          results.push(value);
        }
      }
    }
    return results;
  };
}

/**
 * Eliza demo: wires the #eliza form to the jsPOS lexer and tagger.
 *
 * On DOM ready it prints a random opening line to #output and focuses #input.
 * Each submit echoes the user's line to #output, tags it and logs the
 * noun/pronoun candidates to the console.
 */
$(function(){
  var lexer = new Lexer();
  var tagger = new POSTagger();

  // talk data
  var data = {
    "init": [
      "How do you do. Please tell me your problem.",
      "Please tell me what's been bothering you.",
      "Is something troubling you?"
    ],

    "synons" : {
      "be": ["am", "is", "are", "was"],
      "belief": ["feel", "think", "believe", "wish"],
      "cannot": ["can't"],
      "desire": ["want", "need"],
      "everyone": ["everybody", "nobody", "noone"],
      "family": ["mother", "mom", "father", "dad", "sister", "brother", "wife", "children", "child"],
      "happy": ["elated", "glad", "better"],
      "sad": ["unhappy", "depressed", "sick"]
    },

    "sorry":[
      "Please don't apologise.",
      "Apologies are not necessary.",
      "I've told you that apologies are not required.",
      "It did not bother me. Please continue."
    ]
  };

  /**
   * Pick a random phrase from one of the array-valued entries of `data`.
   * @param {string} name - key into `data` (e.g. 'init', 'sorry')
   * @returns {string} one phrase chosen at random
   */
  function randq(name){
    var q = data[name];
    return q[Math.floor(Math.random()*q.length)];
  }

  /**
   * Append text to the transcript.
   * The text goes in as a text node, never as an HTML string, so whatever the
   * user typed (including `<` or `&...;`) is shown literally instead of being
   * parsed as markup.
   * @param {string} text - text to append to #output
   */
  function say(text){
    $('#output').append(document.createTextNode(text));
  }

  /**
   * Collect the tagged words that are possessive pronouns or nouns.
   * @param {Array} words - tagger output, a list of [word, tag] pairs
   * @returns {Array} list of [word, tag, index] triples, where index is the
   *   position of the word in `words`. The pairs in `words` are copied, not
   *   modified.
   */
  function findSubjects(words){
    var subjects = [];
    for (var i = 0; i < words.length; i++) {
      var tag = words[i][1];
      // charAt instead of tag[0]: string indexing is not supported by the
      // older browsers this demo otherwise caters for.
      if (tag === 'PP$' || tag === 'PRP$' || tag.charAt(0) === 'N') {
        subjects.push(words[i].concat(i));
      }
    }
    return subjects;
  }

  $('#eliza').submit(function(){
    var v = $('#input').val(),
      out="";

    say("YOU: " + v + "\n\n");

    // Only run the tagger when the input contains at least one word character.
    if (/\w/.test(v)){
      var words = tagger.tag(lexer.lex(v));
      console.log("words", words);
      // find the most relevant noun/pronouns
      var subjects = findSubjects(words);
      console.log("subjects", subjects);

    }

    // NOTE: nothing assigns `out` yet, so no ELIZA reply is ever printed here.
    if (out.length){
      say("ELIZA: " + out +"\n\n");
    }

    // Returning false stops the browser from actually submitting the form.
    return false;
  });

  $('#input').focus();
  say("\n\nELIZA: " + randq('init') + "\n\n");

});
Loading