From 92a77ea465eb62c597babeb95d2db622d80cc676 Mon Sep 17 00:00:00 2001 From: Hung-chih Yang Date: Fri, 3 Apr 2020 10:57:58 -0700 Subject: [PATCH] Weight support, fixing CV/test indexing bug, and refactoring --- packages/dispatcher/README.md | 2 +- .../resources/data/Columnar/Email.txt | 601 ++++++++++++++++++ .../resources/data/Columnar/EmailTest.txt | 75 +++ .../dispatcher/src/data/AppColumnarData.ts | 10 + packages/dispatcher/src/data/ColumnarData.ts | 131 +++- packages/dispatcher/src/data/DataUtility.ts | 2 + .../src/data/EntityAnnotatedCorpusData.ts | 97 ++- packages/dispatcher/src/data/LuData.ts | 119 +++- packages/dispatcher/src/data/data.ts | 123 ++-- .../data_structure/DictionaryMapUtility.ts | 114 +++- packages/dispatcher/src/index.ts | 7 + .../confusion_matrix/AppConfusionMatrix.ts | 322 +++++++++- .../confusion_matrix/ConfusionMatrix.ts | 2 +- .../AbstractBaseEvaluator.ts | 2 +- .../cross_validation/AppCrossValidator.ts | 17 +- .../cross_validation/CrossValidator.ts | 68 +- .../src/model/evaluation/predict/Predictor.ts | 18 +- .../report/AppDataProfileReporter.ts | 16 +- .../report/AppModelMetaDataProfileReporter.ts | 11 + .../evaluation/report/AppThresholdReporter.ts | 141 +++- .../evaluation/report/ThresholdReporter.ts | 44 +- .../src/model/evaluation/test/AppTester.ts | 29 +- .../src/model/evaluation/test/Tester.ts | 40 +- .../featurizer/NgramSubwordFeaturizer.ts | 90 +-- .../AppAutoActiveLearner.ts | 35 +- .../learner/AppSoftmaxRegressionSparse.ts | 14 +- .../neural_network/learner/UtilityLearner.ts | 68 +- packages/dispatcher/src/utility/AppUtility.ts | 38 +- packages/dispatcher/src/utility/utility.ts | 232 +++++-- .../test/data/AppColumnarData.test.ts | 2 + .../dispatcher/test/data/ColumnarData.test.ts | 6 +- .../test/data/ColumnarDataGlueSst2.test.ts | 6 +- .../confusion_matrix/ConfusionMatrix.test.ts | 2 +- .../AppCrossValidator.test.ts | 7 +- .../cross_validation/CrossValidator.test.ts | 6 +- .../featurizer/NgramSubwordFeaturizer.test.ts | 36 +- .../AppAutoActiveLearner.test.ts | 3 + .../AutoActiveLearner.test.ts | 3 + .../learner/UtilityLearner.test.ts | 6 + .../test/utility/AppUtility.test.ts | 4 + .../dispatcher/test/utility/utility.test.ts | 28 +- packages/dispatcher/tsconfig.json | 7 +- 42 files changed, 2225 insertions(+), 359 deletions(-) create mode 100644 packages/dispatcher/resources/data/Columnar/Email.txt create mode 100644 packages/dispatcher/resources/data/Columnar/EmailTest.txt diff --git a/packages/dispatcher/README.md b/packages/dispatcher/README.md index 055b0661e..e8e67bee3 100644 --- a/packages/dispatcher/README.md +++ b/packages/dispatcher/README.md @@ -1,7 +1,7 @@ @microsoft/bf-dispatcher ======================== -This package is intended for Microsoft use only. It is not designed to be consumed as an independent package. +This package is intended to be consumed by other 'command' packages in the Botframework CLI suite, one example is 'bf-orchestrator'. 'bf-dispatcher' is a generic NLP intent classification package/library. It currently can do: diff --git a/packages/dispatcher/resources/data/Columnar/Email.txt b/packages/dispatcher/resources/data/Columnar/Email.txt new file mode 100644 index 000000000..58e566cd0 --- /dev/null +++ b/packages/dispatcher/resources/data/Columnar/Email.txt @@ -0,0 +1,601 @@ +AddFlag add flag to the email john just sent to me +AddFlag make it flagged +AddFlag i want to add a flag on this email +AddFlag add a flag please +AddFlag flag this email as important for me +AddFlag add a flag to the last email +AddFlag this email should be flagged +AddFlag add flag to this email +AddFlag put a flag on the new email +AddFlag add a flag +AddFlag the email to ruth needs to be flagged +AddFlag mark as flag +AddFlag add flag to it +AddFlag flag +AddFlag put a flag +AddFlag flag this email +AddFlag turn flag on +AddFlag flag it +AddFlag flag on +AddFlag flag the current email +AddFlag i want to add a flag +AddFlag flag the email from davis +AddFlag add flag +AddFlag the email from thomas should be flagged +AddFlag add a flag to this email +AddFlag flag the email +AddFlag this email needs to be flagged +AddFlag this email need to be flagged +AddFlag add flag to this message +AddFlag add flag on it +AddFlag mark the email flagged +AddMore i need to add something else to my email to cheryl +AddMore add more and change the message +AddMore i need to add more to the email +AddMore add: call me tonight after work +AddMore write more +AddMore i need to add more to the email message i am sending to vincent +AddMore put some additional lines to this message +AddMore i need to add more text +AddMore add more to message +AddMore add a picture +AddMore add a file to the email +AddMore please add it was terrible +AddMore i would like to add more to the email message +AddMore add more message +AddMore need to add information to the previous email +AddMore add some more +AddMore add a subject +AddMore i'd like to add more to the email +AddMore add photo +AddMore add more to the last email +AddMore add, by the way, what's the plan of next step +AddMore i would like to add more to the email +AddMore add to body of email +AddMore append an attachment to this email +AddMore edit email so i can type an additional message +AddMore add another line to the message +AddMore i need to add further contents +AddMore insert more lines for me please +AddMore add something +AddMore add more to the message +AddMore i forgot to add an important part to that email to james . please set it up to edit +AddMore is it ok if i add more to the email +AddMore i need to add additional lines +AddMore add more please +AddMore i need to add more message +AddMore add more details to it +AddMore add more +AddMore please add more +AddMore i need to add something else to that email to donna before it is sent +AddMore can i add more to the email +AddMore i'd like to add a bit more to the email. +AddMore i'd like to add a bit more to the message +AddMore add more to roy 's email +AddMore add more text please +AddMore i want to add more the email +AddMore can i add more to the message +AddMore add more to email body +AddMore i would like to open a new line +AddMore i wish to add more to the message +AddMore i am not done yet. i need to add some more details +AddMore add did you enjoy the entire program +AddMore ok, i need to add a few things to that +AddMore more text +AddMore add more to the email +AddMore add file to email +AddMore wait, i need to write more +AddMore add more to it +AddMore add more to text +AddMore add more text +AddMore add more don't forget to bring beer +AddMore it isn't complete, need more contents +AddMore add more to email +AddMore please add, please let me know what i can bring. i'd be happy to make a side dish or dessert +AddMore attach file +AddMore insert more text in my email +CancelMessages cancel my email to jane +CancelMessages don't show me +CancelMessages don't send that email +CancelMessages can you cancel it +CancelMessages don't email to her +CancelMessages forget about the email +CancelMessages don ' t read +CancelMessages never mind cancel the message +CancelMessages stop reading +CancelMessages never mind cancel the mail +CancelMessages okay cancel sending the mail +CancelMessages cancel this email +CancelMessages quit the sending +CancelMessages cancel the mail +CancelMessages don't send out +CancelMessages don't send this email +CancelMessages cancel email +CancelMessages don't read the message +CancelMessages neither of them +CancelMessages don't send it +CancelMessages cancel message +CancelMessages stop message +CancelMessages don't email +CancelMessages exit +CancelMessages i want you to cancel the email +CancelMessages never mind, forget about the mail +CancelMessages cancel this sending process +CancelMessages don't send +CancelMessages no, i don't want to send this message +CancelMessages no just cancel the email +CancelMessages don ' t read it +CancelMessages nevermind cancel +CancelMessages cancel email to natalie +CancelMessages cancel the message +CancelMessages don't read the email +CancelMessages no, no, cancel the reading +CancelMessages cancel searching the messages +CancelMessages abort deletion +CancelMessages cancel the email to my sister +CancelMessages cancel this message +CancelMessages don 't send the email +CancelMessages cancel the email sent to alex +CancelMessages cancel the email +CancelMessages no don't send +CancelMessages no don't send it +CancelMessages no cancel it, i don't want to send the mail +CheckMessages do i have new message +CheckMessages does anyone send message to me just then +CheckMessages do i receive new message +CheckMessages does anyone send email to me just then +CheckMessages do i have any new mail +CheckMessages does my outlook have new email +CheckMessages could you please check my emails +CheckMessages please check my emails +CheckMessages is there new email +CheckMessages any new email now +CheckMessages whether i get new message +CheckMessages check outlook please +CheckMessages whether i have new email +CheckMessages do i receive new mail in outlook +CheckMessages show the important emails in my inbox +CheckMessages any new message now +CheckMessages check my inbox +CheckMessages show my unread mails +CheckMessages i'd like to check my inbox +CheckMessages do i receive new email +CheckMessages check email +CheckMessages whether i have new message +CheckMessages do i get new email +CheckMessages check email please +CheckMessages i want to check my inbox +CheckMessages please check my inbox +CheckMessages could you please check my inbox +CheckMessages show latest emails +CheckMessages check up messages +CheckMessages check up email +CheckMessages whether i receive new email +CheckMessages show my emails +CheckMessages please check my outlook +CheckMessages check my message +CheckMessages i want to check my emails +CheckMessages check my gmail +CheckMessages check my email please +CheckMessages any new email +CheckMessages do i have new email now +CheckMessages any new email available +CheckMessages could you please check my messages +CheckMessages do i have new email +CheckMessages check my emails +CheckMessages whether i get new email +CheckMessages check my mail box +ConfirmMessages no problem, go ahead send the mail +ConfirmMessages just do it +ConfirmMessages yeah right, send to alex +ConfirmMessages ok, good, just send it +ConfirmMessages of course, just delete the mail +ConfirmMessages "yes, you can" +ConfirmMessages yes, send it +ConfirmMessages i confirm that i want to send this email +ConfirmMessages perfect thank you +ConfirmMessages alright, just send the message +ConfirmMessages okay, send it now +ConfirmMessages "sure, go ahead" +ConfirmMessages ok, good to me, send it please +ConfirmMessages correct, please send it. +ConfirmMessages yes it's right +ConfirmMessages right, send it please +ConfirmMessages ok send the mail to may +ConfirmMessages okay send it +ConfirmMessages "okay, send it" +ConfirmMessages yes that's right +ConfirmMessages okay +Delete can you help me delete it +Delete delete the previous 4 emails +Delete empty the email inbox +Delete put it in the recycle bin +Delete delete the email from my hotmail account +Delete delete all emails from tom +Delete remove the emails received yesterday +Delete delete this message permanently +Delete put the emails from this file folder to trash bin +Delete remove emails that are duplicate +Delete remove it from my inbox +Delete delete this email +Delete remove the email from mary +Delete put the email to trash bin +Delete remove emails with red flags +Delete delete all emails received tonight +Delete delete the email sent from mary jane +Delete clear my inbox +Delete delete the unread emails +Delete delete the first email for me +Delete delete what i just wrote +Delete delete the red ones +Delete put the email in the recycle bin +Delete delete the second one +Delete delete the last one +Delete delete the second mail +Forward forward message to girlfriend +Forward forward email to girlfriend +Forward could you please forward this email to my sister +Forward forward to alan tonight +Forward forward this email to patricia +Forward forward the last email to susan +Forward please forward this message +Forward forward the email from john smith to michelle by saying fyi +Forward forward to thomas please +Forward forward to partoneparttwo@gmail.com next monday +Forward forward to wife by saying i love you +Forward forward to deborah with a message saying that i don't want that +Forward forward to dorothy by typing i agree with it +Forward forward the email to dad +Forward forward to my boss and attach the schedule file +Forward forward emails to gabriel +Forward forward this email to gary brown please +Forward forward by saying if you interest to rebecca +Forward forward this email +Forward forward the email from melissa to peter +Forward forward to brian potter tonight +Forward forward this email to joseph +Forward forward email +Forward please forward this email to partoneparttwo@163.com +Forward could you forward this message to ronald and roy +Forward please forward this email to albert by typing everything goes fine +Forward please forward to benjamin +Forward forward all files from sally to austin +Forward forward this email to partone dot parttwo at gmail dot com +Forward forward to mom +Forward forward this email to eugene by typing what do you think +Forward please forward this email to partoneparttwo@outlook.com +None 2 +None the first one +None the second one +None the third one +None 1 +None 3 +QueryLastText please tell me who emailed me last +QueryLastText open the last email +QueryLastText come to the last +QueryLastText what was the last email +QueryLastText what is the lastest email i received from dad +QueryLastText the last email +QueryLastText whose email just then +QueryLastText show me the lastest email +QueryLastText who recently emailed me +QueryLastText who emailed me last +QueryLastText who sent me the email lastly yesterday +QueryLastText what was the last email i got from steve edwards +QueryLastText who email me just now +QueryLastText show the last email +QueryLastText who emailed me just now +QueryLastText open the lastest email i got +QueryLastText what harry last email said +QueryLastText i want to see the last email +QueryLastText what did mom just say +QueryLastText show me the newest email +QueryLastText what was the last email i got from dad +QueryLastText what henry just said +QueryLastText last email +QueryLastText who texted me +QueryLastText can you tell me the last email i received +QueryLastText who sent me the mail just now +QueryLastText whose email now +QueryLastText go to the last one +QueryLastText what is the last email i received today +QueryLastText who emailed me +QueryLastText what eric watson just said +QueryLastText who texted me just now +ReadAloud read me the email on apple +ReadAloud read email +ReadAloud read my most recent email +ReadAloud read my last email +ReadAloud read aloud my new email +ReadAloud read me my latest emails +ReadAloud read me the newest email +ReadAloud read my last email out to me +ReadAloud read out the email from liu about transfer +ReadAloud read unread message +ReadAloud read it +ReadAloud read emails +ReadAloud read last incoming emails +ReadAloud read my recent email to me +ReadAloud read my emails from patty +ReadAloud read latest email +ReadAloud read the last email +ReadAloud read the latest email from mom +ReadAloud read email to me +ReadAloud read my email messages +ReadAloud read me the last emails of the five minutes +ReadAloud read first email in the linked inbox +ReadAloud read my second email +ReadAloud read last email received +ReadAloud read emails from clay +ReadAloud read new email from david ma +ReadAloud read my email from tyler swift +ReadAloud read me the email titled happy new year +ReadAloud read my email please +ReadAloud read todays mail +ReadAloud read my email to me +ReadAloud read my email from baby +ReadAloud read most recent email +ReadAloud read first email in link box +ReadAloud read google mail +ReadAloud read email from dawn +ReadAloud read darren's mail on the movie +ReadAloud read me the email sent on thanksgiving day +ReadAloud read recent email +ReadAloud read my notification +ReadAloud read my inbox +ReadAloud read new message +ReadAloud read mary grace white email +ReadAloud please read my last email +ReadAloud read my recent email +ReadAloud read the first email in hotmail +ReadAloud read me the email +ReadAloud read me the emails from agatha +ReadAloud read my emails +ReadAloud read last mail +ReadAloud read the first email +ReadAloud read the last email message +ReadAloud read out darren's mail +ReadAloud read email from kat +ReadAloud read new email +ReadAloud read my email from hubby +ReadAloud read my new email +ReadAloud read me the last email claude sent +ReadAloud read the latest email from steve lip +ReadAloud read my recent email message please +ReadAloud read me the recent email titled abcd from jessica +ReadAloud read unread email +ReadAloud read the email +ReadAloud read the email on auto repair +ReadAloud read my outlook email +ReadAloud read today's mail +ReadAloud read me dylan's email sent on yesterday +ReadAloud read my new emails +ReadAloud read aloud the christmas party email +ReadAloud read please +ReadAloud read email from mum +ReadAloud could you read out the email on how to use the new tool +ReadAloud read my recent email messages +ReadAloud read me jessica's email on dress code for the party +ReadAloud read me the email on thanksgiving day +ReadAloud read me my last hotmail email +ReadAloud can you read my emails +ReadAloud read out xu's email about apple's news +ReadAloud read the latest email i sent +ReadAloud can you read my last email +Reply reply by saying i love you +Reply reply yee ha +Reply reply with hello +Reply email back +Reply reply to the first one +Reply email back i will call you back +Reply send email back +Reply reply by saying yes +Reply respond to lore hound +Reply create a response to the email by saying pls send me the picture again +Reply respond to the email by saying i am busy today +Reply reply yee hello +Reply reply by email thank you very much best regards jun +Reply reply required to an email +Reply respond i ' m sick i can ' t do it +Reply reply to the email +Reply send the response with i've already know +Reply reply that i am busy +Reply reply to edward +Reply reply to email i am busy now +Reply reply we'll see you later +Reply reply to susan +Reply make a response with thank you very much +Reply respond to nathan +Reply how to reply to an email +Reply reply to my last email +Reply return siberian huskies mobile +Reply return barbara on mobile +Reply reply by typing hello +Reply reply +Reply reply yes boss. +SearchMessages show me emails from clara chan +SearchMessages email sent from lisa +SearchMessages search keywordsone keywordstwo from inbox +SearchMessages did i get any email from tom +SearchMessages find mails titled recommended courses +SearchMessages detect the email containing keyword beauty +SearchMessages find emails from mom +SearchMessages find an email from abc123@outlook.com +SearchMessages search keywords keywordone keywordtwo in my emails +SearchMessages search text with words lunch together +SearchMessages show me the email about spring festival +SearchMessages search emails contain work items +SearchMessages show me the email sent from mom +SearchMessages show me the email from tom and filtering with word lunch +SearchMessages show me emails from girlfriend +SearchMessages search an email with subject background screening +SearchMessages find email titled new design +SearchMessages tell me the email from lily wong +SearchMessages list the emails contain funny picture +SearchMessages emails contains bank +SearchMessages find emails with resume +SearchMessages find an email from angela +SearchMessages query emails with bill +SearchMessages can you search my emails +SearchMessages find emails that contain malta +SearchMessages detect the email from lisa +SearchMessages find email with title production tools +SearchMessages search the emails contains money +SearchMessages search emails from mike +SearchMessages find an email on the dinner reservation +SearchMessages did i get emails from tom +SearchMessages search my emails +SearchMessages detect emails from betty +SearchMessages find an email about new year's planning +SearchMessages search emails contains coupons +SearchMessages search email with key words lunch +SearchMessages tell me the email with subject weekly report +SearchMessages show emails with "credit card" +SearchMessages search bla bla in my emails +SearchMessages show emails contain words "future plan" +SearchMessages search the email with keywords hello +SearchMessages search emails about boating +SearchMessages find an email from jay that contains halloween +SearchMessages looking for an email with hello +SearchMessages search the emails contains microsoft +SearchMessages search jensen's emails +SearchMessages search email contain outlook +SearchMessages enumerate the emails with algroithm +SearchMessages did i get the email containing keyword lunch +SendEmail email my presentation +SendEmail send and email about swim team practice +SendEmail email to cynthia and mike, that dinner last week was splendid. +SendEmail send an urgent email from my work account to christian +SendEmail send an email to jacqueline and tianyu about the test result +SendEmail send an email to larry , joseph and billy larkson +SendEmail set an email today +SendEmail send this document to an email +SendEmail send email to kai xu, mingming and my mother +SendEmail send an email about swim team practice +SendEmail start new email to friends about the club +SendEmail new email about really good talk to michelle +SendEmail send important email to evelyn and gary +SendEmail send an email to partone@gmail.com +SendEmail send the email now +SendEmail write an urgent email to bobby +SendEmail send an urgent email +SendEmail start up a new email to michelle about watching baseball +SendEmail send a new email about the problem solving to andrea, angela, and ron +SendEmail send an email to mom +SendEmail send email to partone.parttwo@outlook.com +SendEmail send an email marked with a bang to amy +SendEmail the new email is high priority that is being sent to jacob +SendEmail send an email to lily roth and abc123@microsoft.com +SendEmail send an email +SendEmail send lori a new flagged email +SendEmail send an email for me +SendEmail send the email +SendEmail send an email to jimmy klein saying this is the message about weekend plans +SendEmail send a new email about the hockey tournament to marie jane, joseph , and john +SendEmail send an email to my brother +SendEmail send thomas an email +SendEmail send an email to lu , yue and qiong about funding +SendEmail send angela an email marked as high priority +SendEmail send an important email to olivia +SendEmail new email to kimberly about wingman +SendEmail send mail to dorothy +SendEmail send an email to harry potter +SendEmail send an email to christopher carpenter about the hiking trip +SendEmail send email marked priority to yun-sim and yi +SendEmail email my brother +SendEmail send my housekeeping doc to jeffrey +SendEmail send a new high importance email to jordan +SendEmail write an email about the fundraiser +SendEmail send email to jiayi today +SendEmail email to tom white about that flower saying beautiful +SendEmail send an email to partone_parttwo@microsoft.com +SendEmail send an email about test status to mark +SendEmail send jacqueline an email with low priority +SendEmail email her the message "fine, ok" +SendEmail start a new email saying lets go to the park +SendEmail start new email about taco blog to nicole and emily +SendEmail write email +SendEmail send a mail to daniel +SendEmail email to lawrence about opening issue +SendEmail send a email to leehom wong about the piano concert saying it's wonderful +SendEmail make a new email about weather forecast +SendEmail send an email about the window that is broken +SendEmail send an email to sean about weekend plans +SendEmail send email to hannah saying test +SendEmail write email to mom subject is babysit +SendEmail send an email to a.j.ron marked as important +SendEmail send an urgent email from my work email to jack +SendEmail email the file to henry mathew +SendEmail send a new email to larry with a file attached +SendEmail send new email to christian and mark it high importance +SendEmail send an email marked for follow up to christian +SendEmail compose new email about spanish homework +SendEmail start a new email from tracy saying here is my resume +SendEmail send email to a and tian +SendEmail send a read receipt email to samuel +SendEmail write an email which title is hello and context is let's have meeting together +SendEmail send alexander a red bang email +SendEmail send an email to zachary about we can plan things let's go hiking +SendEmail i need to send an email about the words to a song +SendEmail send an email today +SendEmail send my payment visio diagram to ronald +SendEmail send email about homework plan to raymond and philip +SendEmail email to amy cooper about haha saying hello +SendEmail email to mike waters : mike, that dinner last week was splendid. +SendEmail send email to louis and mark it important +SendEmail start a new email to aaron about sleeping over tonight +SendEmail send billy an email with a red bang +SendEmail create new mail titled urgent meeting information to jonathan +SendEmail mark email for follow up and send to arthur +SendEmail start a new email about marriage counselor appointments +SendEmail send a new email to partonepartwopartthree@yahoo.com +SendEmail new email about writing documents +SendEmail send email to heather about car +SendEmail email to partoneparttwo@gmail.com +SendEmail send an email marked follow up to jerry +SendEmail will you send a marked non urgent email to james +SendEmail send an email with read receipt to peter +SendEmail send large files through email +SendEmail email to harry potter and hermione granger +SendEmail send an email to nathan with a red bang +SendEmail send a new email to nicholas and jesse about coupons +SendEmail start an email to jason about speaking up +SendEmail send a new email about facebook +SendEmail send an email to harold and bob kappus about team lunch saying same team lunch this tuesday +ShowNext move on to next mails +ShowNext move on next mail by jason +ShowNext show the next emails by wong +ShowNext show me next from mary +ShowNext next email +ShowNext go on, show me more mails +ShowNext the next important message +ShowNext move forward +ShowNext show next unread +ShowNext go to next mail +ShowNext show next email +ShowNext go forward to next mails +ShowNext next unread one +ShowNext go to the next page +ShowNext show the next messages +ShowNext show the next email from my boss +ShowNext next unread email +ShowNext show me the next five mails +ShowNext are there any unread messages? show next +ShowNext the next email +ShowNext show me the next +ShowPrevious show me the last three mails +ShowPrevious show me the previous email +ShowPrevious show previous one in inbox +ShowPrevious go to previous mails +ShowPrevious show me previous email from jack +ShowPrevious show previous in red category +ShowPrevious show me the one before +ShowPrevious previous email +ShowPrevious show the previous email from my mentor +ShowPrevious move back to last mails +ShowPrevious the previous email +ShowPrevious bring the previous one, i want to read it again +ShowPrevious back to the last one from apple +ShowPrevious previous one please +ShowPrevious show the previous one +ShowPrevious what is the previous email diff --git a/packages/dispatcher/resources/data/Columnar/EmailTest.txt b/packages/dispatcher/resources/data/Columnar/EmailTest.txt new file mode 100644 index 000000000..619f24d56 --- /dev/null +++ b/packages/dispatcher/resources/data/Columnar/EmailTest.txt @@ -0,0 +1,75 @@ +AddFlag add flag to the email mary just sent to me +AddFlag make the email flagged +AddFlag i want to add a flag to this email +AddFlag attach a flag please +AddFlag flag this email as important +AddMore i need to add something more to my email to cheryl +AddMore add more and revise the message +AddMore i need to write more to the email +AddMore add: call me tonight after dinner +AddMore write thanks +CancelMessages cancel my draft to jane +CancelMessages don't show me the email +CancelMessages don't send the last email +CancelMessages can you cancel it, please +CancelMessages don't send to her +CheckMessages do i have new messages? +CheckMessages does anyone send message to me now +CheckMessages do i receive new message from work +CheckMessages does anyone send email to me just now +CheckMessages do i have any new mails? +ConfirmMessages no problem, go ahead and send the mail +ConfirmMessages do it +ConfirmMessages yes, send to alex +ConfirmMessages ok, good, just go ahead and send it +ConfirmMessages sure, just delete the mail +Delete please delete it +Delete delete the last 4 emails +Delete empty the email inbox and move them to recycle bin +Delete put the email in the recycle bin +Delete delete the email from my personal account +Forward forward message to mary +Forward forward email to mary +Forward could you please forward this email to my brother +Forward forward to allen tonight +Forward forward this email to patricia now +None etherb wger3b +None the first one wd +None the second one, nothing new +None the third one, nonsense +None wdfwdf wefqwefw edfwf +QueryLastText please tell me who sent me the last email +QueryLastText open the lastest email +QueryLastText move to the last +QueryLastText what was in the lastest email +QueryLastText what is the lastest email i received from mom +ReadAloud read me the email on iphone +ReadAloud read email out loud +ReadAloud read my most recent email to me +ReadAloud read my lastest email +ReadAloud read aloud my newest email +Reply reply and say i love you +Reply reply yeah +Reply reply with hi +Reply email back now +Reply reply to the first email +SearchMessages show me emails from clara chen +SearchMessages email sent from beth +SearchMessages search keyword no. one keyword no. two from inbox +SearchMessages did i get any email from chris +SearchMessages find mails with the title recommended courses +SendEmail email my presentation to manager +SendEmail send and email about swim team practice this week +SendEmail email to cynthia and mike, that the dinner last week was great. +SendEmail send an urgent email from my work account to christian now +SendEmail send an email to jacqueline about the great test result +ShowNext move on to the next mail +ShowNext move to next email by jason +ShowNext show the next email by wong +ShowNext show me next from lily +ShowNext next email, please +ShowPrevious show me the last four mails +ShowPrevious show me the previous email in inbox +ShowPrevious show the previous one in inbox +ShowPrevious go to the previous mail +ShowPrevious show me the previous email from jack diff --git a/packages/dispatcher/src/data/AppColumnarData.ts b/packages/dispatcher/src/data/AppColumnarData.ts index fad2da4ce..5bd688852 100644 --- a/packages/dispatcher/src/data/AppColumnarData.ts +++ b/packages/dispatcher/src/data/AppColumnarData.ts @@ -57,6 +57,14 @@ export function exampleFunctionData(): ColumnarData { required: false, }, ); + parser.addArgument( + ["-wi", "--weightColumnIndex"], + { + defaultValue: -1, + help: "weight column index", + required: false, + }, + ); parser.addArgument( ["-ls", "--linesToSkip"], { @@ -83,6 +91,7 @@ export function exampleFunctionData(): ColumnarData { // ---- NOTE-TODO-PLACEHOLDER ---- } const labelColumnIndex: number = +args.labelColumnIndex; const textColumnIndex: number = +args.textColumnIndex; + const weightColumnIndex: number = +args.weightColumnIndex; const linesToSkip: number = +args.linesToSkip; Utility.debuggingLog( `filename=${filename}`); @@ -94,6 +103,7 @@ export function exampleFunctionData(): ColumnarData { new NgramSubwordFeaturizer(), labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip, true); // ---- NOTE-TODO-PLACEHOLDER ---- columnarData.dumpLuLuisJsonStructureInLuFormat( diff --git a/packages/dispatcher/src/data/ColumnarData.ts b/packages/dispatcher/src/data/ColumnarData.ts index 69dc56390..e70c58138 100644 --- a/packages/dispatcher/src/data/ColumnarData.ts +++ b/packages/dispatcher/src/data/ColumnarData.ts @@ -15,6 +15,7 @@ export class ColumnarData extends Data { existingColumnarData: ColumnarData, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, linesToSkip: number, samplingIndexArray: number[], toResetFeaturizerLabelFeatureMaps: boolean): ColumnarData { @@ -25,6 +26,7 @@ export class ColumnarData extends Data { existingColumnarData.getFeaturizer(), labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip, toResetFeaturizerLabelFeatureMaps); // ------------------------------------------------------------------- @@ -40,7 +42,8 @@ export class ColumnarData extends Data { "endPos": number, }>, "intent": string, - "text": string }> = columnarData.luUtterances; + "text": string, + "weight": number }> = columnarData.luUtterances; const lengthUtterancesArray: number = luUtterances.length; columnarData.luUtterances = []; @@ -55,7 +58,7 @@ export class ColumnarData extends Data { columnarData.collectIntents(columnarData.luUtterances); columnarData.entityTypeInstanceIndexMapArray = columnarData.collectEntityTypes(columnarData.luUtterances); - columnarData.intentsUtterances.intents = columnarData.luUtterances.map( + columnarData.intentsUtterancesWeights.intents = columnarData.luUtterances.map( (entry: { "entities": Array<{ "entity": string, @@ -68,8 +71,9 @@ export class ColumnarData extends Data { "endPos": number, }>, "intent": string, - "text": string }) => entry.intent as string); - columnarData.intentsUtterances.utterances = columnarData.luUtterances.map( + "text": string, + "weight": number }) => entry.intent as string); + columnarData.intentsUtterancesWeights.utterances = columnarData.luUtterances.map( (entry: { "entities": Array<{ "entity": string, @@ -82,7 +86,23 @@ export class ColumnarData extends Data { "endPos": number, }>, "intent": string, - "text": string }) => entry.text as string); + "text": string, + "weight": number }) => entry.text as string); + columnarData.intentsUtterancesWeights.weights = columnarData.luUtterances.map( + (entry: { + "entities": Array<{ + "entity": string, + "startPos": number, + "endPos": number, + }>, + "partOfSpeechTags": Array<{ + "partOfSpeechTag": string, + "startPos": number, + "endPos": number, + }>, + "intent": string, + "text": string, + "weight": number }) => entry.weight as number); // ------------------------------------------------------------------- if (toResetFeaturizerLabelFeatureMaps) { columnarData.resetFeaturizerLabelFeatureMaps(); @@ -97,6 +117,7 @@ export class ColumnarData extends Data { existingColumnarData: ColumnarData, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, linesToSkip: number, filteringIndexSet: Set, toResetFeaturizerLabelFeatureMaps: boolean): ColumnarData { @@ -107,6 +128,7 @@ export class ColumnarData extends Data { existingColumnarData.getFeaturizer(), labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip, toResetFeaturizerLabelFeatureMaps); // ------------------------------------------------------------------- @@ -122,7 +144,8 @@ export class ColumnarData extends Data { "endPos": number, }>, "intent": string, - "text": string }> = + "text": string, + "weight": number }> = columnarData.luUtterances; columnarData.luUtterances = luUtterances.filter( (value: { @@ -137,7 +160,8 @@ export class ColumnarData extends Data { "endPos": number, }>, "intent": string, - "text": string }, + "text": string, + "weight": number }, index: number, array: Array<{ "entities": Array<{ @@ -151,7 +175,8 @@ export class ColumnarData extends Data { "endPos": number, }>, "intent": string, - "text": string }>) => { + "text": string, + "weight": number }>) => { return (filteringIndexSet.has(index)); }); // ------------------------------------------------------------------- @@ -159,7 +184,7 @@ export class ColumnarData extends Data { columnarData.collectIntents(columnarData.luUtterances); columnarData.entityTypeInstanceIndexMapArray = columnarData.collectEntityTypes(columnarData.luUtterances); - columnarData.intentsUtterances.intents = columnarData.luUtterances.map( + columnarData.intentsUtterancesWeights.intents = columnarData.luUtterances.map( (entry: { "entities": Array<{ "entity": string, @@ -172,8 +197,9 @@ export class ColumnarData extends Data { "endPos": number, }>, "intent": string, - "text": string }) => entry.intent as string); - columnarData.intentsUtterances.utterances = columnarData.luUtterances.map( + "text": string, + "weight": number }) => entry.intent as string); + columnarData.intentsUtterancesWeights.utterances = columnarData.luUtterances.map( (entry: { "entities": Array<{ "entity": string, @@ -186,7 +212,23 @@ export class ColumnarData extends Data { "endPos": number, }>, "intent": string, - "text": string }) => entry.text as string); + "text": string, + "weight": number }) => entry.text as string); + columnarData.intentsUtterancesWeights.weights = columnarData.luUtterances.map( + (entry: { + "entities": Array<{ + "entity": string, + "startPos": number, + "endPos": number, + }>, + "partOfSpeechTags": Array<{ + "partOfSpeechTag": string, + "startPos": number, + "endPos": number, + }>, + "intent": string, + "text": string, + "weight": number }) => entry.weight as number); // ------------------------------------------------------------------- if (toResetFeaturizerLabelFeatureMaps) { columnarData.resetFeaturizerLabelFeatureMaps(); @@ -202,6 +244,7 @@ export class ColumnarData extends Data { featurizer: NgramSubwordFeaturizer, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, linesToSkip: number, toResetFeaturizerLabelFeatureMaps: boolean): ColumnarData { // ------------------------------------------------------------------- @@ -210,6 +253,7 @@ export class ColumnarData extends Data { featurizer, labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip); columnarData.content = content; @@ -221,7 +265,22 @@ export class ColumnarData extends Data { columnarData.collectIntents(columnarData.luUtterances); columnarData.entityTypeInstanceIndexMapArray = columnarData.collectEntityTypes(columnarData.luUtterances); - columnarData.intentsUtterances.intents = columnarData.luUtterances.map( + columnarData.intentsUtterancesWeights.intents = columnarData.luUtterances.map( + (entry: { + "entities": Array<{ + "entity": string, + "startPos": number, + "endPos": number, + }>, + "partOfSpeechTags": Array<{ + "partOfSpeechTag": string, + "startPos": number, + "endPos": number, + }>, + "intent": string, + "text": string, + "weight": number }) => entry.intent as string); + columnarData.intentsUtterancesWeights.utterances = columnarData.luUtterances.map( (entry: { "entities": Array<{ "entity": string, @@ -234,8 +293,9 @@ export class ColumnarData extends Data { "endPos": number, }>, "intent": string, - "text": string }) => entry.intent as string); - columnarData.intentsUtterances.utterances = columnarData.luUtterances.map( + "text": string, + "weight": number }) => entry.text as string); + columnarData.intentsUtterancesWeights.weights = columnarData.luUtterances.map( (entry: { "entities": Array<{ "entity": string, @@ -248,7 +308,8 @@ export class ColumnarData extends Data { "endPos": number, }>, "intent": string, - "text": string }) => entry.text as string); + "text": string, + "weight": number }) => entry.weight as number); // ------------------------------------------------------------------- if (toResetFeaturizerLabelFeatureMaps) { columnarData.resetFeaturizerLabelFeatureMaps(); @@ -261,16 +322,19 @@ export class ColumnarData extends Data { protected labelColumnIndex: number = 0; protected textColumnIndex: number = 1; + protected weightColumnIndex: number = -1; protected linesToSkip: number = 0; protected constructor( featurizer: NgramSubwordFeaturizer, labelColumnIndex: number = 0, textColumnIndex: number = 1, + weightColumnIndex: number = -1, linesToSkip: number = 0) { super(featurizer); this.labelColumnIndex = labelColumnIndex; this.textColumnIndex = textColumnIndex; + this.weightColumnIndex = weightColumnIndex; this.linesToSkip = linesToSkip; } @@ -278,6 +342,7 @@ export class ColumnarData extends Data { existingData: Data, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, linesToSkip: number, samplingIndexArray: number[], toResetFeaturizerLabelFeatureMaps: boolean): Promise { @@ -285,6 +350,7 @@ export class ColumnarData extends Data { existingData as ColumnarData, labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip, samplingIndexArray, toResetFeaturizerLabelFeatureMaps); @@ -294,6 +360,7 @@ export class ColumnarData extends Data { existingData: Data, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, linesToSkip: number, filteringIndexSet: Set, toResetFeaturizerLabelFeatureMaps: boolean): Promise { @@ -301,6 +368,7 @@ export class ColumnarData extends Data { existingData as ColumnarData, labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip, filteringIndexSet, toResetFeaturizerLabelFeatureMaps); @@ -318,17 +386,20 @@ export class ColumnarData extends Data { "endPos": number, }>, "intent": string, - "text": string }> { - const intentsUtterances: { "intents": string[], "utterances": string[] } = - Utility.loadLabelTextColumnarContent( + "text": string, + "weight": number }> { + const intentsUtterancesWeights: { "intents": string[], "utterances": string[], "weights": number[] } = + Utility.loadLabelUtteranceColumnarContent( content, // ---- filename: string, - this.getLabelColumnIndex(), // ---- labelColumnIndex: number = 0, - this.getTextColumnIndex(), // ---- textColumnIndex: number = 1, + this.getLabelColumnIndex(), // ---- labelColumnIndex: number = 0, + this.getTextColumnIndex(), // ---- textColumnIndex: number = 1, + this.getWeightColumnIndex(), // ---- weightColumnIndex: number = -1, this.getLinesToSkip(), // ---- lineIndexToStart: number = 0, "\t", // ---- columnDelimiter: string = "\t", "\n", // ---- rowDelimiter: string = "\n", + "utf8", // ---- encoding: string = "utf8", -1, // ---- lineIndexToEnd: number = -1 - ); + ); const luUtterances: Array<{ "entities": Array<{ "entity": string, @@ -341,12 +412,15 @@ export class ColumnarData extends Data { "endPos": number, }>, "intent": string, - "text": string }> = []; - const intents: string[] = intentsUtterances.intents; - const utterances: string[] = intentsUtterances.utterances; + "text": string, + "weight": number }> = []; + const intents: string[] = intentsUtterancesWeights.intents; + const utterances: string[] = intentsUtterancesWeights.utterances; + const weights: number[] = intentsUtterancesWeights.weights; for (let i = 0; i < intents.length; i++) { const intent: string = intents[i]; const text: string = utterances[i]; + const weight: number = weights[i]; const luUtterance: { "entities": Array<{ "entity": string, @@ -359,11 +433,13 @@ export class ColumnarData extends Data { "endPos": number, }>, "intent": string, - "text": string } = { + "text": string, + "weight": number } = { entities: [], partOfSpeechTags: [], intent, text, + weight, }; luUtterances.push(luUtterance); } @@ -389,6 +465,9 @@ export class ColumnarData extends Data { public getTextColumnIndex(): number { return this.textColumnIndex; } + public getWeightColumnIndex(): number { + return this.weightColumnIndex; + } public getLinesToSkip(): number { return this.linesToSkip; } diff --git a/packages/dispatcher/src/data/DataUtility.ts b/packages/dispatcher/src/data/DataUtility.ts index e1e2a868b..2bcbeb540 100644 --- a/packages/dispatcher/src/data/DataUtility.ts +++ b/packages/dispatcher/src/data/DataUtility.ts @@ -51,6 +51,7 @@ export class DataUtility { filetype: string = "", labelColumnIndex: number = 0, textColumnIndex: number = 1, + weightColumnIndex: number = -1, linesToSkip: number = 0): Promise { const content: string = Utility.loadFile(filename); @@ -93,6 +94,7 @@ export class DataUtility { featurizerNullable, labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip, toResetFeaturizerLabelFeatureMaps); return columnarData; diff --git a/packages/dispatcher/src/data/EntityAnnotatedCorpusData.ts b/packages/dispatcher/src/data/EntityAnnotatedCorpusData.ts index f06a55056..867617e5b 100644 --- a/packages/dispatcher/src/data/EntityAnnotatedCorpusData.ts +++ b/packages/dispatcher/src/data/EntityAnnotatedCorpusData.ts @@ -36,7 +36,8 @@ export class EntityAnnotatedCorpusData extends Data { "endPos": number, }>, "intent": string, - "text": string }> = entityAnnotatedCorpusData.luUtterances; + "text": string, + "weight": number }> = entityAnnotatedCorpusData.luUtterances; const lengthUtterancesArray: number = luUtterances.length; entityAnnotatedCorpusData.luUtterances = []; @@ -51,7 +52,7 @@ export class EntityAnnotatedCorpusData extends Data { entityAnnotatedCorpusData.collectIntents(entityAnnotatedCorpusData.luUtterances); entityAnnotatedCorpusData.entityTypeInstanceIndexMapArray = entityAnnotatedCorpusData.collectEntityTypes(entityAnnotatedCorpusData.luUtterances); - entityAnnotatedCorpusData.intentsUtterances.intents = entityAnnotatedCorpusData.luUtterances.map( + entityAnnotatedCorpusData.intentsUtterancesWeights.intents = entityAnnotatedCorpusData.luUtterances.map( (entry: { "entities": Array<{ "entity": string, @@ -64,8 +65,9 @@ export class EntityAnnotatedCorpusData extends Data { "endPos": number, }>, "intent": string, - "text": string }) => entry.intent as string); - entityAnnotatedCorpusData.intentsUtterances.utterances = entityAnnotatedCorpusData.luUtterances.map( + "text": string, + "weight": number }) => entry.intent as string); + entityAnnotatedCorpusData.intentsUtterancesWeights.utterances = entityAnnotatedCorpusData.luUtterances.map( (entry: { "entities": Array<{ "entity": string, @@ -78,7 +80,23 @@ export class EntityAnnotatedCorpusData extends Data { "endPos": number, }>, "intent": string, - "text": string }) => entry.text as string); + "text": string, + "weight": number }) => entry.text as string); + entityAnnotatedCorpusData.intentsUtterancesWeights.weights = entityAnnotatedCorpusData.luUtterances.map( + (entry: { + "entities": Array<{ + "entity": string, + "startPos": number, + "endPos": number, + }>, + "partOfSpeechTags": Array<{ + "partOfSpeechTag": string, + "startPos": number, + "endPos": number, + }>, + "intent": string, + "text": string, + "weight": number }) => entry.weight as number); // ------------------------------------------------------------------- if (toResetFeaturizerLabelFeatureMaps) { entityAnnotatedCorpusData.resetFeaturizerLabelFeatureMaps(); @@ -114,7 +132,8 @@ export class EntityAnnotatedCorpusData extends Data { "endPos": number, }>, "intent": string, - "text": string }> = entityAnnotatedCorpusData.luUtterances; + "text": string, + "weight": number }> = entityAnnotatedCorpusData.luUtterances; entityAnnotatedCorpusData.luUtterances = luUtterances.filter( (value: { "entities": Array<{ @@ -128,7 +147,8 @@ export class EntityAnnotatedCorpusData extends Data { "endPos": number, }>, "intent": string, - "text": string }, + "text": string, + "weight": number }, index: number, array: Array<{ "entities": Array<{ @@ -142,7 +162,8 @@ export class EntityAnnotatedCorpusData extends Data { "endPos": number, }>, "intent": string, - "text": string }>) => { + "text": string, + "weight": number }>) => { return (filteringIndexSet.has(index)); }); // ------------------------------------------------------------------- @@ -150,7 +171,22 @@ export class EntityAnnotatedCorpusData extends Data { entityAnnotatedCorpusData.collectIntents(entityAnnotatedCorpusData.luUtterances); entityAnnotatedCorpusData.entityTypeInstanceIndexMapArray = entityAnnotatedCorpusData.collectEntityTypes(entityAnnotatedCorpusData.luUtterances); - entityAnnotatedCorpusData.intentsUtterances.intents = entityAnnotatedCorpusData.luUtterances.map( + entityAnnotatedCorpusData.intentsUtterancesWeights.intents = entityAnnotatedCorpusData.luUtterances.map( + (entry: { + "entities": Array<{ + "entity": string, + "startPos": number, + "endPos": number, + }>, + "partOfSpeechTags": Array<{ + "partOfSpeechTag": string, + "startPos": number, + "endPos": number, + }>, + "intent": string, + "text": string, + "weight": number }) => entry.intent as string); + entityAnnotatedCorpusData.intentsUtterancesWeights.utterances = entityAnnotatedCorpusData.luUtterances.map( (entry: { "entities": Array<{ "entity": string, @@ -163,8 +199,9 @@ export class EntityAnnotatedCorpusData extends Data { "endPos": number, }>, "intent": string, - "text": string }) => entry.intent as string); - entityAnnotatedCorpusData.intentsUtterances.utterances = entityAnnotatedCorpusData.luUtterances.map( + "text": string, + "weight": number }) => entry.text as string); + entityAnnotatedCorpusData.intentsUtterancesWeights.weights = entityAnnotatedCorpusData.luUtterances.map( (entry: { "entities": Array<{ "entity": string, @@ -177,7 +214,8 @@ export class EntityAnnotatedCorpusData extends Data { "endPos": number, }>, "intent": string, - "text": string }) => entry.text as string); + "text": string, + "weight": number }) => entry.weight as number); // ------------------------------------------------------------------- if (toResetFeaturizerLabelFeatureMaps) { entityAnnotatedCorpusData.resetFeaturizerLabelFeatureMaps(); @@ -208,7 +246,22 @@ export class EntityAnnotatedCorpusData extends Data { entityAnnotatedCorpusData.collectIntents(entityAnnotatedCorpusData.luUtterances); entityAnnotatedCorpusData.entityTypeInstanceIndexMapArray = entityAnnotatedCorpusData.collectEntityTypes(entityAnnotatedCorpusData.luUtterances); - entityAnnotatedCorpusData.intentsUtterances.intents = entityAnnotatedCorpusData.luUtterances.map( + entityAnnotatedCorpusData.intentsUtterancesWeights.intents = entityAnnotatedCorpusData.luUtterances.map( + (entry: { + "entities": Array<{ + "entity": string, + "startPos": number, + "endPos": number, + }>, + "partOfSpeechTags": Array<{ + "partOfSpeechTag": string, + "startPos": number, + "endPos": number, + }>, + "intent": string, + "text": string, + "weight": number }) => entry.intent as string); + entityAnnotatedCorpusData.intentsUtterancesWeights.utterances = entityAnnotatedCorpusData.luUtterances.map( (entry: { "entities": Array<{ "entity": string, @@ -221,8 +274,9 @@ export class EntityAnnotatedCorpusData extends Data { "endPos": number, }>, "intent": string, - "text": string }) => entry.intent as string); - entityAnnotatedCorpusData.intentsUtterances.utterances = entityAnnotatedCorpusData.luUtterances.map( + "text": string, + "weight": number }) => entry.text as string); + entityAnnotatedCorpusData.intentsUtterancesWeights.weights = entityAnnotatedCorpusData.luUtterances.map( (entry: { "entities": Array<{ "entity": string, @@ -235,7 +289,8 @@ export class EntityAnnotatedCorpusData extends Data { "endPos": number, }>, "intent": string, - "text": string }) => entry.text as string); + "text": string, + "weight": number }) => entry.weight as number); // ------------------------------------------------------------------- if (toResetFeaturizerLabelFeatureMaps) { entityAnnotatedCorpusData.resetFeaturizerLabelFeatureMaps(); @@ -259,6 +314,7 @@ export class EntityAnnotatedCorpusData extends Data { existingData: Data, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, linesToSkip: number, samplingIndexArray: number[], toResetFeaturizerLabelFeatureMaps: boolean): Promise { @@ -267,6 +323,7 @@ export class EntityAnnotatedCorpusData extends Data { existingData as EntityAnnotatedCorpusData, // ---- NOTE-NO-NEED-FOR-EntityAnnotatedCorpusData ---- labelColumnIndex, // ---- NOTE-NO-NEED-FOR-EntityAnnotatedCorpusData ---- textColumnIndex, + // ---- NOTE-NO-NEED-FOR-EntityAnnotatedCorpusData ---- weightColumnIndex, linesToSkip, samplingIndexArray, toResetFeaturizerLabelFeatureMaps); @@ -276,6 +333,7 @@ export class EntityAnnotatedCorpusData extends Data { existingData: Data, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, linesToSkip: number, filteringIndexSet: Set, toResetFeaturizerLabelFeatureMaps: boolean): Promise { @@ -284,6 +342,7 @@ export class EntityAnnotatedCorpusData extends Data { existingData as EntityAnnotatedCorpusData, // ---- NOTE-NO-NEED-FOR-EntityAnnotatedCorpusData ---- labelColumnIndex, // ---- NOTE-NO-NEED-FOR-EntityAnnotatedCorpusData ---- textColumnIndex, + // ---- NOTE-NO-NEED-FOR-EntityAnnotatedCorpusData ---- weightColumnIndex, linesToSkip, filteringIndexSet, toResetFeaturizerLabelFeatureMaps); @@ -306,7 +365,8 @@ export class EntityAnnotatedCorpusData extends Data { "endPos": number, }>, "intent": string, - "text": string }> { + "text": string, + "weight": number }> { const entityAnnotatedCorpusTypes: { "ids": string[], "wordArrays": string[][], @@ -331,7 +391,8 @@ export class EntityAnnotatedCorpusData extends Data { "endPos": number, }>, "intent": string, - "text": string }> = + "text": string, + "weight": number }> = Utility.entityAnnotatedCorpusTypesToEntityAnnotatedCorpusUtterances( entityAnnotatedCorpusTypes, includePartOfSpeechTagTagAsEntities, diff --git a/packages/dispatcher/src/data/LuData.ts b/packages/dispatcher/src/data/LuData.ts index f74d3b12f..cfd29d58f 100644 --- a/packages/dispatcher/src/data/LuData.ts +++ b/packages/dispatcher/src/data/LuData.ts @@ -30,7 +30,7 @@ export class LuData extends Data { const luLuisJsonStructure: any = luData.getLuLuisJsonStructure(); const utterancesArray: any[] = - luData.retrieveLuUtterances(luLuisJsonStructure); + luData.retrieveLuisLuUtterances(luLuisJsonStructure); const lengthUtterancesArray: number = utterancesArray.length; luLuisJsonStructure.utterances = []; @@ -40,17 +40,24 @@ export class LuData extends Data { } luLuisJsonStructure.utterances.push(utterancesArray[index]); } + // ---- NOTE-FOR-REFERENCE ---- luLuisJsonStructure.utterances = utterancesArray.filter( + // ---- NOTE-FOR-REFERENCE ---- (value: any, index: number, array: any[]) => { + // ---- NOTE-FOR-REFERENCE ---- return (samplingIndexArray.has(index)); + // ---- NOTE-FOR-REFERENCE ---- }); // ------------------------------------------------------------------- - luData.luUtterances = luData.retrieveLuUtterances(luLuisJsonStructure); + luData.luUtterances = + luData.retrieveLuUtterances(luLuisJsonStructure); // ------------------------------------------------------------------- luData.intentInstanceIndexMapArray = luData.collectIntents(luData.luUtterances); luData.entityTypeInstanceIndexMapArray = luData.collectEntityTypes(luData.luUtterances); - luData.intentsUtterances.intents = luData.luUtterances.map( + luData.intentsUtterancesWeights.intents = luData.luUtterances.map( (entry: any) => entry.intent as string); - luData.intentsUtterances.utterances = luData.luUtterances.map( + luData.intentsUtterancesWeights.utterances = luData.luUtterances.map( (entry: any) => entry.text as string); + luData.intentsUtterancesWeights.weights = luData.luUtterances.map( + (entry: any) => entry.weight as number); // ------------------------------------------------------------------- if (toResetFeaturizerLabelFeatureMaps) { luData.resetFeaturizerLabelFeatureMaps(); @@ -75,22 +82,34 @@ export class LuData extends Data { const luLuisJsonStructure: any = luData.getLuLuisJsonStructure(); const utterancesArray: any[] = - luData.retrieveLuUtterances(luLuisJsonStructure); - luLuisJsonStructure.utterances = utterancesArray.filter( - (value: any, index: number, array: any[]) => { - return (filteringIndexSet.has(index)); - }); + luData.retrieveLuisLuUtterances(luLuisJsonStructure); + const lengthUtterancesArray: number = + utterancesArray.length; + luLuisJsonStructure.utterances = []; + for (const index of filteringIndexSet) { + if ((index < 0) || (index > lengthUtterancesArray)) { + Utility.debuggingThrow(`(index|${index}|<0)||(index|${index}|>lengthUtterancesArray|${lengthUtterancesArray}|)`); + } + luLuisJsonStructure.utterances.push(utterancesArray[index]); + } + // ---- NOTE-FOR-REFERENCE ---- luLuisJsonStructure.utterances = utterancesArray.filter( + // ---- NOTE-FOR-REFERENCE ---- (value: any, index: number, array: any[]) => { + // ---- NOTE-FOR-REFERENCE ---- return (filteringIndexSet.has(index)); + // ---- NOTE-FOR-REFERENCE ---- }); // ------------------------------------------------------------------- - luData.luUtterances = luData.retrieveLuUtterances(luLuisJsonStructure); + luData.luUtterances = + luData.retrieveLuUtterances(luLuisJsonStructure); // ------------------------------------------------------------------- luData.intentInstanceIndexMapArray = luData.collectIntents(luData.luUtterances); luData.entityTypeInstanceIndexMapArray = luData.collectEntityTypes(luData.luUtterances); - luData.intentsUtterances.intents = luData.luUtterances.map( + luData.intentsUtterancesWeights.intents = luData.luUtterances.map( (entry: any) => entry.intent as string); - luData.intentsUtterances.utterances = luData.luUtterances.map( + luData.intentsUtterancesWeights.utterances = luData.luUtterances.map( (entry: any) => entry.text as string); + luData.intentsUtterancesWeights.weights = luData.luUtterances.map( + (entry: any) => entry.weight as number); // ------------------------------------------------------------------- if (toResetFeaturizerLabelFeatureMaps) { luData.resetFeaturizerLabelFeatureMaps(); @@ -114,16 +133,22 @@ export class LuData extends Data { luData.luObject = await parseFile(content); // ------------------------------------------------------------------- - luData.luUtterances = luData.retrieveLuUtterances(luData.getLuLuisJsonStructure()); + const luLuisJsonStructure: any = + luData.getLuLuisJsonStructure(); + // ------------------------------------------------------------------- + luData.luUtterances = + luData.retrieveLuUtterances(luLuisJsonStructure); // ------------------------------------------------------------------- luData.intentInstanceIndexMapArray = luData.collectIntents(luData.luUtterances); luData.entityTypeInstanceIndexMapArray = luData.collectEntityTypes(luData.luUtterances); - luData.intentsUtterances.intents = luData.luUtterances.map( + luData.intentsUtterancesWeights.intents = luData.luUtterances.map( (entry: any) => entry.intent as string); - luData.intentsUtterances.utterances = luData.luUtterances.map( + luData.intentsUtterancesWeights.utterances = luData.luUtterances.map( (entry: any) => entry.text as string); + luData.intentsUtterancesWeights.weights = luData.luUtterances.map( + (entry: any) => entry.weight as number); // ------------------------------------------------------------------- if (toResetFeaturizerLabelFeatureMaps) { luData.resetFeaturizerLabelFeatureMaps(); @@ -145,6 +170,7 @@ export class LuData extends Data { existingData: Data, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, linesToSkip: number, samplingIndexArray: number[], toResetFeaturizerLabelFeatureMaps: boolean): Promise { @@ -152,6 +178,7 @@ export class LuData extends Data { existingData as LuData, // ---- NOTE-NO-NEED-FOR-LuData ---- labelColumnIndex, // ---- NOTE-NO-NEED-FOR-LuData ---- textColumnIndex, + // ---- NOTE-NO-NEED-FOR-LuData ---- weightColumnIndex, // ---- NOTE-NO-NEED-FOR-LuData ---- linesToSkip, samplingIndexArray, toResetFeaturizerLabelFeatureMaps); @@ -161,6 +188,7 @@ export class LuData extends Data { existingData: Data, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, linesToSkip: number, filteringIndexSet: Set, toResetFeaturizerLabelFeatureMaps: boolean): Promise { @@ -168,14 +196,73 @@ export class LuData extends Data { existingData as LuData, // ---- NOTE-NO-NEED-FOR-LuData ---- labelColumnIndex, // ---- NOTE-NO-NEED-FOR-LuData ---- textColumnIndex, + // ---- NOTE-NO-NEED-FOR-LuData ---- weightColumnIndex, // ---- NOTE-NO-NEED-FOR-LuData ---- linesToSkip, filteringIndexSet, toResetFeaturizerLabelFeatureMaps); } - public retrieveLuUtterances(luLuisJsonStructure: any): any[] { // ---- NOTE: a shallow copy + public retrieveLuisLuUtterances(luLuisJsonStructure: any): any[] { // ---- NOTE: a shallow copy return (luLuisJsonStructure.utterances as any[]); } + public retrieveLuUtterances(luLuisJsonStructure: any): Array<{ + "entities": Array<{ + "entity": string, + "startPos": number, + "endPos": number, + }>, + "partOfSpeechTags": Array<{ + "partOfSpeechTag": string, + "startPos": number, + "endPos": number, + }>, + "intent": string, + "text": string, + "weight": number }> { + const weight: number = 1; + const utterancesArray: any[] = + this.retrieveLuisLuUtterances(luLuisJsonStructure); + const luUtterances: Array<{ + "entities": Array<{ + "entity": string, + "startPos": number, + "endPos": number, + }>, + "partOfSpeechTags": Array<{ + "partOfSpeechTag": string, + "startPos": number, + "endPos": number, + }>, + "intent": string, + "text": string, + "weight": number }> = []; + utterancesArray.forEach( + (entry: any) => { + const entities: Array<{ + "entity": string, + "startPos": number, + "endPos": number, + }> = entry.entities; + const partOfSpeechTags: Array<{ + "partOfSpeechTag": string, + "startPos": number, + "endPos": number, + }> = []; + const intent: string = + entry.intent; + const text: string = + entry.text; + luUtterances.push({ + entities, + partOfSpeechTags, + intent, + text, + weight, + }); + }); + return luUtterances; + // ---- NOTE-FOR-REFERENCE ---- return (luLuisJsonStructure.utterances as any[]); + } public getLuObject(): any { return this.luObject; diff --git a/packages/dispatcher/src/data/data.ts b/packages/dispatcher/src/data/data.ts index a34493a05..fde0229e1 100644 --- a/packages/dispatcher/src/data/data.ts +++ b/packages/dispatcher/src/data/data.ts @@ -25,14 +25,16 @@ export abstract class Data { "endPos": number, }>, "intent": string, - "text": string }> = []; + "text": string, + "weight": number }> = []; protected intentInstanceIndexMapArray: Map = new Map(); protected entityTypeInstanceIndexMapArray: Map = new Map(); - protected intentsUtterances: { + protected intentsUtterancesWeights: { "intents": string[], - "utterances": string[] } = - { intents: [], utterances: [] }; + "utterances": string[], + "weights": number[] } = + { intents: [], utterances: [], weights: [] }; protected intentUtteranceSparseIndexArrays: { "intentLabelIndexArray": number[], "utteranceFeatureIndexArrays": number[][] } = @@ -52,6 +54,7 @@ export abstract class Data { existingData: Data, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, linesToSkip: number, samplingIndexArray: number[], toResetFeaturizerLabelFeatureMaps: boolean): Promise; @@ -60,6 +63,7 @@ export abstract class Data { existingData: Data, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, linesToSkip: number, filteringIndexSet: Set, toResetFeaturizerLabelFeatureMaps: boolean): Promise; @@ -76,7 +80,8 @@ export abstract class Data { "endPos": number, }>, "intent": string, - "text": string }>): Map { + "text": string, + "weight": number }>): Map { const entityTypeInstanceIndexMapArray: Map = new Map(); luUtterances.forEach( (element: { @@ -91,25 +96,26 @@ export abstract class Data { "endPos": number, }>, "intent": string, - "text": string }, + "text": string, + "weight": number }, index: number) => { - const entities: Array<{ - "entity": string, - "startPos": number, - "endPos": number, - }> = element.entities; - entities.forEach((entityElement: { - "entity": string, - "startPos": number, - "endPos": number, - }) => { - const entityType: string = entityElement.entity as string; - if (entityType) { - Utility.addKeyValueToNumberMapArray( - entityTypeInstanceIndexMapArray, - entityType, - index); - } + const entities: Array<{ + "entity": string, + "startPos": number, + "endPos": number, + }> = element.entities; + entities.forEach((entityElement: { + "entity": string, + "startPos": number, + "endPos": number, + }) => { + const entityType: string = entityElement.entity as string; + if (entityType) { + Utility.addKeyValueToNumberMapArray( + entityTypeInstanceIndexMapArray, + entityType, + index); + } }); }); return entityTypeInstanceIndexMapArray; @@ -126,27 +132,31 @@ export abstract class Data { "endPos": number, }>, "intent": string, - "text": string }>): Map { + "text": string, + "weight": number }>): Map { const intentInstanceIndexMapArray: Map = new Map(); - luUtterances.forEach((element: { - "entities": Array<{ - "entity": string, - "startPos": number, - "endPos": number, - }>, - "partOfSpeechTags": Array<{ - "partOfSpeechTag": string, - "startPos": number, - "endPos": number, - }>, - "intent": string, - "text": string }, index: number) => { - const intent: string = element.intent as string; - if (intent) { - Utility.addKeyValueToNumberMapArray( - intentInstanceIndexMapArray, - intent, - index); + luUtterances.forEach( + (element: { + "entities": Array<{ + "entity": string, + "startPos": number, + "endPos": number, + }>, + "partOfSpeechTags": Array<{ + "partOfSpeechTag": string, + "startPos": number, + "endPos": number, + }>, + "intent": string, + "text": string, + "weight": number }, + index: number) => { + const intent: string = element.intent as string; + if (intent) { + Utility.addKeyValueToNumberMapArray( + intentInstanceIndexMapArray, + intent, + index); } }); return intentInstanceIndexMapArray; @@ -181,7 +191,8 @@ export abstract class Data { "endPos": number, }>, "intent": string, - "text": string }> { + "text": string, + "weight": number }> { return this.luUtterances; } public getIntentInstanceIndexMapArray(): Map { @@ -233,16 +244,20 @@ export abstract class Data { return this.getEntityTypeInstanceIndexMapArray().size; } - public getIntentsUtterances(): { + public getIntentsUtterancesWeights(): { "intents": string[], - "utterances": string[] } { - return this.intentsUtterances; + "utterances": string[], + "weights": number[] } { + return this.intentsUtterancesWeights; } public getIntents(): string[] { - return this.intentsUtterances.intents; + return this.intentsUtterancesWeights.intents; } public getUtterances(): string[] { - return this.intentsUtterances.utterances; + return this.intentsUtterancesWeights.utterances; + } + public getWeights(): number[] { + return this.intentsUtterancesWeights.weights; } public getIntentUtteranceSparseIndexArrays(): { @@ -259,12 +274,12 @@ export abstract class Data { public resetFeaturizerLabelFeatureMaps(): void { this.getFeaturizer().resetLabelFeatureMaps( - this.getIntentsUtterances()); + this.getIntentsUtterancesWeights()); } public featurizeIntentsUtterances(): void { this.intentUtteranceSparseIndexArrays = this.getFeaturizer().createIntentUtteranceSparseIndexArrays( - this.getIntentsUtterances()); + this.getIntentsUtterancesWeights()); } public featurize(inputUtterance: string): string[] { return this.getFeaturizer().featurize(inputUtterance); @@ -307,7 +322,8 @@ export abstract class Data { "endPos": number, }>, "intent": string, - "text": string } = + "text": string, + "weight": number } = this.luUtterances[luUtteranceIndex]; const intent: string = luUtterance.intent as string; @@ -376,7 +392,8 @@ export abstract class Data { "endPos": number, }>, "intent": string, - "text": string } = this.luUtterances[luUtteranceIndex]; + "text": string, + "weight": number } = this.luUtterances[luUtteranceIndex]; let hasNewUtteranceFoundForCoveringAllIntentEntityLabels: boolean = false; if (toEnsureEachIntentHasOneUtteranceLabel) { if (intentSet.size < numberIntents) { diff --git a/packages/dispatcher/src/data_structure/DictionaryMapUtility.ts b/packages/dispatcher/src/data_structure/DictionaryMapUtility.ts index bf5aab3dd..ea3ee7519 100644 --- a/packages/dispatcher/src/data_structure/DictionaryMapUtility.ts +++ b/packages/dispatcher/src/data_structure/DictionaryMapUtility.ts @@ -24,27 +24,68 @@ import { Utility } from "../utility/Utility"; export class DictionaryMapUtility { + public static buildStringIdNumberValueDictionaryFromUniqueStringArrayFile( + filename: string, + delimiter: string = "\t"): { + "stringArray": string[], + "stringMap": IDictionaryStringIdGenericValue } { + const content: string = Utility.loadFile(filename); + return DictionaryMapUtility.buildStringIdNumberValueDictionaryFromUniqueStringArrayContent( + content, + delimiter); + } + public static buildStringIdNumberValueDictionaryFromUniqueStringArrayContent( + content: string, + delimiter: string = "\t"): { + "stringArray": string[], + "stringMap": IDictionaryStringIdGenericValue } { + const stringArray: string[] = Utility.split(content, delimiter); + const stringMap: IDictionaryStringIdGenericValue = + DictionaryMapUtility.buildStringIdNumberValueDictionaryFromUniqueStringArray(stringArray); + return { stringArray, stringMap }; + } public static buildStringIdNumberValueDictionaryFromUniqueStringArray( - stringArray: string[]): IDictionaryStringIdGenericValue { + inputStringArray: string[]): IDictionaryStringIdGenericValue { const stringMap: IDictionaryStringIdGenericValue = { }; - for (let index: number = 0; index < stringArray.length; index++) { - stringMap[stringArray[index]] = index; + for (let index: number = 0; index < inputStringArray.length; index++) { + stringMap[inputStringArray[index]] = index; } return stringMap; } + public static buildStringIdNumberValueDictionaryFromStringArrayFile( + filename: string, + delimiter: string = "\t"): { + "stringArray": string[], + "stringMap": IDictionaryStringIdGenericValue } { + const content: string = Utility.loadFile(filename); + return DictionaryMapUtility.buildStringIdNumberValueDictionaryFromStringArrayContent( + content, + delimiter); + } + public static buildStringIdNumberValueDictionaryFromStringArrayContent( + content: string, + delimiter: string = "\t"): { + "stringArray": string[], + "stringMap": IDictionaryStringIdGenericValue } { + const records: string[] = Utility.split(content, delimiter); + return DictionaryMapUtility.buildStringIdNumberValueDictionaryFromStringArray(records); + } public static buildStringIdNumberValueDictionaryFromStringArray( - strings: string[]): - { "stringArray": string[], "stringMap": IDictionaryStringIdGenericValue } { - const stringSet: Set = new Set(strings); + inputStringArray: string[]): { + "stringArray": string[], + "stringMap": IDictionaryStringIdGenericValue } { + const stringSet: Set = new Set(inputStringArray); const stringArray: string[] = Array.from(stringSet.values()); const stringMap: IDictionaryStringIdGenericValue = DictionaryMapUtility.buildStringIdNumberValueDictionaryFromUniqueStringArray(stringArray); return { stringArray, stringMap }; } public static buildStringIdNumberValueDictionaryFromStringArrays( - stringArrays: string[][]): { "stringArray": string[], "stringMap": IDictionaryStringIdGenericValue } { + inputStringArrays: string[][]): { + "stringArray": string[], + "stringMap": IDictionaryStringIdGenericValue } { const stringSet: Set = new Set(); - for (const elementStringArray of stringArrays) { + for (const elementStringArray of inputStringArrays) { for (const elementString of elementStringArray) { stringSet.add(elementString); } @@ -139,27 +180,68 @@ export class DictionaryMapUtility { } } + public static buildStringKeyNumberValueMapFromUniqueStringArrayFile( + filename: string, + delimiter: string = "\t"): { + "stringArray": string[], + "stringMap": TMapStringKeyGenericValue } { + const content: string = Utility.loadFile(filename); + return DictionaryMapUtility.buildStringKeyNumberValueMapFromUniqueStringArrayContent( + content, + delimiter); + } + public static buildStringKeyNumberValueMapFromUniqueStringArrayContent( + content: string, + delimiter: string = "\t"): { + "stringArray": string[], + "stringMap": TMapStringKeyGenericValue } { + const stringArray: string[] = Utility.split(content, delimiter); + const stringMap: TMapStringKeyGenericValue = + DictionaryMapUtility.buildStringKeyNumberValueMapFromUniqueStringArray(stringArray); + return { stringArray, stringMap }; + } public static buildStringKeyNumberValueMapFromUniqueStringArray( - stringArray: string[]): TMapStringKeyGenericValue { + inputStringArray: string[]): TMapStringKeyGenericValue { const stringMap: TMapStringKeyGenericValue = DictionaryMapUtility.newTMapStringKeyGenericValue(); - for (let index: number = 0; index < stringArray.length; index++) { - stringMap.set(stringArray[index], index); + for (let index: number = 0; index < inputStringArray.length; index++) { + stringMap.set(inputStringArray[index], index); } return stringMap; } + public static buildStringKeyNumberValueMapFromStringArrayFile( + filename: string, + delimiter: string = "\t"): { + "stringArray": string[], + "stringMap": TMapStringKeyGenericValue } { + const content: string = Utility.loadFile(filename); + return DictionaryMapUtility.buildStringKeyNumberValueMapFromStringArrayContent( + content, + delimiter); + } + public static buildStringKeyNumberValueMapFromStringArrayContent( + content: string, + delimiter: string = "\t"): { + "stringArray": string[], + "stringMap": TMapStringKeyGenericValue } { + const records: string[] = Utility.split(content, delimiter); + return DictionaryMapUtility.buildStringKeyNumberValueMapFromStringArray(records); + } public static buildStringKeyNumberValueMapFromStringArray( - strings: string[]): - { "stringArray": string[], "stringMap": TMapStringKeyGenericValue } { - const stringSet: Set = new Set(strings); + inputStringArray: string[]): { + "stringArray": string[], + "stringMap": TMapStringKeyGenericValue } { + const stringSet: Set = new Set(inputStringArray); const stringArray: string[] = Array.from(stringSet.values()); const stringMap: TMapStringKeyGenericValue = DictionaryMapUtility.buildStringKeyNumberValueMapFromUniqueStringArray(stringArray); return { stringArray, stringMap }; } public static buildStringKeyNumberValueMapFromStringArrays( - stringArrays: string[][]): { "stringArray": string[], "stringMap": TMapStringKeyGenericValue } { + inputStringArrays: string[][]): { + "stringArray": string[], + "stringMap": TMapStringKeyGenericValue } { const stringSet: Set = new Set(); - for (const elementStringArray of stringArrays) { + for (const elementStringArray of inputStringArrays) { for (const elementString of elementStringArray) { stringSet.add(elementString); } diff --git a/packages/dispatcher/src/index.ts b/packages/dispatcher/src/index.ts index aea82abc4..6e0e4ca09 100644 --- a/packages/dispatcher/src/index.ts +++ b/packages/dispatcher/src/index.ts @@ -5,6 +5,10 @@ import { AppAutoActiveLearner } from "./model/supervised/classifier/auto_active_learning/AppAutoActiveLearner"; +import { mainConfusionMatrix } from "./mathematics/confusion_matrix/AppConfusionMatrix"; +import { mainConfusionMatrixFunction } from "./mathematics/confusion_matrix/AppConfusionMatrix"; +import { ConfusionMatrix } from "./mathematics/confusion_matrix/ConfusionMatrix"; + import { mainCrossValidatorWithColumnarContent } from "./model/evaluation/cross_validation/AppCrossValidator"; import { mainCrossValidatorWithLuContent } from "./model/evaluation/cross_validation/AppCrossValidator"; import { mainCrossValidator } from "./model/evaluation/cross_validation/AppCrossValidator"; @@ -17,6 +21,9 @@ import { mainThresholdReporter } from "./model/evaluation/report/AppThresholdRep export default { AppAutoActiveLearner, + mainConfusionMatrix, + mainConfusionMatrixFunction, + ConfusionMatrix, mainCrossValidator, mainCrossValidatorWithLuContent, mainCrossValidatorWithColumnarContent, diff --git a/packages/dispatcher/src/mathematics/confusion_matrix/AppConfusionMatrix.ts b/packages/dispatcher/src/mathematics/confusion_matrix/AppConfusionMatrix.ts index cf60c7653..6a9eb03ec 100644 --- a/packages/dispatcher/src/mathematics/confusion_matrix/AppConfusionMatrix.ts +++ b/packages/dispatcher/src/mathematics/confusion_matrix/AppConfusionMatrix.ts @@ -3,10 +3,327 @@ * Licensed under the MIT License. */ +import { ArgumentParser } from "argparse"; + +import { BinaryConfusionMatrix } from "./BinaryConfusionMatrix"; import { ConfusionMatrix } from "./ConfusionMatrix"; +import { IDictionaryStringIdGenericArrays } from "../../data_structure/IDictionaryStringIdGenericArrays"; +import { IDictionaryStringIdGenericValue } from "../../data_structure/IDictionaryStringIdGenericValue"; + +import { DictionaryMapUtility } from "../../data_structure/DictionaryMapUtility"; + import { Utility } from "../../utility/Utility"; +export function mainConfusionMatrixFunction( + scoreFilename: string, + labelFilename: string, + labelColumnIndex: number, + textColumnIndex: number, + weightColumnIndex: number, + identifierColumnIndex: number, + scoreColumnBeginIndex: number, + predictedLabelColumnIndex: number, + revisedTextColumnIndex: number, + lineIndexToStart: number): { + "labels": string[], + "labelMap": { [id: string]: number; }, + "binaryConfusionMatrices": BinaryConfusionMatrix[], + "confusionMatrix": ConfusionMatrix } { + // ----------------------------------------------------------------------- + if (Utility.isEmptyString(scoreFilename)) { + throw new Error("scoreFilename is empty"); + } + if (Utility.isEmptyString(labelFilename)) { + throw new Error("labelFilename is empty"); + } + // ----------------------------------------------------------------------- + let labels: string[] = []; + let labelMap: { [id: string]: number; } = {}; + if (!Utility.isEmptyString(labelFilename)) { + const labelsAndLabelMap: { "stringArray": string[], "stringMap": { [id: string]: number; } } = + DictionaryMapUtility.buildStringIdNumberValueDictionaryFromUniqueStringArrayFile(labelFilename); + labels = labelsAndLabelMap.stringArray; + labelMap = labelsAndLabelMap.stringMap; + } + if (Utility.isEmptyStringArray(labels)) { + throw new Error("labels is empty"); + } + // ----------------------------------------------------------------------- + const confusionMatrix = new ConfusionMatrix(labels, labelMap); + // ----------------------------------------------------------------------- + const scoreDataStructure: { + "labels": string[], + "texts": string[], + "weights": number[], + "identifiers": string[], + "scoreArrays": number[][], + "predictedLabels": string[], + "revisedTexts": string[] } = + Utility.loadLabelTextScoreFile( + scoreFilename, + labelColumnIndex, + textColumnIndex, + weightColumnIndex, + scoreColumnBeginIndex, + labels.length, + identifierColumnIndex, + predictedLabelColumnIndex, + revisedTextColumnIndex, + lineIndexToStart); + const numberInstances: number = scoreDataStructure.labels.length; + let numberMatches: number = 0; + for (let i = 0; i < numberInstances; i++) { + const label: string = scoreDataStructure.labels[i]; + // const text: string = scoreDataStructure.texts[i]; + // const weight: number = scoreDataStructure.weights[i]; + // const identifier: string = scoreDataStructure.identifiers[i]; + // const scoreArray: number[] = scoreDataStructure.scoreArrays[i]; + // const labelId: number = labelMap[label]; + // const weight: number = scoreDataStructure.weights[i]; + const predictedLabel: string = scoreDataStructure.predictedLabels[i]; + // const revisedText: string = scoreDataStructure.revisedTexts[i]; + // Utility.debuggingLog( + // "label=" + label); + // Utility.debuggingLog( + // "predictedLabel=" + predictedLabel); + if (label === predictedLabel) { + numberMatches++; + } + confusionMatrix.addInstance(label, predictedLabel); + } + // ----------------------------------------------------------------------- + Utility.debuggingLog( + "numberMatches=" + numberMatches); + Utility.debuggingLog( + "numberInstances=" + numberInstances); + Utility.debuggingLog( + "accuracy=" + numberMatches / numberInstances); + // ----------------------------------------------------------------------- + Utility.debuggingLog( + "labels=" + confusionMatrix.getLabels()); + Utility.debuggingLog( + Utility.JSONstringify(confusionMatrix.getLabelMap())); + Utility.debuggingLog( + "rows=" + confusionMatrix.getConfusionMatrixRows()); + Utility.debuggingLog( + "columns=" + confusionMatrix.getConfusionMatrixColumns()); + Utility.debuggingLog( + "total=" + confusionMatrix.getConfusionMatrixTotal()); + const binaryConfusionMatrices: BinaryConfusionMatrix[] = confusionMatrix.getBinaryConfusionMatrices(); + const confusionMatrixLabels: string[] = confusionMatrix.getLabels(); + for (let i = 0; i < binaryConfusionMatrices.length; i++) { + const binaryConfusionMatrix = binaryConfusionMatrices[i]; + const label: string = confusionMatrixLabels[i]; + Utility.debuggingLog( + label + ":" + i + ", precision = " + binaryConfusionMatrix.getPrecision()); + Utility.debuggingLog( + label + ":" + i + ", recall = " + binaryConfusionMatrix.getRecall()); + Utility.debuggingLog( + label + ":" + i + ", F1 = " + binaryConfusionMatrix.getF1Score()); + Utility.debuggingLog( + label + ":" + i + ", support = " + binaryConfusionMatrix.getSupport()); + Utility.debuggingLog( + label + ":" + i + ", total = " + binaryConfusionMatrix.getTotal()); + } + Utility.debuggingLog( + "micro-average metrics = " + confusionMatrix.getMicroAverageMetrics()); + Utility.debuggingLog( + "macro-average metrics = " + confusionMatrix.getMacroAverageMetrics()); + Utility.debuggingLog( + "weighted-macro-average metrics = " + confusionMatrix.getWeightedMacroAverageMetrics()); + Utility.debuggingLog( + "labels=" + confusionMatrix.getLabels()); + // ----------------------------------------------------------------------- + return { + labels, + labelMap, + binaryConfusionMatrices, + confusionMatrix }; + // ----------------------------------------------------------------------- +} +export function mainConfusionMatrix(): void { + // ----------------------------------------------------------------------- + const dateTimeBeginInString: string = (new Date()).toISOString(); + // ----------------------------------------------------------------------- + const parser = new ArgumentParser({ + addHelp: true, + description: "AppConfusionMatrix", + version: "0.0.1", + }); + parser.addArgument( + ["-f", "--scoreFilename"], + { + help: "an input score file", + required: true, + }, + ); + parser.addArgument( + ["-si", "--scoreColumnBeginIndex"], + { + help: "score column begin index", + required: true, + }, + ); + parser.addArgument( + ["-l", "--labelFilename"], + { + help: "an input label file", + required: true, + }, + ); + parser.addArgument( + ["-o", "--outputReportFilenamePrefix"], + { + help: "output report file prefix", + required: false, + }, + ); + parser.addArgument( + ["-d", "--debug"], + { + help: "enable printing debug information", + required: false, + }, + ); + parser.addArgument( + ["-li", "--labelColumnIndex"], + { + defaultValue: 0, + help: "label column index", + required: false, + }, + ); + parser.addArgument( + ["-ti", "--textColumnIndex"], + { + defaultValue: 1, + help: "text/utterance column index", + required: false, + }, + ); + parser.addArgument( + ["-wi", "--weightColumnIndex"], + { + defaultValue: -1, + help: "weight column index", + required: false, + }, + ); + parser.addArgument( + ["-ii", "--identifierColumnIndex"], + { + defaultValue: 2, + help: "identifier column index", + required: false, + }, + ); + parser.addArgument( + ["-pli", "--predictedLabelColumnIndex"], + { + defaultValue: -1, + help: "predicted label column index", + required: false, + }, + ); + parser.addArgument( + ["-rti", "--revisedTextColumnIndex"], + { + defaultValue: -1, + help: "revised text/utterance column index", + required: false, + }, + ); + parser.addArgument( + ["-ls", "--lineIndexToStart"], + { + defaultValue: 0, + help: "number of lines to skip from the input file", + required: false, + }, + ); + const parsedKnownArgs: any[] = parser.parseKnownArgs(); + const args: any = parsedKnownArgs[0]; + const unknownArgs: any = parsedKnownArgs[1]; + Utility.debuggingLog( + `args=${Utility.JSONstringify(args)}`); + Utility.debuggingLog( + `unknownArgs=${Utility.JSONstringify(unknownArgs)}`); + const debugFlag: boolean = Utility.toBoolean(args.debug); + Utility.toPrintDebuggingLogToConsole = debugFlag; + // ---- NOTE-FOR-DEBUGGING ---- console.dir(args); + // ----------------------------------------------------------------------- + const scoreFilename: string = + args.scoreFilename; + if (!Utility.exists(scoreFilename)) { + Utility.debuggingThrow( + `The input score file ${scoreFilename} does not exist! process.cwd()=${process.cwd()}`); + } + let outputReportFilenamePrefix: string = args.outputReportFilenamePrefix; + if (Utility.isEmptyString(outputReportFilenamePrefix)) { + outputReportFilenamePrefix = Utility.getFilenameWithoutExtension(scoreFilename); + // Utility.debuggingThrow( + // `The output file ${outputReportFilenamePrefix} is empty! process.cwd()=${process.cwd()}`); + } + Utility.debuggingLog( + `scoreFilename=${scoreFilename}`); + Utility.debuggingLog( + `outputReportFilenamePrefix=${outputReportFilenamePrefix}`); + // ----------------------------------------------------------------------- + const labelFilename: string = + args.labelFilename; + Utility.debuggingLog( + `labelFilename=${labelFilename}`); + // ----------------------------------------------------------------------- + const labelColumnIndex: number = +args.labelColumnIndex; + const textColumnIndex: number = +args.textColumnIndex; + const weightColumnIndex: number = +args.weightColumnIndex; + const identifierColumnIndex: number = +args.identifierColumnIndex; + const scoreColumnBeginIndex: number = +args.scoreColumnBeginIndex; + const predictedLabelColumnIndex: number = +args.predictedLabelColumnIndex; + const revisedTextColumnIndex: number = +args.revisedTextColumnIndex; + const lineIndexToStart: number = +args.lineIndexToStart; + Utility.debuggingLog( + `labelColumnIndex=${labelColumnIndex}`); + Utility.debuggingLog( + `textColumnIndex=${textColumnIndex}`); + Utility.debuggingLog( + `weightColumnIndex=${weightColumnIndex}`); + Utility.debuggingLog( + `identifierColumnIndex=${identifierColumnIndex}`); + Utility.debuggingLog( + `scoreColumnBeginIndex=${scoreColumnBeginIndex}`); + Utility.debuggingLog( + `predictedLabelColumnIndex=${predictedLabelColumnIndex}`); + Utility.debuggingLog( + `revisedTextColumnIndex=${revisedTextColumnIndex}`); + Utility.debuggingLog( + `lineIndexToStart=${lineIndexToStart}`); + // ----------------------------------------------------------------------- + const mainConfusionMatrixFunctionResult: { + "labels": string[], + "labelMap": { [id: string]: number; }, + "confusionMatrix": ConfusionMatrix } = mainConfusionMatrixFunction( + scoreFilename, + labelFilename, + labelColumnIndex, + textColumnIndex, + weightColumnIndex, + identifierColumnIndex, + scoreColumnBeginIndex, + predictedLabelColumnIndex, + revisedTextColumnIndex, + lineIndexToStart); + // ----------------------------------------------------------------------- + const dateTimeEndInString: string = (new Date()).toISOString(); + // ----------------------------------------------------------------------- + Utility.debuggingLog( + `dateTimeBeginInString=${dateTimeBeginInString}`); + Utility.debuggingLog( + `dateTimeEndInString=${dateTimeEndInString}`); + // ----------------------------------------------------------------------- +} + export function exampleFunctionConfusionMatrix(): void { const labels: string[] = [ "label0", "label1", "label2" ]; const labelMap: { [id: string]: number; } = { }; @@ -26,7 +343,7 @@ export function exampleFunctionConfusionMatrix(): void { Utility.debuggingLog( "labels=" + confusionMatrix.getLabels()); Utility.debuggingLog( - confusionMatrix.getLabelMap()); + Utility.JSONstringify(confusionMatrix.getLabelMap())); Utility.debuggingLog( "rows=" + confusionMatrix.getConfusionMatrixRows()); Utility.debuggingLog( @@ -58,5 +375,6 @@ export function exampleFunctionConfusionMatrix(): void { } if (require.main === module) { - exampleFunctionConfusionMatrix(); + mainConfusionMatrix(); + // ---- exampleFunctionConfusionMatrix(); } diff --git a/packages/dispatcher/src/mathematics/confusion_matrix/ConfusionMatrix.ts b/packages/dispatcher/src/mathematics/confusion_matrix/ConfusionMatrix.ts index 10f601aee..6552deb80 100644 --- a/packages/dispatcher/src/mathematics/confusion_matrix/ConfusionMatrix.ts +++ b/packages/dispatcher/src/mathematics/confusion_matrix/ConfusionMatrix.ts @@ -311,7 +311,7 @@ export class ConfusionMatrix { if (!(label in this.getLabelMap())) { if (throwIfNotLegal) { Utility.debuggingThrow( - `label=${label}, not int the label map=${this.getLabelMap()}`); + `label=${label}, not int the label map=${Utility.JSONstringify(this.getLabelMap())}`); } return false; } diff --git a/packages/dispatcher/src/model/evaluation/abstract_base_evaluator/AbstractBaseEvaluator.ts b/packages/dispatcher/src/model/evaluation/abstract_base_evaluator/AbstractBaseEvaluator.ts index e4f4c93ac..aa3496f3e 100644 --- a/packages/dispatcher/src/model/evaluation/abstract_base_evaluator/AbstractBaseEvaluator.ts +++ b/packages/dispatcher/src/model/evaluation/abstract_base_evaluator/AbstractBaseEvaluator.ts @@ -53,7 +53,7 @@ export abstract class AbstractBaseEvaluator { } return Utility.dumpFile( outputReportFilename, - JSON.stringify(outputReportContent, undefined, 4), + Utility.JSONstringify(outputReportContent), encoding); } public dumpEvaluationDirectReportToFile( diff --git a/packages/dispatcher/src/model/evaluation/cross_validation/AppCrossValidator.ts b/packages/dispatcher/src/model/evaluation/cross_validation/AppCrossValidator.ts index 04cff8f44..14a31934f 100644 --- a/packages/dispatcher/src/model/evaluation/cross_validation/AppCrossValidator.ts +++ b/packages/dispatcher/src/model/evaluation/cross_validation/AppCrossValidator.ts @@ -212,6 +212,7 @@ export async function mainCrossValidatorWithLuContent( * @param columnarContent - content of a TSV columnar file in string form as input. * @param labelColumnIndex - label/intent column index. * @param textColumnIndex - text/utterace column index. + * @param weightColumnIndex - weight column index. * @param linesToSkip - number of header lines skipped before processing each line as an instance record. * @param numberOfCrossValidationFolds - number of cross validation (CV) folds. * @param learnerParameterEpochs - CV Softmax Regression Learner parameter - number of epochs @@ -226,6 +227,7 @@ export function mainCrossValidatorWithColumnarContent( columnarContent: string, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, linesToSkip: number, numberOfCrossValidationFolds: number = CrossValidator.defaultNumberOfCrossValidationFolds, @@ -250,6 +252,7 @@ export function mainCrossValidatorWithColumnarContent( new NgramSubwordFeaturizer(), labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip, true); // ----------------------------------------------------------------------- @@ -440,11 +443,19 @@ export async function mainCrossValidator(): Promise<{ parser.addArgument( ["-ti", "--textColumnIndex"], { - defaultValue: 0, + defaultValue: 1, help: "text/utterance column index", required: false, }, ); + parser.addArgument( + ["-wi", "--weightColumnIndex"], + { + defaultValue: -1, + help: "weight column index", + required: false, + }, + ); parser.addArgument( ["-ls", "--linesToSkip"], { @@ -521,11 +532,14 @@ export async function mainCrossValidator(): Promise<{ // ----------------------------------------------------------------------- const labelColumnIndex: number = +args.labelColumnIndex; const textColumnIndex: number = +args.textColumnIndex; + const weightColumnIndex: number = +args.weightColumnIndex; const linesToSkip: number = +args.linesToSkip; Utility.debuggingLog( `labelColumnIndex=${labelColumnIndex}`); Utility.debuggingLog( `textColumnIndex=${textColumnIndex}`); + Utility.debuggingLog( + `weightColumnIndex=${weightColumnIndex}`); Utility.debuggingLog( `linesToSkip=${linesToSkip}`); // ----------------------------------------------------------------------- @@ -536,6 +550,7 @@ export async function mainCrossValidator(): Promise<{ filetype, labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip); // ----------------------------------------------------------------------- const crossValidator: CrossValidator = diff --git a/packages/dispatcher/src/model/evaluation/cross_validation/CrossValidator.ts b/packages/dispatcher/src/model/evaluation/cross_validation/CrossValidator.ts index 65539a31c..6f36fbb07 100644 --- a/packages/dispatcher/src/model/evaluation/cross_validation/CrossValidator.ts +++ b/packages/dispatcher/src/model/evaluation/cross_validation/CrossValidator.ts @@ -59,6 +59,7 @@ export class CrossValidator extends AbstractBaseEvaluator { "thresholdReporterCrossValidation": ThresholdReporter, "predictionLabels": string[], "predictionLabelIndexes": number[], + "instanceIndexes": number[], "groundTruthLabels": string[], "groundTruthLabelIndexes": number[], "predictions": number[][] } = { @@ -66,6 +67,7 @@ export class CrossValidator extends AbstractBaseEvaluator { thresholdReporterCrossValidation: new ThresholdReporter("", "", null, null, [], {}), predictionLabels: [], predictionLabelIndexes: [], + instanceIndexes: [], groundTruthLabels: [], groundTruthLabelIndexes: [], predictions: [] }; @@ -111,6 +113,7 @@ export class CrossValidator extends AbstractBaseEvaluator { "thresholdReporterCrossValidation": ThresholdReporter, "predictionLabels": string[], "predictionLabelIndexes": number[], + "instanceIndexes": number[], "groundTruthLabels": string[], "groundTruthLabelIndexes": number[], "predictions": number[][] } { @@ -142,16 +145,19 @@ export class CrossValidator extends AbstractBaseEvaluator { this.crossValidationResultCachedAfterCrossValidation.predictionLabels; const predictionLabelIndexes: number[] = this.crossValidationResultCachedAfterCrossValidation.predictionLabelIndexes; + const instanceIndexes: number[] = + this.crossValidationResultCachedAfterCrossValidation.instanceIndexes; const groundTruthLabels: string[] = - this.crossValidationResultCachedAfterCrossValidation.predictionLabels; + this.crossValidationResultCachedAfterCrossValidation.groundTruthLabels; const groundTruthLabelIndexes: number[] = - this.crossValidationResultCachedAfterCrossValidation.predictionLabelIndexes; + this.crossValidationResultCachedAfterCrossValidation.groundTruthLabelIndexes; const predictions: number[][] = this.crossValidationResultCachedAfterCrossValidation.predictions; const outputEvaluationReportDataArraysScoreRecords: string[][] = []; for (let index: number = 0; index < this.intentsCachedAfterCrossValidation.length; index++) { - const intent: string = this.intentsCachedAfterCrossValidation[index]; - const utterance: string = this.utterancesCachedAfterCrossValidation[index]; + const instanceIndex: number = instanceIndexes[index]; + const intent: string = this.intentsCachedAfterCrossValidation[instanceIndex]; + const utterance: string = this.utterancesCachedAfterCrossValidation[instanceIndex]; const groundTruthLabel: string = groundTruthLabels[index]; const groundTruthLabelIndex: number = groundTruthLabelIndexes[index]; const predictionLabel: string = predictionLabels[index]; @@ -159,6 +165,7 @@ export class CrossValidator extends AbstractBaseEvaluator { const outputEvaluationReportDataArraysScoreRecord: string[] = []; outputEvaluationReportDataArraysScoreRecord.push(intent); outputEvaluationReportDataArraysScoreRecord.push(utterance); + outputEvaluationReportDataArraysScoreRecord.push(instanceIndex.toString()); outputEvaluationReportDataArraysScoreRecord.push(groundTruthLabel); outputEvaluationReportDataArraysScoreRecord.push(groundTruthLabelIndex.toString()); outputEvaluationReportDataArraysScoreRecord.push(predictionLabel); @@ -190,9 +197,10 @@ export class CrossValidator extends AbstractBaseEvaluator { outputEvaluationReportDataArrays = this.generateEvaluationDataArraysReport(); } + const outputFilenames: string[] = []; { let outputFilename: string = - `${outputReportFilenamePrefix}_CrossValidationScoreRecords.json`; + `${outputReportFilenamePrefix}_CrossValidationScoreRecords.txt`; outputFilename = Utility.storeDataArraysToTsvFile( outputFilename, outputEvaluationReportDataArrays.CrossValidationScoreRecords, @@ -200,10 +208,21 @@ export class CrossValidator extends AbstractBaseEvaluator { columnDelimiter, recordDelimiter, encoding); - const outputFilenames: string[] = - [outputFilename]; - return { outputEvaluationReportDataArrays, outputFilenames }; + outputFilenames.push(outputFilename); + } + { + let outputFilename: string = + `${outputReportFilenamePrefix}_CrossValidationScoreRecordsLabel.txt`; + outputFilename = Utility.storeDataArraysToTsvFile( + outputFilename, + [this.labelsCachedAfterCrossValidation], + [], + columnDelimiter, + recordDelimiter, + encoding); + outputFilenames.push(outputFilename); } + return { outputEvaluationReportDataArrays, outputFilenames }; } public generateEvaluationJsonReport(): IDictionaryStringIdGenericValue { @@ -288,6 +307,7 @@ export class CrossValidator extends AbstractBaseEvaluator { "thresholdReporterCrossValidation": ThresholdReporter, "predictionLabels": string[], "predictionLabelIndexes": number[], + "instanceIndexes": number[], "groundTruthLabels": string[], "groundTruthLabelIndexes": number[], "predictions": number[][] } { @@ -319,12 +339,17 @@ export class CrossValidator extends AbstractBaseEvaluator { []; const predictionLabelIndexes: number[] = []; + const instanceIndexes: number[] = + []; const groundTruthLabels: string[] = []; const groundTruthLabelIndexes: number[] = []; const numberOfCrossValidationFolds: number = this.getNumberOfCrossValidationFolds(); + let numberAcrossFoldsPredictions: number = 0; + let numberAcrossFoldsPredictionLabelMatches: number = 0; + let numberAcrossFoldsPredictionLabelIndexMatches: number = 0; for (let fold: number = 0; fold < numberOfCrossValidationFolds; fold++) { // --------------------------------------------------------------- const learner: SoftmaxRegressionSparse = @@ -332,6 +357,7 @@ export class CrossValidator extends AbstractBaseEvaluator { // --------------------------------------------------------------- const cvLabelDenseIndexArrayForTraining: number[] = []; const cvFeatureSparseIndexArraysForTraining: number[][] = []; + const cvInstanceIndexDenseArrayForTesting: number[] = []; const cvLabelDenseArrayForTesting: string[] = []; const cvLabelDenseIndexArrayForTesting: number[] = []; const cvFeatureSparseIndexArraysForTesting: number[][] = []; @@ -416,6 +442,7 @@ export class CrossValidator extends AbstractBaseEvaluator { } const instanceFeatureIndexArray: number[] = featureIndexArrays[instanceIndex]; + cvInstanceIndexDenseArrayForTesting.push(instanceIndex); cvLabelDenseArrayForTesting.push(instanceLabel); cvLabelDenseIndexArrayForTesting.push(instanceLabelIndex); cvFeatureSparseIndexArraysForTesting.push(instanceFeatureIndexArray); @@ -477,6 +504,9 @@ export class CrossValidator extends AbstractBaseEvaluator { } // --------------------------------------------------------------- for (let index = 0; index < cvLabelDenseIndexArrayForTesting.length; index++) { + const instanceIndex: number = + cvInstanceIndexDenseArrayForTesting[index]; + instanceIndexes.push(instanceIndex); const groundTruthLabel: string = cvLabelDenseArrayForTesting[index]; groundTruthLabels.push(groundTruthLabel); @@ -503,16 +533,36 @@ export class CrossValidator extends AbstractBaseEvaluator { prediction, groundTruthLabelIndex, "", - `${index}`); + `${index}`, + 1); + { + numberAcrossFoldsPredictions++; + if (predictionLabel === groundTruthLabel) { + numberAcrossFoldsPredictionLabelMatches++; + } + if (predictionLabelIndex === groundTruthLabelIndex) { + numberAcrossFoldsPredictionLabelIndexMatches++; + } + } } // --------------------------------------------------------------- } // ------------------------------------------------------------------- + { + Utility.debuggingLog( + `numberAcrossFoldsPredictions=${numberAcrossFoldsPredictions}` + + `,numberAcrossFoldsPredictionLabelMatches=${numberAcrossFoldsPredictionLabelMatches}` + + `,numberAcrossFoldsPredictionLabelIndexMatches=${numberAcrossFoldsPredictionLabelIndexMatches}` + + `,numberAcrossFoldsPredictionLabelMatchRatio=${numberAcrossFoldsPredictionLabelMatches / numberAcrossFoldsPredictions}` + + `,numberAcrossFoldsPredictionLabelIndexMatchRatio=${numberAcrossFoldsPredictionLabelIndexMatches / numberAcrossFoldsPredictions}`); + } + // ------------------------------------------------------------------- this.crossValidationResultCachedAfterCrossValidation = { confusionMatrixCrossValidation, thresholdReporterCrossValidation, predictionLabels, predictionLabelIndexes, + instanceIndexes, groundTruthLabels, groundTruthLabelIndexes, predictions }; diff --git a/packages/dispatcher/src/model/evaluation/predict/Predictor.ts b/packages/dispatcher/src/model/evaluation/predict/Predictor.ts index dd97a16d4..a41bbc8b1 100644 --- a/packages/dispatcher/src/model/evaluation/predict/Predictor.ts +++ b/packages/dispatcher/src/model/evaluation/predict/Predictor.ts @@ -39,6 +39,7 @@ export class Predictor extends AbstractBaseModelFeaturizerEvaluator { new ThresholdReporter("", "", null, null, [], {}); protected predictionLabels: string[] = []; protected predictionLabelIndexes: number[] = []; + protected instanceIndexes: number[] = []; protected groundTruthLabels: string[] = []; protected groundTruthLabelIndexes: number[] = []; protected predictions: number[][] = []; @@ -60,6 +61,7 @@ export class Predictor extends AbstractBaseModelFeaturizerEvaluator { "thresholdReporterPrediction": ThresholdReporter, "predictionLabels": string[], "predictionLabelIndexes": number[], + "instanceIndexes": number[], "groundTruthLabels": string[], "groundTruthLabelIndexes": number[], "predictions": number[][] } { @@ -68,6 +70,7 @@ export class Predictor extends AbstractBaseModelFeaturizerEvaluator { thresholdReporterPrediction: this.thresholdReporterPrediction, predictionLabels: this.predictionLabels, predictionLabelIndexes: this.predictionLabelIndexes, + instanceIndexes: this.instanceIndexes, groundTruthLabels: this.groundTruthLabels, groundTruthLabelIndexes: this.groundTruthLabelIndexes, predictions: this.predictions, @@ -81,6 +84,7 @@ export class Predictor extends AbstractBaseModelFeaturizerEvaluator { "thresholdReporterPrediction": ThresholdReporter, "predictionLabels": string[], "predictionLabelIndexes": number[], + "instanceIndexes": number[], "groundTruthLabels": string[], "groundTruthLabelIndexes": number[], "predictions": number[][] } = @@ -88,13 +92,15 @@ export class Predictor extends AbstractBaseModelFeaturizerEvaluator { { const predictionLabels: string[] = predictionResult.predictionLabels; const predictionLabelIndexes: number[] = predictionResult.predictionLabelIndexes; + const instanceIndexes: number[] = predictionResult.instanceIndexes; const groundTruthLabels: string[] = predictionResult.predictionLabels; const groundTruthLabelIndexes: number[] = predictionResult.predictionLabelIndexes; const predictions: number[][] = predictionResult.predictions; const outputEvaluationReportDataArraysScoreRecords: string[][] = []; for (let index: number = 0; index < this.intents.length; index++) { - const intent: string = this.intents[index]; - const utterance: string = this.utterances[index]; + const instanceIndex: number = instanceIndexes[index]; + const intent: string = this.intents[instanceIndex]; + const utterance: string = this.utterances[instanceIndex]; const groundTruthLabel: string = groundTruthLabels[index]; const groundTruthLabelIndex: number = groundTruthLabelIndexes[index]; const predictionLabel: string = predictionLabels[index]; @@ -134,7 +140,7 @@ export class Predictor extends AbstractBaseModelFeaturizerEvaluator { } { let outputFilename: string = - `${outputReportFilenamePrefix}_PredictionScoreRecords.json`; + `${outputReportFilenamePrefix}_PredictionScoreRecords.txt`; outputFilename = Utility.storeDataArraysToTsvFile( outputFilename, outputEvaluationReportDataArrays.PredictionScoreRecords, @@ -155,6 +161,7 @@ export class Predictor extends AbstractBaseModelFeaturizerEvaluator { "thresholdReporterPrediction": ThresholdReporter, "predictionLabels": string[], "predictionLabelIndexes": number[], + "instanceIndexes": number[], "groundTruthLabels": string[], "groundTruthLabelIndexes": number[], "predictions": number[][] } = @@ -232,6 +239,7 @@ export class Predictor extends AbstractBaseModelFeaturizerEvaluator { "thresholdReporterPrediction": ThresholdReporter, "predictionLabels": string[], "predictionLabelIndexes": number[], + "instanceIndexes": number[], "groundTruthLabels": string[], "groundTruthLabelIndexes": number[], "predictions": number[][] } { @@ -278,7 +286,8 @@ export class Predictor extends AbstractBaseModelFeaturizerEvaluator { prediction, groundTruthLabelIndex, utterance, - `${this.thresholdReporterPrediction.getNumberInstances()}`); + `${this.thresholdReporterPrediction.getNumberInstances()}`, + 1); } } const predictionLabel: string = labels[predictionLabelIndex]; @@ -289,6 +298,7 @@ export class Predictor extends AbstractBaseModelFeaturizerEvaluator { // --------------------------------------------------------------- this.predictionLabels.push(predictionLabel); this.predictionLabelIndexes.push(predictionLabelIndex); + this.instanceIndexes.push(this.instanceIndexes.length); this.groundTruthLabels.push(groundTruthLabel); this.groundTruthLabelIndexes.push(groundTruthLabelIndex); this.predictions.push(prediction); diff --git a/packages/dispatcher/src/model/evaluation/report/AppDataProfileReporter.ts b/packages/dispatcher/src/model/evaluation/report/AppDataProfileReporter.ts index 1f445a3db..d94f87a76 100644 --- a/packages/dispatcher/src/model/evaluation/report/AppDataProfileReporter.ts +++ b/packages/dispatcher/src/model/evaluation/report/AppDataProfileReporter.ts @@ -73,6 +73,14 @@ export function mainDataProfileReporter(): void { required: false, }, ); + parser.addArgument( + ["-wi", "--weightColumnIndex"], + { + defaultValue: -1, + help: "weight column index", + required: false, + }, + ); parser.addArgument( ["-ls", "--linesToSkip"], { @@ -115,6 +123,7 @@ export function mainDataProfileReporter(): void { } const labelColumnIndex: number = +args.labelColumnIndex; const textColumnIndex: number = +args.textColumnIndex; + const weightColumnIndex: number = +args.weightColumnIndex; const linesToSkip: number = +args.linesToSkip; Utility.debuggingLog( `filename=${filename}`); @@ -126,6 +135,8 @@ export function mainDataProfileReporter(): void { `labelColumnIndex=${labelColumnIndex}`); Utility.debuggingLog( `textColumnIndex=${textColumnIndex}`); + Utility.debuggingLog( + `weightColumnIndex=${weightColumnIndex}`); Utility.debuggingLog( `linesToSkip=${linesToSkip}`); // ----------------------------------------------------------------------- @@ -136,15 +147,16 @@ export function mainDataProfileReporter(): void { filetype, labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip).then((data) => { // --------------------------------------------------------------- - const thresholdReporter: DataProfileReporter = + const dataProfileReporter: DataProfileReporter = new DataProfileReporter(data); // --------------------------------------------------------------- const evaluationDataArraysReportResult: { "outputEvaluationReportDataArrays": IDictionaryStringIdGenericArrays, "outputFilenames": string[], - } = thresholdReporter.generateEvaluationDataArraysReportToFiles( + } = dataProfileReporter.generateEvaluationDataArraysReportToFiles( outputReportFilenamePrefix); // --------------------------------------------------------------- }); diff --git a/packages/dispatcher/src/model/evaluation/report/AppModelMetaDataProfileReporter.ts b/packages/dispatcher/src/model/evaluation/report/AppModelMetaDataProfileReporter.ts index 3b9437f0e..7c084e927 100644 --- a/packages/dispatcher/src/model/evaluation/report/AppModelMetaDataProfileReporter.ts +++ b/packages/dispatcher/src/model/evaluation/report/AppModelMetaDataProfileReporter.ts @@ -88,6 +88,14 @@ export function mainModelMetaDataProfileReporter(): void { // ---- NOTE-TODO-PLACEHOLDER ---- }, // ---- NOTE-TODO-PLACEHOLDER ---- ); // ---- NOTE-TODO-PLACEHOLDER ---- parser.addArgument( + // ---- NOTE-TODO-PLACEHOLDER ---- ["-wi", "--weightColumnIndex"], + // ---- NOTE-TODO-PLACEHOLDER ---- { + // ---- NOTE-TODO-PLACEHOLDER ---- defaultValue: -1, + // ---- NOTE-TODO-PLACEHOLDER ---- help: "weight column index", + // ---- NOTE-TODO-PLACEHOLDER ---- required: false, + // ---- NOTE-TODO-PLACEHOLDER ---- }, + // ---- NOTE-TODO-PLACEHOLDER ---- ); + // ---- NOTE-TODO-PLACEHOLDER ---- parser.addArgument( // ---- NOTE-TODO-PLACEHOLDER ---- ["-ls", "--linesToSkip"], // ---- NOTE-TODO-PLACEHOLDER ---- { // ---- NOTE-TODO-PLACEHOLDER ---- defaultValue: 0, @@ -135,6 +143,7 @@ export function mainModelMetaDataProfileReporter(): void { } // ---- NOTE-TODO-PLACEHOLDER ---- const labelColumnIndex: number = +args.labelColumnIndex; // ---- NOTE-TODO-PLACEHOLDER ---- const textColumnIndex: number = +args.textColumnIndex; + // ---- NOTE-TODO-PLACEHOLDER ---- const weightColumnIndex: number = +args.weightColumnIndex; // ---- NOTE-TODO-PLACEHOLDER ---- const linesToSkip: number = +args.linesToSkip; // ---- NOTE-TODO-PLACEHOLDER ---- Utility.debuggingLog( // ---- NOTE-TODO-PLACEHOLDER ---- `filename=${filename}`); @@ -151,6 +160,8 @@ export function mainModelMetaDataProfileReporter(): void { // ---- NOTE-TODO-PLACEHOLDER ---- Utility.debuggingLog( // ---- NOTE-TODO-PLACEHOLDER ---- `textColumnIndex=${textColumnIndex}`); // ---- NOTE-TODO-PLACEHOLDER ---- Utility.debuggingLog( + // ---- NOTE-TODO-PLACEHOLDER ---- `weightColumnIndex=${weightColumnIndex}`); + // ---- NOTE-TODO-PLACEHOLDER ---- Utility.debuggingLog( // ---- NOTE-TODO-PLACEHOLDER ---- `linesToSkip=${linesToSkip}`); // ----------------------------------------------------------------------- const modelMetaDataProfileReporter: ModelMetaDataProfileReporter = diff --git a/packages/dispatcher/src/model/evaluation/report/AppThresholdReporter.ts b/packages/dispatcher/src/model/evaluation/report/AppThresholdReporter.ts index 7e6e0ff85..1ca180885 100644 --- a/packages/dispatcher/src/model/evaluation/report/AppThresholdReporter.ts +++ b/packages/dispatcher/src/model/evaluation/report/AppThresholdReporter.ts @@ -16,6 +16,8 @@ import { IDictionaryStringIdGenericValue } from "../../../data_structure/IDictio import { Utility } from "../../../utility/Utility"; +import { DictionaryMapUtility } from "../../../data_structure/DictionaryMapUtility"; + export function mainThresholdReporter(): void { // ----------------------------------------------------------------------- const dateTimeBeginInString: string = (new Date()).toISOString(); @@ -33,12 +35,28 @@ export function mainThresholdReporter(): void { }, ); parser.addArgument( - ["-x", "--featurizerFilename"], + ["-si", "--scoreColumnBeginIndex"], { - help: "serialized featurizer file", + help: "score column begin index", required: true, }, ); + parser.addArgument( + ["-l", "--labelFilename"], + { + defaultValue: "", + help: "an input label file", + required: false, + }, + ); + parser.addArgument( + ["-x", "--featurizerFilename"], + { + defaultValue: "", + help: "serialized featurizer file, we can use the label information from a featurizer if provided", + required: false, + }, + ); parser.addArgument( ["-o", "--outputReportFilenamePrefix"], { @@ -53,6 +71,62 @@ export function mainThresholdReporter(): void { required: false, }, ); + parser.addArgument( + ["-li", "--labelColumnIndex"], + { + defaultValue: 0, + help: "label column index", + required: false, + }, + ); + parser.addArgument( + ["-ti", "--textColumnIndex"], + { + defaultValue: 1, + help: "text/utterance column index", + required: false, + }, + ); + parser.addArgument( + ["-wi", "--weightColumnIndex"], + { + defaultValue: -1, + help: "weight column index", + required: false, + }, + ); + parser.addArgument( + ["-ii", "--identifierColumnIndex"], + { + defaultValue: -1, + help: "identifier column index", + required: false, + }, + ); + parser.addArgument( + ["-pli", "--predictedLabelColumnIndex"], + { + defaultValue: -1, + help: "predicted label column index", + required: false, + }, + ); + parser.addArgument( + ["-rti", "--revisedTextColumnIndex"], + { + defaultValue: -1, + help: "revised text/utterance column index", + required: false, + }, + ); + parser.addArgument( + ["-ls", "--lineIndexToStart"], + { + defaultValue: 0, + help: "number of lines to skip from the input file", + required: false, + }, + ); const parsedKnownArgs: any[] = parser.parseKnownArgs(); const args: any = parsedKnownArgs[0]; const unknownArgs: any = parsedKnownArgs[1]; @@ -72,10 +146,12 @@ export function mainThresholdReporter(): void { } const featurizerFilename: string = args.featurizerFilename; - if (!Utility.exists(featurizerFilename)) { - Utility.debuggingThrow( - `The input featurizer file ${featurizerFilename} does not exist! process.cwd()=${process.cwd()}`); - } + // ---- NOTE-MAY-NOT-NEED-A-FEATURIZER-FOR-labelMap ---- if (!Utility.exists(featurizerFilename)) { + // ---- NOTE-MAY-NOT-NEED-A-FEATURIZER-FOR-labelMap ---- Utility.debuggingThrow( + // ---- NOTE-MAY-NOT-NEED-A-FEATURIZER-FOR-labelMap ---- `The input featurizer file ${featurizerFilename}` + + // ---- NOTE-MAY-NOT-NEED-A-FEATURIZER-FOR-labelMap ---- ` does not exist! ` + + // ---- NOTE-MAY-NOT-NEED-A-FEATURIZER-FOR-labelMap ---- `process.cwd()=${process.cwd()}`); + // ---- NOTE-MAY-NOT-NEED-A-FEATURIZER-FOR-labelMap ---- } let outputReportFilenamePrefix: string = args.outputReportFilenamePrefix; if (Utility.isEmptyString(outputReportFilenamePrefix)) { outputReportFilenamePrefix = Utility.getFilenameWithoutExtension(scoreFilename); @@ -89,15 +165,64 @@ export function mainThresholdReporter(): void { Utility.debuggingLog( `outputReportFilenamePrefix=${outputReportFilenamePrefix}`); // ----------------------------------------------------------------------- + const labelFilename: string = + args.labelFilename; + Utility.debuggingLog( + `labelFilename=${labelFilename}`); + // ----------------------------------------------------------------------- + const labelColumnIndex: number = +args.labelColumnIndex; + const textColumnIndex: number = +args.textColumnIndex; + const weightColumnIndex: number = +args.weightColumnIndex; + const identifierColumnIndex: number = +args.identifierColumnIndex; + const scoreColumnBeginIndex: number = +args.scoreColumnBeginIndex; + const predictedLabelColumnIndex: number = +args.predictedLabelColumnIndex; + const revisedTextColumnIndex: number = +args.revisedTextColumnIndex; + const lineIndexToStart: number = +args.lineIndexToStart; + Utility.debuggingLog( + `labelColumnIndex=${labelColumnIndex}`); + Utility.debuggingLog( + `textColumnIndex=${textColumnIndex}`); + Utility.debuggingLog( + `weightColumnIndex=${weightColumnIndex}`); + Utility.debuggingLog( + `identifierColumnIndex=${identifierColumnIndex}`); + Utility.debuggingLog( + `scoreColumnBeginIndex=${scoreColumnBeginIndex}`); + Utility.debuggingLog( + `predictedLabelColumnIndex=${predictedLabelColumnIndex}`); + Utility.debuggingLog( + `revisedTextColumnIndex=${revisedTextColumnIndex}`); + Utility.debuggingLog( + `lineIndexToStart=${lineIndexToStart}`); + // ----------------------------------------------------------------------- + let labels: string[] = []; + let labelMap: { [id: string]: number; } = {}; + if (!Utility.isEmptyString(labelFilename)) { + const labelsAndLabelMap: { "stringArray": string[], "stringMap": { [id: string]: number; } } = + DictionaryMapUtility.buildStringIdNumberValueDictionaryFromUniqueStringArrayFile(labelFilename); + labels = labelsAndLabelMap.stringArray; + labelMap = labelsAndLabelMap.stringMap; + } + // ----------------------------------------------------------------------- const thresholdReporter: ThresholdReporter = new ThresholdReporter( "", featurizerFilename, null, null, - [], - {}); + labels, + labelMap); // ----------------------------------------------------------------------- + thresholdReporter.loadScoreFileAndPopulate( + scoreFilename, + labelColumnIndex, + textColumnIndex, + weightColumnIndex, + scoreColumnBeginIndex, + identifierColumnIndex, + predictedLabelColumnIndex, + revisedTextColumnIndex, + lineIndexToStart); const evaluationDataArraysReportResult: { "outputEvaluationReportDataArrays": IDictionaryStringIdGenericArrays, "outputFilenames": string[], diff --git a/packages/dispatcher/src/model/evaluation/report/ThresholdReporter.ts b/packages/dispatcher/src/model/evaluation/report/ThresholdReporter.ts index e878d9fa4..f4d7d5e95 100644 --- a/packages/dispatcher/src/model/evaluation/report/ThresholdReporter.ts +++ b/packages/dispatcher/src/model/evaluation/report/ThresholdReporter.ts @@ -25,6 +25,7 @@ export class ThresholdReporter extends AbstractBaseModelFeaturizerEvaluator { protected instanceGroudTruthLabelIds: number[] = []; protected instanceFeatureTexts: string[] = []; protected instanceIdentifiers: string[] = []; + protected instanceWeights: number[] = []; protected targetLabelBatchesToReport: string[][] = []; protected numberOfLabelsPerBatch: number = 8; @@ -154,11 +155,13 @@ export class ThresholdReporter extends AbstractBaseModelFeaturizerEvaluator { instancePredictedScoreArray: number[], instanceGroudTruthLabelId: number, instanceFeatureText: string, - instanceIdentifier: string): void { + instanceIdentifier: string, + instanceWeight: number): void { this.instancePredictedScoreArrays.push(instancePredictedScoreArray); this.instanceGroudTruthLabelIds.push(instanceGroudTruthLabelId); this.instanceFeatureTexts.push(instanceFeatureText); this.instanceIdentifiers.push(instanceIdentifier); + this.instanceWeights.push(instanceWeight); } public reportToDataArrays(): string[][] { @@ -341,6 +344,9 @@ export class ThresholdReporter extends AbstractBaseModelFeaturizerEvaluator { if (Utility.isEmptyStringArray(this.instanceIdentifiers)) { Utility.debuggingThrow("'this.instanceIdentifiers' array is empty"); } + if (Utility.isEmptyNumberArray(this.instanceWeights)) { + Utility.debuggingThrow("'this.instanceWeights' array is empty"); + } const numberInstances: number = this.getNumberInstances(); if (this.instanceGroudTruthLabelIds.length !== numberInstances) { Utility.debuggingThrow( @@ -354,38 +360,52 @@ export class ThresholdReporter extends AbstractBaseModelFeaturizerEvaluator { Utility.debuggingThrow( `this.instanceIdentifiers.length|${this.instanceIdentifiers.length}|!==numberInstances|${numberInstances}|`); } + if (this.instanceWeights.length !== numberInstances) { + Utility.debuggingThrow( + `this.instanceWeights.length|${this.instanceWeights.length}|!==numberInstances|${numberInstances}|`); + } } public loadScoreFileAndPopulate( scoreFilename: string, labelColumnIndex: number = 0, textColumnIndex: number = 1, - scoreColumnBeginIndex: number = 2, - // numberOfScoreColumns: number = 0, weightColumnIndex: number = -1, + scoreColumnBeginIndex: number = 2, + identifierColumnIndex: number = -1, + predictedLabelColumnIndex: number = -1, + revisedTextColumnIndex: number = -1, lineIndexToStart: number = 0): void { const scoreDataStructure: { - "intents": string[], - "utterances": string[], + "labels": string[], + "texts": string[], "weights": number[], - "scoreArrays": number[][] } = - Utility.loadLabelTextScoreContent( + "identifiers": string[], + "scoreArrays": number[][], + "predictedLabels": string[], + "revisedTexts": string[] } = + Utility.loadLabelTextScoreFile( scoreFilename, labelColumnIndex, textColumnIndex, + weightColumnIndex, scoreColumnBeginIndex, this.getNumberLabels(), - weightColumnIndex, + identifierColumnIndex, + predictedLabelColumnIndex, + revisedTextColumnIndex, lineIndexToStart); const labelMap: { [id: string]: number; } = this.getLabelMap(); - const numberInstances: number = scoreDataStructure.intents.length; + const numberInstances: number = scoreDataStructure.labels.length; for (let i = 0; i < numberInstances; i++) { - const label: string = scoreDataStructure.intents[i]; - const text: string = scoreDataStructure.utterances[i]; + const label: string = scoreDataStructure.labels[i]; + const text: string = scoreDataStructure.texts[i]; + const identifier: string = scoreDataStructure.identifiers[i]; const scoreArray: number[] = scoreDataStructure.scoreArrays[i]; const labelId: number = labelMap[label]; - this.addInstance(scoreArray, labelId, text, i.toString()); + const weight: number = scoreDataStructure.weights[i]; + this.addInstance(scoreArray, labelId, text, identifier, weight); } } } diff --git a/packages/dispatcher/src/model/evaluation/test/AppTester.ts b/packages/dispatcher/src/model/evaluation/test/AppTester.ts index dfadadb53..a778771bd 100644 --- a/packages/dispatcher/src/model/evaluation/test/AppTester.ts +++ b/packages/dispatcher/src/model/evaluation/test/AppTester.ts @@ -91,11 +91,19 @@ export async function mainTester(): Promise { parser.addArgument( ["-ti", "--textColumnIndex"], { - defaultValue: 0, + defaultValue: 1, help: "text/utterance column index", required: false, }, ); + parser.addArgument( + ["-wi", "--weightColumnIndex"], + { + defaultValue: -1, + help: "weight column index", + required: false, + }, + ); parser.addArgument( ["-ls", "--linesToSkip"], { @@ -154,11 +162,14 @@ export async function mainTester(): Promise { // ----------------------------------------------------------------------- const labelColumnIndex: number = +args.labelColumnIndex; const textColumnIndex: number = +args.textColumnIndex; + const weightColumnIndex: number = +args.weightColumnIndex; const linesToSkip: number = +args.linesToSkip; Utility.debuggingLog( `labelColumnIndex=${labelColumnIndex}`); Utility.debuggingLog( `textColumnIndex=${textColumnIndex}`); + Utility.debuggingLog( + `weightColumnIndex=${weightColumnIndex}`); Utility.debuggingLog( `linesToSkip=${linesToSkip}`); // ----------------------------------------------------------------------- @@ -171,11 +182,13 @@ export async function mainTester(): Promise { // Utility.debuggingLog( // `featurizer.getLabelMap()=${Utility.JSONstringify(featurizer.getLabelMap())}`); // ----------------------------------------------------------------------- - let intentsUtterances: { + let intentsUtterancesWeights: { "intents": string[], - "utterances": string[] } = { + "utterances": string[], + "weights": number[] } = { intents: [], - utterances: [] }; + utterances: [], + weights: [] }; let intentLabelIndexArray: number[] = []; let utteranceFeatureIndexArrays: number[][] = []; const data: Data = await DataUtility.LoadData( @@ -185,14 +198,16 @@ export async function mainTester(): Promise { filetype, labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip); - intentsUtterances = data.getIntentsUtterances(); + intentsUtterancesWeights = data.getIntentsUtterancesWeights(); intentLabelIndexArray = data.getIntentLabelIndexArray(); utteranceFeatureIndexArrays = data.getUtteranceFeatureIndexArrays(); // ----------------------------------------------------------------------- tester.test( - intentsUtterances.intents, - intentsUtterances.utterances, + intentsUtterancesWeights.intents, + intentsUtterancesWeights.utterances, + intentsUtterancesWeights.weights, intentLabelIndexArray, utteranceFeatureIndexArrays); // ----------------------------------------------------------------------- diff --git a/packages/dispatcher/src/model/evaluation/test/Tester.ts b/packages/dispatcher/src/model/evaluation/test/Tester.ts index d05236d86..b923ab57b 100644 --- a/packages/dispatcher/src/model/evaluation/test/Tester.ts +++ b/packages/dispatcher/src/model/evaluation/test/Tester.ts @@ -40,6 +40,7 @@ export class Tester extends AbstractBaseModelFeaturizerEvaluator { "thresholdReporterTest": ThresholdReporter, "predictionLabels": string[], "predictionLabelIndexes": number[], + "instanceIndexes": number[], "groundTruthLabels": string[], "groundTruthLabelIndexes": number[], "predictions": number[][] } = { @@ -47,6 +48,7 @@ export class Tester extends AbstractBaseModelFeaturizerEvaluator { thresholdReporterTest: new ThresholdReporter("", "", null, null, [], {}), predictionLabels: [], predictionLabelIndexes: [], + instanceIndexes: [], groundTruthLabels: [], groundTruthLabelIndexes: [], predictions: [] }; @@ -62,6 +64,7 @@ export class Tester extends AbstractBaseModelFeaturizerEvaluator { "thresholdReporterTest": ThresholdReporter, "predictionLabels": string[], "predictionLabelIndexes": number[], + "instanceIndexes": number[], "groundTruthLabels": string[], "groundTruthLabelIndexes": number[], "predictions": number[][] } { @@ -73,13 +76,15 @@ export class Tester extends AbstractBaseModelFeaturizerEvaluator { { const predictionLabels: string[] = this.testResultCachedAfterTest.predictionLabels; const predictionLabelIndexes: number[] = this.testResultCachedAfterTest.predictionLabelIndexes; - const groundTruthLabels: string[] = this.testResultCachedAfterTest.predictionLabels; - const groundTruthLabelIndexes: number[] = this.testResultCachedAfterTest.predictionLabelIndexes; + const instanceIndexes: number[] = this.testResultCachedAfterTest.instanceIndexes; + const groundTruthLabels: string[] = this.testResultCachedAfterTest.groundTruthLabels; + const groundTruthLabelIndexes: number[] = this.testResultCachedAfterTest.groundTruthLabelIndexes; const predictions: number[][] = this.testResultCachedAfterTest.predictions; const outputEvaluationReportDataArraysScoreRecords: string[][] = []; for (let index: number = 0; index < this.intentsCachedAfterTest.length; index++) { - const intent: string = this.intentsCachedAfterTest[index]; - const utterance: string = this.utterancesCachedAfterTest[index]; + const instanceIndex: number = instanceIndexes[index]; + const intent: string = this.intentsCachedAfterTest[instanceIndex]; + const utterance: string = this.utterancesCachedAfterTest[instanceIndex]; const groundTruthLabel: string = groundTruthLabels[index]; const groundTruthLabelIndex: number = groundTruthLabelIndexes[index]; const predictionLabel: string = predictionLabels[index]; @@ -87,6 +92,7 @@ export class Tester extends AbstractBaseModelFeaturizerEvaluator { const outputEvaluationReportDataArraysScoreRecord: string[] = []; outputEvaluationReportDataArraysScoreRecord.push(intent); outputEvaluationReportDataArraysScoreRecord.push(utterance); + outputEvaluationReportDataArraysScoreRecord.push(instanceIndex.toString()); outputEvaluationReportDataArraysScoreRecord.push(groundTruthLabel); outputEvaluationReportDataArraysScoreRecord.push(groundTruthLabelIndex.toString()); outputEvaluationReportDataArraysScoreRecord.push(predictionLabel); @@ -119,7 +125,7 @@ export class Tester extends AbstractBaseModelFeaturizerEvaluator { } { let outputFilename: string = - `${outputReportFilenamePrefix}_TestScoreRecords.json`; + `${outputReportFilenamePrefix}_TestScoreRecords.txt`; outputFilename = Utility.storeDataArraysToTsvFile( outputFilename, outputEvaluationReportDataArrays.TestScoreRecords, @@ -204,12 +210,14 @@ export class Tester extends AbstractBaseModelFeaturizerEvaluator { public test( intents: string[], utterances: string[], + weights: number[], labelIndexArray: number[], featureIndexArrays: number[][]): { "confusionMatrixTest": ConfusionMatrix, "thresholdReporterTest": ThresholdReporter, "predictionLabels": string[], "predictionLabelIndexes": number[], + "instanceIndexes": number[], "groundTruthLabels": string[], "groundTruthLabelIndexes": number[], "predictions": number[][] } { @@ -245,6 +253,14 @@ export class Tester extends AbstractBaseModelFeaturizerEvaluator { `utterances.length|${utterances.length}|!==` + `numberInstances|${numberInstances}|`); } + const isWeightsArrayNotEmpty: boolean = !Utility.isEmptyNumberArray(weights); + if (isWeightsArrayNotEmpty) { + if (weights.length !== numberInstances) { + Utility.debuggingThrow( + `weights.length|${weights.length}|!==` + + `numberInstances|${numberInstances}|`); + } + } if (labelIndexArray.length !== numberInstances) { Utility.debuggingThrow( `labelIndexArray.length|${labelIndexArray.length}|!==` + @@ -261,6 +277,8 @@ export class Tester extends AbstractBaseModelFeaturizerEvaluator { new Array(numberInstances); const predictionLabelIndexes: number[] = new Array(numberInstances); + const instanceIndexes: number[] = + new Array(numberInstances); const groundTruthLabels: string[] = new Array(numberInstances); const groundTruthLabelIndexes: number[] = @@ -269,11 +287,16 @@ export class Tester extends AbstractBaseModelFeaturizerEvaluator { // --------------------------------------------------------------- // const intent: string = intents[index]; const utterance: string = utterances[index]; + let weight: number = 1; + if (isWeightsArrayNotEmpty) { + weight = weights[index]; + } const labelIndex: number = labelIndexArray[index]; // const featureIndexArray: number[] = featureIndexArrays[index]; const prediction: number[] = predictions[index]; // --------------------------------------------------------------- const groundTruthLabel: string = labels[labelIndex]; + instanceIndexes[index] = index; groundTruthLabels[index] = groundTruthLabel; groundTruthLabelIndexes[index] = labelIndex; // --------------------------------------------------------------- @@ -289,12 +312,14 @@ export class Tester extends AbstractBaseModelFeaturizerEvaluator { predictionLabel; confusionMatrixTest.addInstanceByLabelIndex( labelIndex, - predictionLabelIndex); + predictionLabelIndex, + weight); thresholdReporterTest.addInstance( prediction, labelIndex, utterance, - `${index}`); + `${index}`, + weight); // --------------------------------------------------------------- } // ------------------------------------------------------------------- @@ -303,6 +328,7 @@ export class Tester extends AbstractBaseModelFeaturizerEvaluator { thresholdReporterTest, predictionLabels, predictionLabelIndexes, + instanceIndexes, groundTruthLabels, groundTruthLabelIndexes, predictions }; diff --git a/packages/dispatcher/src/model/language_understanding/featurizer/NgramSubwordFeaturizer.ts b/packages/dispatcher/src/model/language_understanding/featurizer/NgramSubwordFeaturizer.ts index ec6ad14fb..d9755f6e6 100644 --- a/packages/dispatcher/src/model/language_understanding/featurizer/NgramSubwordFeaturizer.ts +++ b/packages/dispatcher/src/model/language_understanding/featurizer/NgramSubwordFeaturizer.ts @@ -18,8 +18,11 @@ export class NgramSubwordFeaturizer { protected toRemoveEmptyElements: boolean = true; protected splitDelimiter: string = " "; - protected intentsUtterances: - { "intents": string[], "utterances": string[] } = { intents: [], utterances: [] }; + protected intentsUtterancesWeights: + { "intents": string[], "utterances": string[], "weights": number[] } = { + intents: [], + utterances: [], + weights: [] }; protected labels: string[] = []; protected labelMap: { [id: string]: number; } = {}; @@ -44,8 +47,8 @@ export class NgramSubwordFeaturizer { this.numberHashingFeaturesSetting = numberHashingFeaturesSetting; } - public getIntentsUtterances(): { "intents": string[], "utterances": string[] } { - return this.intentsUtterances; + public getIntentsUtterancesWeights(): { "intents": string[], "utterances": string[], "weights": number[] } { + return this.intentsUtterancesWeights; } public getLabels(): string[] { @@ -154,10 +157,11 @@ export class NgramSubwordFeaturizer { } public createIntentUtteranceSparseIndexArrays( - intentsUtterances: { "intents": string[], "utterances": string[] }): + intentsUtterancesWeights: { "intents": string[], "utterances": string[], "weights": number[] }): { "intentLabelIndexArray": number[], "utteranceFeatureIndexArrays": number[][] } { - const intents: string[] = intentsUtterances.intents; - const utterances: string[] = intentsUtterances.utterances; + const intents: string[] = intentsUtterancesWeights.intents; + const utterances: string[] = intentsUtterancesWeights.utterances; + const weights: number[] = intentsUtterancesWeights.weights; const intentLabelIndexArray: number[] = intents.map((intent) => this.getLabelIndex(intent), this); const utteranceFeatureIndexArrays: number[][] = @@ -192,12 +196,13 @@ export class NgramSubwordFeaturizer { } public createIntentUtteranceMiniBatchingSparseIndexArrays( - intentsUtterances: { "intents": string[], "utterances": string[] }, + intentsUtterancesWeights: { "intents": string[], "utterances": string[], "weights": number[] }, miniBatchIndexBegin: number = 0, miniBatchIndexEnd: number = 0): { "intentLabelIndexArray": number[], "utteranceFeatureIndexArrays": number[][] } { - const intents: string[] = intentsUtterances.intents; - const utterances: string[] = intentsUtterances.utterances; + const intents: string[] = intentsUtterancesWeights.intents; + const utterances: string[] = intentsUtterancesWeights.utterances; + const weights: number[] = intentsUtterancesWeights.weights; const intentLabelIndexArray: number[] = intents .slice(miniBatchIndexBegin, miniBatchIndexEnd) .map((intent) => this.getLabelIndex(intent), this); @@ -231,10 +236,11 @@ export class NgramSubwordFeaturizer { } public createIntentUtteranceHashingSparseIndexArrays( - intentsUtterances: { "intents": string[], "utterances": string[] }): + intentsUtterancesWeights: { "intents": string[], "utterances": string[], "weights": number[] }): { "intentLabelIndexArray": number[], "utteranceFeatureIndexArrays": number[][] } { - const intents: string[] = intentsUtterances.intents; - const utterances: string[] = intentsUtterances.utterances; + const intents: string[] = intentsUtterancesWeights.intents; + const utterances: string[] = intentsUtterancesWeights.utterances; + const weights: number[] = intentsUtterancesWeights.weights; const intentLabelIndexArray: number[] = intents.map((intent) => this.getLabelIndex(intent), this); const utteranceFeatureIndexArrays: number[][] = @@ -350,10 +356,11 @@ export class NgramSubwordFeaturizer { } public createIntentUtteranceOneHotEncoderBooleanArrays( - intentsUtterances: { "intents": string[], "utterances": string[] }): + intentsUtterancesWeights: { "intents": string[], "utterances": string[], "weights": number[] }): { "intentLabelIndexArrays": boolean[][], "utteranceFeatureIndexArrays": boolean[][] } { - const intents: string[] = intentsUtterances.intents; - const utterances: string[] = intentsUtterances.utterances; + const intents: string[] = intentsUtterancesWeights.intents; + const utterances: string[] = intentsUtterancesWeights.utterances; + const weights: number[] = intentsUtterancesWeights.weights; const intentLabelIndexArrays: boolean[][] = intents.map((intent) => this.createLabelOneHotEncoderBooleanArray(intent), this); const utteranceFeatureIndexArrays: boolean[][] = @@ -361,10 +368,11 @@ export class NgramSubwordFeaturizer { return { intentLabelIndexArrays, utteranceFeatureIndexArrays }; } public createIntentUtteranceOneHotEncoderNumberArrays( - intentsUtterances: { "intents": string[], "utterances": string[] }): + intentsUtterancesWeights: { "intents": string[], "utterances": string[], "weights": number[] }): { "intentLabelIndexArrays": number[][], "utteranceFeatureIndexArrays": number[][] } { - const intents: string[] = intentsUtterances.intents; - const utterances: string[] = intentsUtterances.utterances; + const intents: string[] = intentsUtterancesWeights.intents; + const utterances: string[] = intentsUtterancesWeights.utterances; + const weights: number[] = intentsUtterancesWeights.weights; const intentLabelIndexArrays: number[][] = intents.map((intent) => this.createLabelOneHotEncoderNumberArray(intent), this); const utteranceFeatureIndexArrays: number[][] = @@ -422,12 +430,13 @@ export class NgramSubwordFeaturizer { } public createIntentUtteranceMiniBatchingOneHotEncoderBooleanArrays( - intentsUtterances: { "intents": string[], "utterances": string[] }, + intentsUtterancesWeights: { "intents": string[], "utterances": string[], "weights": number[] }, miniBatchIndexBegin: number = 0, miniBatchIndexEnd: number = 0): { "intentLabelIndexArrays": boolean[][], "utteranceFeatureIndexArrays": boolean[][] } { - const intents: string[] = intentsUtterances.intents; - const utterances: string[] = intentsUtterances.utterances; + const intents: string[] = intentsUtterancesWeights.intents; + const utterances: string[] = intentsUtterancesWeights.utterances; + const weights: number[] = intentsUtterancesWeights.weights; const intentLabelIndexArrays: boolean[][] = intents .slice(miniBatchIndexBegin, miniBatchIndexEnd) .map((intent) => this.createLabelOneHotEncoderBooleanArray(intent), this); @@ -437,12 +446,13 @@ export class NgramSubwordFeaturizer { return { intentLabelIndexArrays, utteranceFeatureIndexArrays }; } public createIntentUtteranceMiniBatchingOneHotEncoderNumberArrays( - intentsUtterances: { "intents": string[], "utterances": string[] }, + intentsUtterancesWeights: { "intents": string[], "utterances": string[], "weights": number[] }, miniBatchIndexBegin: number = 0, miniBatchIndexEnd: number = 0): { "intentLabelIndexArrays": number[][], "utteranceFeatureIndexArrays": number[][] } { - const intents: string[] = intentsUtterances.intents; - const utterances: string[] = intentsUtterances.utterances; + const intents: string[] = intentsUtterancesWeights.intents; + const utterances: string[] = intentsUtterancesWeights.utterances; + const weights: number[] = intentsUtterancesWeights.weights; const intentLabelIndexArrays: number[][] = intents .slice(miniBatchIndexBegin, miniBatchIndexEnd) .map((intent) => this.createLabelOneHotEncoderNumberArray(intent), this); @@ -493,10 +503,11 @@ export class NgramSubwordFeaturizer { } public createIntentUtteranceHashingOneHotEncoderBooleanArrays( - intentsUtterances: { "intents": string[], "utterances": string[] }): + intentsUtterancesWeights: { "intents": string[], "utterances": string[], "weights": number[] }): { "intentLabelIndexArrays": boolean[][], "utteranceFeatureIndexArrays": boolean[][] } { - const intents: string[] = intentsUtterances.intents; - const utterances: string[] = intentsUtterances.utterances; + const intents: string[] = intentsUtterancesWeights.intents; + const utterances: string[] = intentsUtterancesWeights.utterances; + const weights: number[] = intentsUtterancesWeights.weights; const intentLabelIndexArrays: boolean[][] = intents.map((intent) => this.createLabelOneHotEncoderBooleanArray(intent), this); const utteranceFeatureIndexArrays: boolean[][] = @@ -504,10 +515,11 @@ export class NgramSubwordFeaturizer { return { intentLabelIndexArrays, utteranceFeatureIndexArrays }; } public createIntentUtteranceHashingOneHotEncoderNumberArrays( - intentsUtterances: { "intents": string[], "utterances": string[] }): + intentsUtterancesWeights: { "intents": string[], "utterances": string[], "weights": number[] }): { "intentLabelIndexArrays": number[][], "utteranceFeatureIndexArrays": number[][] } { - const intents: string[] = intentsUtterances.intents; - const utterances: string[] = intentsUtterances.utterances; + const intents: string[] = intentsUtterancesWeights.intents; + const utterances: string[] = intentsUtterancesWeights.utterances; + const weights: number[] = intentsUtterancesWeights.weights; const intentLabelIndexArrays: number[][] = intents.map((intent) => this.createLabelOneHotEncoderNumberArray(intent), this); const utteranceFeatureIndexArrays: number[][] = @@ -530,6 +542,10 @@ export class NgramSubwordFeaturizer { } public split(input: string): string[] { + return this.splitRaw( + input).map((x: string) => x.trim()); + } + public splitRaw(input: string): string[] { return Utility.splitByPunctuation( input, this.splitDelimiter, @@ -550,13 +566,13 @@ export class NgramSubwordFeaturizer { } public resetLabelFeatureMaps( - intentsUtterances: { "intents": string[], "utterances": string[] }): void { + intentsUtterancesWeights: { "intents": string[], "utterances": string[], "weights": number[] }): void { // ------------------------------------------------------------------- - this.intentsUtterances = - intentsUtterances; + this.intentsUtterancesWeights = + intentsUtterancesWeights; // ------------------------------------------------------------------- const intents: string[] = - intentsUtterances.intents; + intentsUtterancesWeights.intents; const intentLabels: { "stringArray": string[], "stringMap": { [id: string]: number; } } = DictionaryMapUtility.buildStringIdNumberValueDictionaryFromStringArray(intents); this.labels = @@ -565,7 +581,7 @@ export class NgramSubwordFeaturizer { intentLabels.stringMap; // ------------------------------------------------------------------- const utterances: string[] = - intentsUtterances.utterances; + intentsUtterancesWeights.utterances; const featureArray: string[][] = utterances.map((text) => this.featurize(text)); // ---- NOTE-FOR-REFERENCE ---- let featureArrayFlattened: string[] = @@ -626,7 +642,7 @@ export class NgramSubwordFeaturizer { this.toRemovePunctuations = deserialized.toRemovePunctuations; this.toRemoveEmptyElements = deserialized.toRemoveEmptyElements; this.splitDelimiter = deserialized.splitDelimiter; - this.intentsUtterances = deserialized.intentsUtterances; + this.intentsUtterancesWeights = deserialized.intentsUtterancesWeights; this.labels = deserialized.labels; this.labelMap = deserialized.labelMap; this.features = deserialized.features; diff --git a/packages/dispatcher/src/model/supervised/classifier/auto_active_learning/AppAutoActiveLearner.ts b/packages/dispatcher/src/model/supervised/classifier/auto_active_learning/AppAutoActiveLearner.ts index 06a30650d..b78158a71 100644 --- a/packages/dispatcher/src/model/supervised/classifier/auto_active_learning/AppAutoActiveLearner.ts +++ b/packages/dispatcher/src/model/supervised/classifier/auto_active_learning/AppAutoActiveLearner.ts @@ -45,6 +45,7 @@ export class AppAutoActiveLearner { * @param data - a Data object whose label and text connect are used as input. * @param labelColumnIndex - label/intent column index. * @param textColumnIndex - text/utterace column index. + * @param weightColumnIndex - weight column index. * @param linesToSkip - number of header lines skipped before processing each line as an instance record. * @param doBootstrapResampling - boolean flag to activate bootstrap resampling (BRS) logic or not. * @param brsDistribution - explicit distribution to control bootstrap resampling process @@ -65,6 +66,7 @@ export class AppAutoActiveLearner { data: Data, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, linesToSkip: number, doBootstrapResampling: boolean = AppAutoActiveLearner.defaultDoBootstrapResampling, @@ -126,6 +128,7 @@ export class AppAutoActiveLearner { data, labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip, samplingIndexArray, false); @@ -247,6 +250,7 @@ export class AppAutoActiveLearner { data, labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip, new Set(aalSampledInstanceIndexArray), false); @@ -349,6 +353,7 @@ export class AppAutoActiveLearner { luData, -1, // ---- NOTE-NO-NEED-FOR-LuData ---- labelColumnIndex, -1, // ---- NOTE-NO-NEED-FOR-LuData ---- textColumnIndex, + -1, // ---- NOTE-NO-NEED-FOR-LuData ---- weightColumnIndex, -1, // ---- NOTE-NO-NEED-FOR-LuData ---- linesToSkip, samplingIndexArray, false); @@ -488,6 +493,7 @@ export class AppAutoActiveLearner { * @param columnarContent - content of a TSV columnar file in string form as input. * @param labelColumnIndex - label/intent column index. * @param textColumnIndex - text/utterace column index. + * @param weightColumnIndex - weight column index. * @param linesToSkip - number of header lines skipped before processing each line as an instance record. * @param doBootstrapResampling - boolean flag to activate bootstrap resampling (BRS) logic or not. * @param brsDistribution - explicit distribution to control bootstrap resampling process @@ -508,6 +514,7 @@ export class AppAutoActiveLearner { columnarContent: string, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, linesToSkip: number, doBootstrapResampling: boolean = AppAutoActiveLearner.defaultDoBootstrapResampling, @@ -549,6 +556,7 @@ export class AppAutoActiveLearner { new NgramSubwordFeaturizer(), labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip, true); // ------------------------------------------------------------------- @@ -578,6 +586,7 @@ export class AppAutoActiveLearner { columnarData, labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip, samplingIndexArray, false); @@ -700,6 +709,7 @@ export class AppAutoActiveLearner { columnarData, labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip, new Set(aalSampledInstanceIndexArray), false); @@ -918,11 +928,19 @@ export class AppAutoActiveLearner { parser.addArgument( ["-ti", "--textColumnIndex"], { - defaultValue: 0, + defaultValue: 1, help: "text/utterance column index", required: false, }, ); + parser.addArgument( + ["-wi", "--weightColumnIndex"], + { + defaultValue: -1, + help: "weight column index", + required: false, + }, + ); parser.addArgument( ["-ls", "--linesToSkip"], { @@ -1025,11 +1043,14 @@ export class AppAutoActiveLearner { // ------------------------------------------------------------------- const labelColumnIndex: number = +args.labelColumnIndex; const textColumnIndex: number = +args.textColumnIndex; + const weightColumnIndex: number = +args.weightColumnIndex; const linesToSkip: number = +args.linesToSkip; Utility.debuggingLog( `labelColumnIndex=${labelColumnIndex}`); Utility.debuggingLog( `textColumnIndex=${textColumnIndex}`); + Utility.debuggingLog( + `weightColumnIndex=${weightColumnIndex}`); Utility.debuggingLog( `linesToSkip=${linesToSkip}`); // ------------------------------------------------------------------- @@ -1049,11 +1070,13 @@ export class AppAutoActiveLearner { `bootstrapResamplingDistributionFileLinesToSkip=` + `${bootstrapResamplingDistributionFileLinesToSkip}`); // ------------------------------------------------------------------- - let intentsUtterances: { + let intentsUtterancesWeights: { "intents": string[], - "utterances": string[] } = { + "utterances": string[], + "weights": number[] } = { intents: [], - utterances: [] }; + utterances: [], + weights: [] }; let intentLabelIndexArray: number[] = []; let utteranceFeatureIndexArrays: number[][] = []; const data: Data = await DataUtility.LoadData( @@ -1063,8 +1086,9 @@ export class AppAutoActiveLearner { filetype, labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip); - intentsUtterances = data.getIntentsUtterances(); + intentsUtterancesWeights = data.getIntentsUtterancesWeights(); intentLabelIndexArray = data.getIntentLabelIndexArray(); utteranceFeatureIndexArrays = data.getUtteranceFeatureIndexArrays(); // ------------------------------------------------------------------- @@ -1096,6 +1120,7 @@ export class AppAutoActiveLearner { data, labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip, doBootstrapResampling, bootstrapResamplingDistribution, diff --git a/packages/dispatcher/src/model/supervised/classifier/neural_network/learner/AppSoftmaxRegressionSparse.ts b/packages/dispatcher/src/model/supervised/classifier/neural_network/learner/AppSoftmaxRegressionSparse.ts index 7054dcc0f..fd747c24d 100644 --- a/packages/dispatcher/src/model/supervised/classifier/neural_network/learner/AppSoftmaxRegressionSparse.ts +++ b/packages/dispatcher/src/model/supervised/classifier/neural_network/learner/AppSoftmaxRegressionSparse.ts @@ -28,6 +28,7 @@ export class AppSoftmaxRegressionSparse { testDatasetFilename: string, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, lineIndexToStart: number, epochs: number = AppSoftmaxRegressionSparse.defaultEpochs, miniBatchSize: number = AppSoftmaxRegressionSparse.defaultMiniBatchSize, @@ -52,9 +53,10 @@ export class AppSoftmaxRegressionSparse { trainDatasetFilename, labelColumnIndex, textColumnIndex, + weightColumnIndex, lineIndexToStart); - const intentsUtterances: { "intents": string[], "utterances": string[] } = - featurizer.getIntentsUtterances(); + const intentsUtterancesWeights: { "intents": string[], "utterances": string[], "weights": number[] } = + featurizer.getIntentsUtterancesWeights(); const labels: string[] = featurizer.getLabels(); const labelMap: { [id: string]: number; } = @@ -75,8 +77,9 @@ export class AppSoftmaxRegressionSparse { l2Regularization, lossEarlyStopRatio); // ------------------------------------------------------------------- - // const inputIntents: string[] = intentsUtterances.intents; - // const inputUtterances: string[] = intentsUtterances.utterances; + // const inputIntents: string[] = intentsUtterancesWeights.intents; + // const inputUtterances: string[] = intentsUtterancesWeights.utterances; + // const inputWeights: number[] = intentsUtterancesWeights.weights; // const numberInstances: number = inputIntents.length; { // --------------------------------------------------------------- @@ -84,7 +87,7 @@ export class AppSoftmaxRegressionSparse { // ----------------------------------------------------------- const intentUtteranceSparseIndexArrays = featurizer.createIntentUtteranceSparseIndexArrays( - intentsUtterances); + intentsUtterancesWeights); const intentLabelIndexArray: number[] = intentUtteranceSparseIndexArrays.intentLabelIndexArray; const utteranceFeatureIndexArrays: number[][] = @@ -120,6 +123,7 @@ export class AppSoftmaxRegressionSparse { testDatasetFilename, labelColumnIndex, textColumnIndex, + weightColumnIndex, lineIndexToStart); // ------------------------------------------------------------------- const learner: SoftmaxRegressionSparse = diff --git a/packages/dispatcher/src/model/supervised/classifier/neural_network/learner/UtilityLearner.ts b/packages/dispatcher/src/model/supervised/classifier/neural_network/learner/UtilityLearner.ts index be277beed..06ca9b3b4 100644 --- a/packages/dispatcher/src/model/supervised/classifier/neural_network/learner/UtilityLearner.ts +++ b/packages/dispatcher/src/model/supervised/classifier/neural_network/learner/UtilityLearner.ts @@ -19,37 +19,41 @@ export class LearnerUtility { testDatasetFilename: string, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, lineIndexToStart: number) { // ------------------------------------------------------------------- const labels: string[] = featurizer.getLabels(); // ------------------------------------------------------------------- - const intentsUtterancesDev: { "intents": string[], "utterances": string[] } = + const intentsUtterancesWeightsDev: { "intents": string[], "texts": string[], "weights": number[] } = LearnerUtility.exampleFunctionLoadTestDataset( testDatasetFilename, labelColumnIndex, textColumnIndex, + weightColumnIndex, lineIndexToStart); const intents: string[] = - intentsUtterancesDev.intents; - const utterances: string[] = - intentsUtterancesDev.utterances; + intentsUtterancesWeightsDev.intents; + const texts: string[] = + intentsUtterancesWeightsDev.texts; + // const weights: string[] = + // intentsUtterancesWeightsDev.weights; const numberIntentUtterancesDev: number = intents.length; let countPredictionsCorrect = 0; for (let i: number = 0; i < numberIntentUtterancesDev; i++) { const intent: string = intents[i]; - const utterance: string = - utterances[i]; - const utteranceFeatureIndexArray: string[] = + const text: string = + texts[i]; + const textFeatureIndexArray: string[] = new Array(1); - utteranceFeatureIndexArray[0] = utterance; - const utteranceFeatures: number[][] = + textFeatureIndexArray[0] = text; + const textFeatures: number[][] = featurizer.createFeatureSparseIndexArrays( - utteranceFeatureIndexArray); + textFeatureIndexArray); const predictions: number[][] = - model.predict(utteranceFeatures); + model.predict(textFeatures); const predictionsDataArray: number[][] = predictions; const predictionLabelIndexMax: { "indexMax": number, "max": number } = @@ -84,37 +88,41 @@ export class LearnerUtility { testDatasetFilename: string, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, lineIndexToStart: number) { // ------------------------------------------------------------------- const labels: string[] = featurizer.getLabels(); // ------------------------------------------------------------------- - const intentsUtterancesDev: { "intents": string[], "utterances": string[] } = + const intentsUtterancesWeightsDev: { "intents": string[], "texts": string[], "weights": number[] } = LearnerUtility.exampleFunctionLoadTestDataset( testDatasetFilename, labelColumnIndex, textColumnIndex, + weightColumnIndex, lineIndexToStart); const intents: string[] = - intentsUtterancesDev.intents; - const utterances: string[] = - intentsUtterancesDev.utterances; + intentsUtterancesWeightsDev.intents; + const texts: string[] = + intentsUtterancesWeightsDev.texts; + const weights: number[] = + intentsUtterancesWeightsDev.weights; const numberIntentUtterancesDev: number = intents.length; let countPredictionsCorrect = 0; for (let i: number = 0; i < numberIntentUtterancesDev; i++) { const intent: string = intents[i]; - const utterance: string = - utterances[i]; - const utteranceFeatureIndexArray: string[] = + const text: string = + texts[i]; + const textFeatureIndexArray: string[] = new Array(1); - utteranceFeatureIndexArray[0] = utterance; - const utteranceFeatures: number[][] = + textFeatureIndexArray[0] = text; + const textFeatures: number[][] = featurizer.createFeatureHashingSparseIndexArrays( - utteranceFeatureIndexArray); + textFeatureIndexArray); const predictions: number[][] = - model.predict(utteranceFeatures); + model.predict(textFeatures); const predictionsDataArray: number[][] = predictions; const predictionLabelIndexMax: { "indexMax": number, "max": number } = @@ -148,6 +156,7 @@ export class LearnerUtility { filename: string, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, lineIndexToStart: number, subwordNgramBegin: number = 3, subwordNgramEnd: number = 4, @@ -159,11 +168,12 @@ export class LearnerUtility { Utility.debuggingThrow( `The input dataset file ${filename} does not exist! process.cwd()=${process.cwd()}`); } - const intentsUtterances: { "intents": string[], "utterances": string[] } = - Utility.loadLabelTextColumnarFile( + const intentsUtterancesWeights: { "intents": string[], "utterances": string[], "weights": number[] } = + Utility.loadLabelUtteranceColumnarFile( filename, // ---- filename: string, labelColumnIndex, // ---- labelColumnIndex: number = 0, textColumnIndex, // ---- textColumnIndex: number = 1, + weightColumnIndex, // ---- weightColumnIndex: number = -1, lineIndexToStart, // ---- lineIndexToStart: number = 0, "\t", // ---- columnDelimiter: string = "\t", "\n", // ---- rowDelimiter: string = "\n", @@ -178,7 +188,7 @@ export class LearnerUtility { toRemoveEmptyElements, splitDelimiter, numberHashingFeaturesSetting); - featurizer.resetLabelFeatureMaps(intentsUtterances); + featurizer.resetLabelFeatureMaps(intentsUtterancesWeights); return featurizer; } @@ -186,23 +196,25 @@ export class LearnerUtility { filename: string, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, lineIndexToStart: number): - { "intents": string[], "utterances": string[] } { + { "intents": string[], "texts": string[], "weights": number[] } { if (!Utility.exists(filename)) { Utility.debuggingThrow( `The input dataset file ${filename} does not exist! process.cwd()=${process.cwd()}`); } - const intentsUtterances: { "intents": string[], "utterances": string[] } = + const intentsUtterancesWeights: { "intents": string[], "texts": string[], "weights": number[] } = Utility.loadLabelTextColumnarFile( filename, // ---- filename: string, labelColumnIndex, // ---- labelColumnIndex: number = 0, textColumnIndex, // ---- textColumnIndex: number = 1, + weightColumnIndex, // ---- weightColumnIndex: number = -1, lineIndexToStart, // ---- lineIndexToStart: number = 0, "\t", // ---- columnDelimiter: string = "\t", "\n", // ---- rowDelimiter: string = "\n", "utf8", // ---- encoding: string = "utf8", -1, // ---- lineIndexToEnd: number = -1 ); - return intentsUtterances; + return intentsUtterancesWeights; } } diff --git a/packages/dispatcher/src/utility/AppUtility.ts b/packages/dispatcher/src/utility/AppUtility.ts index 80444745e..a3d188750 100644 --- a/packages/dispatcher/src/utility/AppUtility.ts +++ b/packages/dispatcher/src/utility/AppUtility.ts @@ -13,11 +13,13 @@ export function exampleFunctionUtilityWithFilename( filename: string, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, linesToSkip: number): void { // ----------------------------------------------------------------------- Utility.debuggingLog(`filename=${filename}`); Utility.debuggingLog(`labelColumnIndex=${labelColumnIndex}`); Utility.debuggingLog(`textColumnIndex=${textColumnIndex}`); + Utility.debuggingLog(`weightColumnIndex=${weightColumnIndex}`); Utility.debuggingLog(`linesToSkip=${linesToSkip}`); // ----------------------------------------------------------------------- const labels: string[] = [ "label0", "label1", "label2" ]; @@ -27,20 +29,22 @@ export function exampleFunctionUtilityWithFilename( labelMap.label2 = 2; DictionaryMapUtility.validateStringArrayAndStringIdNumberValueDictionary(labels, labelMap); // ----------------------------------------------------------------------- - const intentsUtterances: { "intents": string[], "utterances": string[] } = + const intentsUtterancesWeights: { "intents": string[], "texts": string[], "weights": number[] } = Utility.loadLabelTextColumnarFile( - filename, // ---- filename: string, - labelColumnIndex, // ---- labelColumnIndex: number = 0, - textColumnIndex, // ---- textColumnIndex: number = 1, - linesToSkip, // ---- lineIndexToStart: number = 0, - "\t", // ---- columnDelimiter: string = "\t", - "\n", // ---- rowDelimiter: string = "\n", - "utf8", // ---- encoding: string = "utf8", - -1, // ---- lineIndexToEnd: number = -1 + filename, // ---- filename: string, + labelColumnIndex, // ---- labelColumnIndex: number = 0, + textColumnIndex, // ---- textColumnIndex: number = 1, + weightColumnIndex, // ---- weightColumnIndex: number = -1, + linesToSkip, // ---- lineIndexToStart: number = 0, + "\t", // ---- columnDelimiter: string = "\t", + "\n", // ---- rowDelimiter: string = "\n", + "utf8", // ---- encoding: string = "utf8", + -1, // ---- lineIndexToEnd: number = -1 ); - const intents: string[] = intentsUtterances.intents; - const utterances: string[] = intentsUtterances.utterances; - Utility.debuggingLog(`intents.length=${intents.length}, utterances.length=${utterances.length}`); + const intents: string[] = intentsUtterancesWeights.intents; + const texts: string[] = intentsUtterancesWeights.texts; + const weights: number[] = intentsUtterancesWeights.weights; + Utility.debuggingLog(`intents.length=${intents.length}, texts.length=${texts.length}`); // ----------------------------------------------------------------------- const filenameMd5: string = Utility.getStringMd5Hash(filename) as string; Utility.debuggingLog(`filenameMd5=${filenameMd5}`); @@ -86,6 +90,14 @@ export function exampleFunctionUtility(): void { required: false, }, ); + parser.addArgument( + ["-wi", "--weightColumnIndex"], + { + defaultValue: -1, + help: "weight column index", + required: false, + }, + ); parser.addArgument( ["-ls", "--linesToSkip"], { @@ -108,11 +120,13 @@ export function exampleFunctionUtility(): void { const filename: string = args.filename; const labelColumnIndex: number = +args.labelColumnIndex; const textColumnIndex: number = +args.textColumnIndex; + const weightColumnIndex: number = +args.weightColumnIndex; const linesToSkip: number = +args.linesToSkip; exampleFunctionUtilityWithFilename( filename, labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip); // ----------------------------------------------------------------------- } diff --git a/packages/dispatcher/src/utility/utility.ts b/packages/dispatcher/src/utility/utility.ts index 98f8a8533..384b8a669 100644 --- a/packages/dispatcher/src/utility/utility.ts +++ b/packages/dispatcher/src/utility/utility.ts @@ -309,20 +309,20 @@ export class Utility { } public static mapToJsonSerialization(map: Map): string { - return JSON.stringify([...map]); + return Utility.JSONstringify([...map]); } public static jsonSerializationToMap(jsonString: string): Map { const jsonParsedObject: any = JSON.parse(jsonString); return new Map(jsonParsedObject); } public static setToJsonSerialization(set: Set): string { - return JSON.stringify([...set]); + return Utility.JSONstringify([...set]); } public static jsonSerializationToSet(jsonString: string): Set { return new Set(JSON.parse(jsonString)); } public static arrayToJsonSerialization(set: T[]): string { - return JSON.stringify([...set]); + return Utility.JSONstringify([...set]); } public static jsonSerializationToArray(jsonString: string): T[] { return new Array(JSON.parse(jsonString)); @@ -388,13 +388,13 @@ export class Utility { } public static stringMapSetToJson(stringMapSet: Map>): string { - return JSON.stringify(Utility.stringMapSetToObject(stringMapSet)); + return Utility.JSONstringify(Utility.stringMapSetToObject(stringMapSet)); } public static jsonToStringMapSet(jsonString: string): Map> { return Utility.objectToStringMapSet(JSON.parse(jsonString)); } public static stringMapArrayToJson(stringMapArray: Map): string { - return JSON.stringify(Utility.stringMapArrayToObject(stringMapArray)); + return Utility.JSONstringify(Utility.stringMapArrayToObject(stringMapArray)); } public static jsonToStringMapArray(jsonString: string): Map { return Utility.objectToStringMapArray(JSON.parse(jsonString)); @@ -791,18 +791,24 @@ export class Utility { filename: string, labelColumnIndex: number = 0, textColumnIndex: number = 1, + weightColumnIndex: number = -1, scoreColumnBeginIndex: number = 2, numberOfScoreColumns: number = -1, - weightColumnIndex: number = -1, + identifierColumnIndex: number = -1, + predictedLabelColumnIndex: number = -1, + revisedTextColumnIndex: number = -1, lineIndexToStart: number = 0, columnDelimiter: string = "\t", rowDelimiter: string = "\n", encoding: string = "utf8", lineIndexToEnd: number = -1): { - "intents": string[], - "utterances": string[], + "labels": string[], + "texts": string[], "weights": number[], - "scoreArrays": number[][] } { + "identifiers": string[], + "scoreArrays": number[][], + "predictedLabels": string[], + "revisedTexts": string[] } { if (encoding == null) { encoding = "utf8"; } @@ -813,9 +819,12 @@ export class Utility { fileContent, labelColumnIndex, textColumnIndex, + weightColumnIndex, scoreColumnBeginIndex, numberOfScoreColumns, - weightColumnIndex, + identifierColumnIndex, + predictedLabelColumnIndex, + revisedTextColumnIndex, lineIndexToStart, columnDelimiter, rowDelimiter, @@ -825,17 +834,23 @@ export class Utility { fileContent: string, labelColumnIndex: number = 0, textColumnIndex: number = 1, + weightColumnIndex: number = -1, scoreColumnBeginIndex: number = 2, numberOfScoreColumns: number = 0, - weightColumnIndex: number = -1, + identifierColumnIndex: number = -1, + predictedLabelColumnIndex: number = -1, + revisedTextColumnIndex: number = -1, lineIndexToStart: number = 0, columnDelimiter: string = "\t", rowDelimiter: string = "\n", lineIndexToEnd: number = -1): { - "intents": string[], - "utterances": string[], + "labels": string[], + "texts": string[], "weights": number[], - "scoreArrays": number[][] } { + "identifiers": string[], + "scoreArrays": number[][], + "predictedLabels": string[], + "revisedTexts": string[] } { if (labelColumnIndex < 0) { labelColumnIndex = 0; } @@ -854,11 +869,14 @@ export class Utility { if (rowDelimiter == null) { rowDelimiter = "\n"; } - const intents: string[] = []; - const utterances: string[] = []; + const labels: string[] = []; + const texts: string[] = []; const weights: number[] = []; + const identifiers: string[] = []; const scoreArrays: number[][] = []; - const fileLines: string[] = fileContent.split(rowDelimiter); + const predictedLabels: string[] = []; + const revisedTexts: string[] = []; + const fileLines: string[] = Utility.split(fileContent, rowDelimiter); for (let lineIndex = lineIndexToStart; (lineIndex < fileLines.length) && ((lineIndexToEnd < 0) || (lineIndex < lineIndexToEnd)); lineIndex++) { @@ -867,17 +885,17 @@ export class Utility { if (Utility.isEmptyString(line)) { continue; } - const lineColumns: string[] = line.split(columnDelimiter); + const lineColumns: string[] = Utility.split(line, columnDelimiter); // --------------------------------------------------------------- - const intent: string = lineColumns[labelColumnIndex]; + const label: string = lineColumns[labelColumnIndex]; // --------------------------------------------------------------- - const utterance: string = lineColumns[textColumnIndex]; - if (Utility.isEmptyString(intent)) { + const text: string = lineColumns[textColumnIndex]; + if (Utility.isEmptyString(label)) { Utility.debuggingThrow( - `LINE - INDEX=${lineIndex}, intent is empty` + + `LINE - INDEX=${lineIndex}, label is empty` + `, lineColumns.length=${lineColumns.length}` + - `, intent=$${intent}$` + - `, utterance=$${utterance}$` + + `, label=$${label}$` + + `, text=$${text}$` + `, line=$${line}$`); } // --------------------------------------------------------------- @@ -886,12 +904,27 @@ export class Utility { weight = +lineColumns[weightColumnIndex]; } // --------------------------------------------------------------- + let identifier: string = lineIndex.toString(); + if (identifierColumnIndex >= 0) { + identifier = lineColumns[identifierColumnIndex]; + } + let predictedLabel: string = ""; + if (predictedLabelColumnIndex >= 0) { + predictedLabel = lineColumns[predictedLabelColumnIndex]; + } + let revisedText: string = ""; + if (revisedTextColumnIndex >= 0) { + revisedText = lineColumns[revisedTextColumnIndex]; + } + // --------------------------------------------------------------- if (numberOfScoreColumns <= 0) { numberOfScoreColumns = lineColumns.length - scoreColumnBeginIndex; } if ((numberOfScoreColumns <= 0) || (numberOfScoreColumns > (lineColumns.length - scoreColumnBeginIndex))) { Utility.debuggingThrow( - `(numberOfScoreColumns<=0)||(numberOfScoreColumns|${numberOfScoreColumns}|>lineColumns.length|${lineColumns.length}|))`); + `(numberOfScoreColumns<=0)||(numberOfScoreColumns|${numberOfScoreColumns}|>lineColumns.length|${lineColumns.length}|))` + + `,scoreColumnBeginIndex=${scoreColumnBeginIndex}` + + `,line=${line}`); } const scoreArray: number[] = []; let scoreIndex = scoreColumnBeginIndex; @@ -904,30 +937,64 @@ export class Utility { // Utility.debuggingLog( // `LINE - INDEX=${lineIndex}` + // `, lineColumns.length=${lineColumns.length}` + - // `, intent=$${intent}$` + - // `, utterance=$${utterance}$` + + // `, label=$${label}$` + + // `, text=$${text}$` + // `, line=$${line}$`); // // } // --------------------------------------------------------------- - intents.push(intent); - utterances.push(utterance); + labels.push(label); + texts.push(text); weights.push(weight); + identifiers.push(identifier); scoreArrays.push(scoreArray); + predictedLabels.push(predictedLabel); + revisedTexts.push(revisedText); // --------------------------------------------------------------- } - return { intents, utterances, weights, scoreArrays }; + return { labels, texts, weights, identifiers, scoreArrays, predictedLabels, revisedTexts }; } + public static loadLabelUtteranceColumnarFile( + filename: string, + labelColumnIndex: number = 0, + textColumnIndex: number = 1, + weightColumnIndex: number = -1, + lineIndexToStart: number = 0, + columnDelimiter: string = "\t", + rowDelimiter: string = "\n", + encoding: string = "utf8", + lineIndexToEnd: number = -1): { "intents": string[], "utterances": string[], "weights": number[] } { + const intentsTextsWeights: { "intents": string[], "texts": string[], "weights": number[] } = + Utility.loadLabelTextColumnarFile( + filename, + labelColumnIndex, + textColumnIndex, + weightColumnIndex, + lineIndexToStart, + columnDelimiter, + rowDelimiter, + encoding, + lineIndexToEnd); + const intents: string[] = intentsTextsWeights.intents; + const utterances: string[] = intentsTextsWeights.texts; + const weights: number[] = intentsTextsWeights.weights; + const intentsUtterancesWeights: { "intents": string[], "utterances": string[], "weights": number[] } = { + intents, + utterances, + weights }; + return intentsUtterancesWeights; + } public static loadLabelTextColumnarFile( filename: string, labelColumnIndex: number = 0, textColumnIndex: number = 1, + weightColumnIndex: number = -1, lineIndexToStart: number = 0, columnDelimiter: string = "\t", rowDelimiter: string = "\n", encoding: string = "utf8", - lineIndexToEnd: number = -1): { "intents": string[], "utterances": string[] } { + lineIndexToEnd: number = -1): { "intents": string[], "texts": string[], "weights": number[] } { if (encoding == null) { encoding = "utf8"; } @@ -938,20 +1005,51 @@ export class Utility { fileContent, labelColumnIndex, textColumnIndex, + weightColumnIndex, lineIndexToStart, columnDelimiter, rowDelimiter, lineIndexToEnd); } + public static loadLabelUtteranceColumnarContent( + fileContent: string, + labelColumnIndex: number = 0, + textColumnIndex: number = 1, + weightColumnIndex: number = -1, + lineIndexToStart: number = 0, + columnDelimiter: string = "\t", + rowDelimiter: string = "\n", + encoding: string = "utf8", + lineIndexToEnd: number = -1): { "intents": string[], "utterances": string[], "weights": number[] } { + const intentsTextsWeights: { "intents": string[], "texts": string[], "weights": number[] } = + Utility.loadLabelTextColumnarContent( + fileContent, + labelColumnIndex, + textColumnIndex, + weightColumnIndex, + lineIndexToStart, + columnDelimiter, + rowDelimiter, + lineIndexToEnd); + const intents: string[] = intentsTextsWeights.intents; + const utterances: string[] = intentsTextsWeights.texts; + const weights: number[] = intentsTextsWeights.weights; + const intentsUtterancesWeights: { "intents": string[], "utterances": string[], "weights": number[] } = { + intents, + utterances, + weights }; + return intentsUtterancesWeights; + } public static loadLabelTextColumnarContent( fileContent: string, labelColumnIndex: number = 0, textColumnIndex: number = 1, + weightColumnIndex: number = -1, lineIndexToStart: number = 0, columnDelimiter: string = "\t", rowDelimiter: string = "\n", lineIndexToEnd: number = -1): - { "intents": string[], "utterances": string[] } { + { "intents": string[], "texts": string[], "weights": number[] } { if (labelColumnIndex < 0) { labelColumnIndex = 0; } @@ -968,8 +1066,9 @@ export class Utility { rowDelimiter = "\n"; } const intents: string[] = []; - const utterances: string[] = []; - const fileLines: string[] = fileContent.split(rowDelimiter); + const texts: string[] = []; + const weights: number[] = []; + const fileLines: string[] = Utility.split(fileContent, rowDelimiter); for (let lineIndex = lineIndexToStart; (lineIndex < fileLines.length) && ((lineIndexToEnd < 0) || (lineIndex < lineIndexToEnd)); lineIndex++) { @@ -977,23 +1076,29 @@ export class Utility { if (Utility.isEmptyString(line)) { continue; } - const lineColumns: string[] = line.split(columnDelimiter); + const lineColumns: string[] = Utility.split(line, columnDelimiter); const intent: string = lineColumns[labelColumnIndex]; - const utterance: string = lineColumns[textColumnIndex]; + const text: string = lineColumns[textColumnIndex]; + let weight: number = 1; + if (weightColumnIndex >= 0) { + weight = +lineColumns[weightColumnIndex]; + } if (Utility.isEmptyString(intent)) { Utility.debuggingThrow( `LINE - INDEX=${lineIndex}, intent is empty` + `, lineColumns.length=${lineColumns.length}` + `, intent=$${intent}$` + - `, utterance=$${utterance}$` + + `, text=$${text}$` + + `, weight=$${weight}$` + `, line=$${line}$`); } - if (Utility.isEmptyString(utterance)) { + if (Utility.isEmptyString(text)) { Utility.debuggingThrow( - `LINE - INDEX=${lineIndex}, utterance is empty` + + `LINE - INDEX=${lineIndex}, text is empty` + `, lineColumns.length=${lineColumns.length}` + - `, utterance=$${utterance}$` + - `, utterance=$${utterance}$` + + `, intent=$${intent}$` + + `, text=$${text}$` + + `, weight=$${weight}$` + `, line=$${line}$`); } // { @@ -1001,14 +1106,16 @@ export class Utility { // `LINE - INDEX=${lineIndex}` + // `, lineColumns.length=${lineColumns.length}` + // `, intent=$${intent}$` + - // `, utterance=$${utterance}$` + + // `, text=$${text}$` + + // `, weight=$${weight}$` + // `, line=$${line}$`); // // } intents.push(intent); - utterances.push(utterance); + texts.push(text); + weights.push(weight); } - return { intents, utterances }; + return { intents, texts, weights }; } public static storeDataArraysToTsvFile( @@ -1055,7 +1162,8 @@ export class Utility { "endPos": number, }>, "intent": string, - "text": string }>, + "text": string, + "weight": number }>, utteranceReconstructionDelimiter: string = " ", defaultEntityTag: string = "O", defaultPartOfSpeechTag: string = "", @@ -1082,7 +1190,8 @@ export class Utility { "endPos": number, }>, "intent": string, - "text": string } = luUtterances[index]; + "text": string, + "weight": number } = luUtterances[index]; const text: string = luUtterance.text; const intent: string = luUtterance.intent; const entities: Array<{ @@ -1175,7 +1284,8 @@ export class Utility { "endPos": number, }>, "intent": string, - "text": string }> { + "text": string, + "weight": number }> { if (Utility.isEmptyString(utteranceReconstructionDelimiter)) { utteranceReconstructionDelimiter = " "; } @@ -1212,6 +1322,7 @@ export class Utility { Utility.debuggingThrow( `entityTagArrays.length|${entityTagArrays.length}|!==numberInstances|${numberInstances}|`); } + const weight: number = 1; const luUtterances: Array<{ "entities": Array<{ "entity": string, @@ -1224,7 +1335,8 @@ export class Utility { "endPos": number, }>, "intent": string, - "text": string }> = []; + "text": string, + "weight": number }> = []; for (let index: number = 0; index < numberInstances; index++) { let intent = ""; const id: string = ids[index]; @@ -1313,11 +1425,13 @@ export class Utility { "endPos": number, }>, "intent": string, - "text": string } = { + "text": string, + "weight": number } = { entities, partOfSpeechTags, intent, text, + weight, }; luUtterances.push(luUtterance); } @@ -1376,7 +1490,7 @@ export class Utility { let currentPartOfSpeechTagArray: string[] = []; let currentTagArray: string[] = []; let isFirst: boolean = true; - const fileLines: string[] = fileContent.split(rowDelimiter); + const fileLines: string[] = Utility.split(fileContent, rowDelimiter); for (let lineIndex = lineIndexToStart; (lineIndex < fileLines.length) && ((lineIndexToEnd < 0) || (lineIndex < lineIndexToEnd)); lineIndex++) { @@ -1462,7 +1576,7 @@ export class Utility { public static stringToLineArray( stringContent: string): string[] { - const lineArray: string[] = stringContent.split("\n"); + const lineArray: string[] = Utility.split(stringContent, "\n"); const lineTrimedArray: string[] = lineArray.map((x) => x.trim()); return lineTrimedArray; } @@ -1522,7 +1636,7 @@ export class Utility { } public static getObjectMd5Hash(objectValue: object): string|Int32Array { - return Utility.getStringMd5Hash(JSON.stringify(objectValue)); + return Utility.getStringMd5Hash(Utility.JSONstringify(objectValue)); } public static getStringMd5Hash(feature: string): string | Int32Array { return md5.Md5.hashStr(feature); @@ -1532,7 +1646,7 @@ export class Utility { return Math.abs(Utility.getObjectHashCode(objectValue)); } public static getObjectHashCode(objectValue: object): number { - return Utility.getStringHashCode(JSON.stringify(objectValue).toString()); + return Utility.getStringHashCode(Utility.JSONstringify(objectValue).toString()); } public static getPositiveStringHashCode(feature: string): number { return Math.abs(Utility.getStringHashCode(feature)); @@ -1604,13 +1718,14 @@ export class Utility { } public static debuggingLog( - message: any): void { + message: any): string { const dateTimeString: string = (new Date()).toISOString(); const logMessage: string = `[${dateTimeString}] LOG-MESSAGE: ${message}`; if (Utility.toPrintDebuggingLogToConsole) { // tslint:disable-next-line:no-console console.log(logMessage); } + return logMessage; } public static debuggingThrow( @@ -1700,7 +1815,7 @@ export class Utility { for (let i = 0; i < numberDelimiters; i++) { input = input.replace(delimiters[i], replacementDelimiters[i]); } - let result: string[] = input.split(splitDelimiter); + let result: string[] = Utility.split(input, splitDelimiter); if (toRemoveEmptyElements) { result = result.filter((element: string) => { return (element && (element !== "")); @@ -1883,6 +1998,13 @@ export class Utility { return JSON.stringify(input, null, 4); } + public static split(input: string, delimiter: string): string[] { + return Utility.splitRaw(input, delimiter).map((x: string) => x.trim()); + } + public static splitRaw(input: string, delimiter: string): string[] { + return input.split(delimiter); + } + protected static rngBurninIterations: number = 16384; protected static rngBurninDone: boolean = false; protected static xorshift128plusState0: number = 1; diff --git a/packages/dispatcher/test/data/AppColumnarData.test.ts b/packages/dispatcher/test/data/AppColumnarData.test.ts index 20e324c0d..f254c6074 100644 --- a/packages/dispatcher/test/data/AppColumnarData.test.ts +++ b/packages/dispatcher/test/data/AppColumnarData.test.ts @@ -22,6 +22,8 @@ describe("Test Suite - data/AppColumnarData", () => { process.argv.push("0"); process.argv.push("--textColumnIndex"); process.argv.push("2"); + process.argv.push("--weightColumnIndex"); + process.argv.push("1"); process.argv.push("--linesToSkip"); process.argv.push("1"); exampleFunctionData(); diff --git a/packages/dispatcher/test/data/ColumnarData.test.ts b/packages/dispatcher/test/data/ColumnarData.test.ts index e0754a0d7..8abe31411 100644 --- a/packages/dispatcher/test/data/ColumnarData.test.ts +++ b/packages/dispatcher/test/data/ColumnarData.test.ts @@ -626,6 +626,7 @@ export function exampleFunctionDataWithColumnarContent( columnarContent: string, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, linesToSkip: number): ColumnarData { // ----------------------------------------------------------------------- const columnarData: ColumnarData = ColumnarData.createColumnarData( @@ -633,6 +634,7 @@ export function exampleFunctionDataWithColumnarContent( new NgramSubwordFeaturizer(), labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip, true); const luUtterances: Array<{ @@ -647,7 +649,8 @@ export function exampleFunctionDataWithColumnarContent( "endPos": number, }>, "intent": string, - "text": string }> = columnarData.getLuUtterances(); + "text": string, + "weight": number }> = columnarData.getLuUtterances(); // Utility.debuggingLog(`luUtterances=` + // `${Utility.getJsonStringified(luUtterances)}`); assert.ok(luUtterances, `luUtterances=${luUtterances}`); @@ -795,6 +798,7 @@ describe("Test Suite - data/ColumnarData/ColumnarData - Email", () => { ColumnarContentEmail, 0, 2, + 1, 1); }); }); diff --git a/packages/dispatcher/test/data/ColumnarDataGlueSst2.test.ts b/packages/dispatcher/test/data/ColumnarDataGlueSst2.test.ts index 349069a5b..411d859ad 100644 --- a/packages/dispatcher/test/data/ColumnarDataGlueSst2.test.ts +++ b/packages/dispatcher/test/data/ColumnarDataGlueSst2.test.ts @@ -67377,6 +67377,7 @@ export function exampleFunctionDataWithColumnarContent( columnarContent: string, labelColumnIndex: number, textColumnIndex: number, + weightColumnIndex: number, linesToSkip: number): ColumnarData { // ----------------------------------------------------------------------- const columnarData: ColumnarData = ColumnarData.createColumnarData( @@ -67384,6 +67385,7 @@ export function exampleFunctionDataWithColumnarContent( new NgramSubwordFeaturizer(), labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip, true); const luUtterances: Array<{ @@ -67398,7 +67400,8 @@ export function exampleFunctionDataWithColumnarContent( "endPos": number, }>, "intent": string, - "text": string }> = columnarData.getLuUtterances(); + "text": string, + "weight": number }> = columnarData.getLuUtterances(); // Utility.debuggingLog(`luUtterances=` + // `${Utility.getJsonStringified(luUtterances)}`); assert.ok(luUtterances, `luUtterances=${luUtterances}`); @@ -67546,6 +67549,7 @@ describe("Test Suite - data/ColumnarData/ColumnarData - GLUE SST2", () => { ColumnarContentGlueSst2, 1, 0, + -1, 1); }); }); diff --git a/packages/dispatcher/test/mathematics/confusion_matrix/ConfusionMatrix.test.ts b/packages/dispatcher/test/mathematics/confusion_matrix/ConfusionMatrix.test.ts index c7a03f9d3..dc9b8df5a 100644 --- a/packages/dispatcher/test/mathematics/confusion_matrix/ConfusionMatrix.test.ts +++ b/packages/dispatcher/test/mathematics/confusion_matrix/ConfusionMatrix.test.ts @@ -34,7 +34,7 @@ function getTestingConfusionMatrix(): ConfusionMatrix { Utility.debuggingLog( "labels=" + confusionMatrix.getLabels()); Utility.debuggingLog( - confusionMatrix.getLabelMap()); + Utility.JSONstringify(confusionMatrix.getLabelMap())); Utility.debuggingLog( "rows=" + confusionMatrix.getConfusionMatrixRows()); Utility.debuggingLog( diff --git a/packages/dispatcher/test/model/evaluation/cross_validation/AppCrossValidator.test.ts b/packages/dispatcher/test/model/evaluation/cross_validation/AppCrossValidator.test.ts index 105d74dd5..3e7e21ccf 100644 --- a/packages/dispatcher/test/model/evaluation/cross_validation/AppCrossValidator.test.ts +++ b/packages/dispatcher/test/model/evaluation/cross_validation/AppCrossValidator.test.ts @@ -46,6 +46,7 @@ describe("Test Suite - model/evaluation/cross_validator/AppCrossValidator", () = 0, 2, 1, + 1, 5, 10, 100, @@ -54,7 +55,7 @@ describe("Test Suite - model/evaluation/cross_validator/AppCrossValidator", () = 32); }); - it("Test.0200 mainCrossValidator()", async function() { + it("Test.0200 mainCrossValidator() - Email.tsv", async function() { Utility.toPrintDebuggingLogToConsole = UnitTestHelper.getDefaultUnitTestDebuggingLogFlag(); this.timeout(UnitTestHelper.getDefaultUnitTestTimeout()); const filename: string = @@ -69,6 +70,8 @@ describe("Test Suite - model/evaluation/cross_validator/AppCrossValidator", () = process.argv.push("0"); process.argv.push("--textColumnIndex"); process.argv.push("2"); + process.argv.push("--weightColumnIndex"); + process.argv.push("1"); process.argv.push("--linesToSkip"); process.argv.push("1"); const mainCrossValidatorResult: { @@ -91,7 +94,7 @@ describe("Test Suite - model/evaluation/cross_validator/AppCrossValidator", () = } } }); - it("Test.0201 mainCrossValidator()", async function() { + it("Test.0201 mainCrossValidator() - Email.lu", async function() { Utility.toPrintDebuggingLogToConsole = UnitTestHelper.getDefaultUnitTestDebuggingLogFlag(); this.timeout(UnitTestHelper.getDefaultUnitTestTimeout()); const filename: string = diff --git a/packages/dispatcher/test/model/evaluation/cross_validation/CrossValidator.test.ts b/packages/dispatcher/test/model/evaluation/cross_validation/CrossValidator.test.ts index 7b60c117d..dacf48f79 100644 --- a/packages/dispatcher/test/model/evaluation/cross_validation/CrossValidator.test.ts +++ b/packages/dispatcher/test/model/evaluation/cross_validation/CrossValidator.test.ts @@ -28,7 +28,7 @@ import { Utility } from "../../../../src/utility/Utility"; import { UnitTestHelper } from "../../../utility/Utility.test"; describe("Test Suite - model/evaluation/cross_validator/CrossValidator", async () => { - it("Test.0000 crossValidate()", async function() { + it("Test.0000 crossValidate() - LuContentEmail", async function() { Utility.toPrintDebuggingLogToConsole = UnitTestHelper.getDefaultUnitTestDebuggingLogFlag(); this.timeout(UnitTestHelper.getDefaultUnitTestTimeout()); const luContent: string = LuContentEmail; @@ -96,12 +96,13 @@ describe("Test Suite - model/evaluation/cross_validator/CrossValidator", async ( `,crossValidationResult.confusionMatrixCrossValidation.getWeightedMacroAverageMetrics()=` + `${crossValidationResult.confusionMatrixCrossValidation.getWeightedMacroAverageMetrics()}`); }); - it("Test.0001 crossValidate()", function() { + it("Test.0001 crossValidate() - ColumnarContentEmail", function() { Utility.toPrintDebuggingLogToConsole = UnitTestHelper.getDefaultUnitTestDebuggingLogFlag(); this.timeout(UnitTestHelper.getDefaultUnitTestTimeout()); const columnarContent: string = ColumnarContentEmail; const labelColumnIndex: number = 0; const textColumnIndex: number = 2; + const weightColumnIndex: number = 1; const linesToSkip: number = 1; const numberOfCrossValidationFolds: number = CrossValidator.defaultNumberOfCrossValidationFolds; @@ -125,6 +126,7 @@ describe("Test Suite - model/evaluation/cross_validator/CrossValidator", async ( new NgramSubwordFeaturizer(), labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip, true); const intentLabelIndexArray: number[] = diff --git a/packages/dispatcher/test/model/language_understanding/featurizer/NgramSubwordFeaturizer.test.ts b/packages/dispatcher/test/model/language_understanding/featurizer/NgramSubwordFeaturizer.test.ts index d80d0f37a..b965d66db 100644 --- a/packages/dispatcher/test/model/language_understanding/featurizer/NgramSubwordFeaturizer.test.ts +++ b/packages/dispatcher/test/model/language_understanding/featurizer/NgramSubwordFeaturizer.test.ts @@ -39,6 +39,7 @@ function getNgramSubwordFeaturizerForUnitTests( 0, 2, 1, + 1, true); return featurizer; } @@ -65,6 +66,7 @@ function getHashingNgramSubwordFeaturizerForUnitTests( 0, 2, 1, + 1, true); return featurizer; } @@ -84,15 +86,15 @@ describe("Test Suite - model/language_understanding/featurizer/ngram_subword_fea Utility.debuggingLog("hashing code = " + Utility.getPositiveStringHashCode(input)); }); - it("Test.0100 getIntentsUtterances()", function() { + it("Test.0100 getIntentsUtterancesWeights()", function() { Utility.toPrintDebuggingLogToConsole = UnitTestHelper.getDefaultUnitTestDebuggingLogFlag(); this.timeout(UnitTestHelper.getDefaultUnitTestTimeout()); const featurizer: NgramSubwordFeaturizer = featurizerColumnarContentEmail; - const intentsUtterances: { "intents": string[], "utterances": string[] } = - featurizer.getIntentsUtterances(); + const intentsUtterancesWeights: { "intents": string[], "utterances": string[], "weights": number[] } = + featurizer.getIntentsUtterancesWeights(); Utility.debuggingLog( - `intentsUtterances=${Utility.JSONstringify(intentsUtterances)}`); + `intentsUtterancesWeights=${Utility.JSONstringify(intentsUtterancesWeights)}`); }); it("Test.0200 getLabels()", function() { @@ -341,7 +343,7 @@ describe("Test Suite - model/language_understanding/featurizer/ngram_subword_fea "intentLabelIndexArray": number[], "utteranceFeatureIndexArrays": number[][], } = featurizer.createIntentUtteranceSparseIndexArrays( - featurizer.getIntentsUtterances()); + featurizer.getIntentsUtterancesWeights()); Utility.debuggingLog( `intentUtteranceSparseIndexArrays=${intentUtteranceSparseIndexArrays}`); const intentLabelIndexArray: number[] = @@ -383,15 +385,16 @@ describe("Test Suite - model/language_understanding/featurizer/ngram_subword_fea this.timeout(UnitTestHelper.getDefaultUnitTestTimeout()); const featurizer: NgramSubwordFeaturizer = featurizerColumnarContentEmail; - const intentsUtterances: { + const intentsUtterancesWeights: { "intents": string[], "utterances": string[], - } = featurizer.getIntentsUtterances(); + "weights": number[], + } = featurizer.getIntentsUtterancesWeights(); const intentUtteranceMiniBatchingSparseIndexArrays: { "intentLabelIndexArray": number[], "utteranceFeatureIndexArrays": number[][], } = featurizer.createIntentUtteranceMiniBatchingSparseIndexArrays( - intentsUtterances, 0, 2); + intentsUtterancesWeights, 0, 2); Utility.debuggingLog( `intentUtteranceMiniBatchingSparseIndexArrays=${intentUtteranceMiniBatchingSparseIndexArrays}`); const intentLabelIndexArray: number[] = @@ -460,7 +463,7 @@ describe("Test Suite - model/language_understanding/featurizer/ngram_subword_fea "intentLabelIndexArray": number[], "utteranceFeatureIndexArrays": number[][], } = featurizer.createIntentUtteranceHashingSparseIndexArrays( - featurizer.getIntentsUtterances()); + featurizer.getIntentsUtterancesWeights()); Utility.debuggingLog( `intentUtteranceSparseIndexArrays=${intentUtteranceSparseIndexArrays}`); const intentLabelIndexArray: number[] = @@ -605,7 +608,7 @@ describe("Test Suite - model/language_understanding/featurizer/ngram_subword_fea "intentLabelIndexArrays": boolean[][], "utteranceFeatureIndexArrays": boolean[][], } = featurizer.createIntentUtteranceOneHotEncoderBooleanArrays( - featurizer.getIntentsUtterances()); + featurizer.getIntentsUtterancesWeights()); Utility.debuggingLog( `intentUtteranceOneHotEncoderBooleanArrays=${intentUtteranceOneHotEncoderBooleanArrays}`); const intentLabelIndexArrays: boolean[][] = @@ -634,7 +637,7 @@ describe("Test Suite - model/language_understanding/featurizer/ngram_subword_fea "intentLabelIndexArrays": number[][], "utteranceFeatureIndexArrays": number[][], } = featurizer.createIntentUtteranceOneHotEncoderNumberArrays( - featurizer.getIntentsUtterances()); + featurizer.getIntentsUtterancesWeights()); Utility.debuggingLog( `intentUtteranceOneHotEncoderBooleanArrays=${intentUtteranceOneHotEncoderBooleanArrays}`); const intentLabelIndexArrays: number[][] = @@ -699,7 +702,7 @@ describe("Test Suite - model/language_understanding/featurizer/ngram_subword_fea "intentLabelIndexArrays": boolean[][], "utteranceFeatureIndexArrays": boolean[][], } = featurizer.createIntentUtteranceMiniBatchingOneHotEncoderBooleanArrays( - featurizer.getIntentsUtterances(), 0, 2); + featurizer.getIntentsUtterancesWeights(), 0, 2); Utility.debuggingLog( `intentUtteranceOneHotEncoderBooleanArrays=${intentUtteranceOneHotEncoderBooleanArrays}`); const intentLabelIndexArrays: boolean[][] = @@ -728,7 +731,7 @@ describe("Test Suite - model/language_understanding/featurizer/ngram_subword_fea "intentLabelIndexArrays": number[][], "utteranceFeatureIndexArrays": number[][], } = featurizer.createIntentUtteranceMiniBatchingOneHotEncoderNumberArrays( - featurizer.getIntentsUtterances(), 0, 2); + featurizer.getIntentsUtterancesWeights(), 0, 2); Utility.debuggingLog( `intentUtteranceOneHotEncoderBooleanArrays=${intentUtteranceOneHotEncoderBooleanArrays}`); const intentLabelIndexArrays: number[][] = @@ -840,7 +843,7 @@ describe("Test Suite - model/language_understanding/featurizer/ngram_subword_fea "intentLabelIndexArrays": boolean[][], "utteranceFeatureIndexArrays": boolean[][], } = featurizer.createIntentUtteranceHashingOneHotEncoderBooleanArrays( - featurizer.getIntentsUtterances()); + featurizer.getIntentsUtterancesWeights()); Utility.debuggingLog( `intentUtteranceOneHotEncoderBooleanArrays=${intentUtteranceOneHotEncoderBooleanArrays}`); const intentLabelIndexArrays: boolean[][] = @@ -869,7 +872,7 @@ describe("Test Suite - model/language_understanding/featurizer/ngram_subword_fea "intentLabelIndexArrays": number[][], "utteranceFeatureIndexArrays": number[][], } = featurizer.createIntentUtteranceHashingOneHotEncoderNumberArrays( - featurizer.getIntentsUtterances()); + featurizer.getIntentsUtterancesWeights()); Utility.debuggingLog( `intentUtteranceOneHotEncoderBooleanArrays=${intentUtteranceOneHotEncoderBooleanArrays}`); const intentLabelIndexArrays: number[][] = @@ -953,6 +956,7 @@ describe("Test Suite - model/language_understanding/featurizer/ngram_subword_fea 0, 2, 1, + 1, true); }); @@ -980,6 +984,7 @@ describe("Test Suite - model/language_understanding/featurizer/ngram_subword_fea 0, 2, 1, + 1, true); const serializedJsonString: string = featurizer.serializeToJsonString(undefined, 4); Utility.debuggingLog( @@ -1010,6 +1015,7 @@ describe("Test Suite - model/language_understanding/featurizer/ngram_subword_fea 0, 2, 1, + 1, true); const serializedJsonString: string = featurizer.serializeToJsonString(undefined, 4); Utility.debuggingLog( diff --git a/packages/dispatcher/test/model/supervised/classifier/auto_active_learning/AppAutoActiveLearner.test.ts b/packages/dispatcher/test/model/supervised/classifier/auto_active_learning/AppAutoActiveLearner.test.ts index e32615f6c..fa11f6a13 100644 --- a/packages/dispatcher/test/model/supervised/classifier/auto_active_learning/AppAutoActiveLearner.test.ts +++ b/packages/dispatcher/test/model/supervised/classifier/auto_active_learning/AppAutoActiveLearner.test.ts @@ -61,6 +61,7 @@ describe("Test Suite - model/supervised/classifier/auto_active_learning/AppAutoA 0, 2, 1, + 1, AppAutoActiveLearner.defaultDoBootstrapResampling, DictionaryMapUtility.newTMapStringKeyGenericValue(), AutoActiveLearner.defaultDoAutoActiveLearning, @@ -140,6 +141,8 @@ describe("Test Suite - model/supervised/classifier/auto_active_learning/AppAutoA process.argv.push("0"); process.argv.push("--textColumnIndex"); process.argv.push("2"); + process.argv.push("--weightColumnIndex"); + process.argv.push("1"); process.argv.push("--linesToSkip"); process.argv.push("1"); const outputFilenames: string[] = diff --git a/packages/dispatcher/test/model/supervised/classifier/auto_active_learning/AutoActiveLearner.test.ts b/packages/dispatcher/test/model/supervised/classifier/auto_active_learning/AutoActiveLearner.test.ts index 09c1c77db..2d8198895 100644 --- a/packages/dispatcher/test/model/supervised/classifier/auto_active_learning/AutoActiveLearner.test.ts +++ b/packages/dispatcher/test/model/supervised/classifier/auto_active_learning/AutoActiveLearner.test.ts @@ -114,6 +114,7 @@ describe("Test Suite - model/supervised/classifier/auto_active_learning/auto_act const columnarContent: string = ColumnarContentEmail; const labelColumnIndex: number = 0; const textColumnIndex: number = 2; + const weightColumnIndex: number = 1; const linesToSkip: number = 1; const columnarData: ColumnarData = ColumnarData.createColumnarData( @@ -121,6 +122,7 @@ describe("Test Suite - model/supervised/classifier/auto_active_learning/auto_act new NgramSubwordFeaturizer(), labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip, true); // ----------------------------------------------------------------------- @@ -217,6 +219,7 @@ describe("Test Suite - model/supervised/classifier/auto_active_learning/auto_act // columnarData, // labelColumnIndex, // textColumnIndex, + // weightColumnIndex, // linesToSkip, // new Set(aalSampledInstanceIndexArray)); }); diff --git a/packages/dispatcher/test/model/supervised/classifier/neural_network/learner/UtilityLearner.test.ts b/packages/dispatcher/test/model/supervised/classifier/neural_network/learner/UtilityLearner.test.ts index b4486de22..0431e3ca6 100644 --- a/packages/dispatcher/test/model/supervised/classifier/neural_network/learner/UtilityLearner.test.ts +++ b/packages/dispatcher/test/model/supervised/classifier/neural_network/learner/UtilityLearner.test.ts @@ -30,6 +30,7 @@ describe("Test Suite - model/supervised/classifier/neural_network/learner/utilit "resources/data/Columnar/Email.tsv", 0, 2, + 1, 1); const l1Regularization: number = AppSoftmaxRegressionSparse.defaultL1Regularization; const l2Regularization: number = AppSoftmaxRegressionSparse.defaultL2Regularization; @@ -51,6 +52,7 @@ describe("Test Suite - model/supervised/classifier/neural_network/learner/utilit "resources/data/Columnar/EmailTest.tsv", 0, 2, + 1, 1); }); @@ -63,6 +65,7 @@ describe("Test Suite - model/supervised/classifier/neural_network/learner/utilit "resources/data/Columnar/Email.tsv", 0, 2, + 1, 1); const l1Regularization: number = AppSoftmaxRegressionSparse.defaultL1Regularization; const l2Regularization: number = AppSoftmaxRegressionSparse.defaultL2Regularization; @@ -84,6 +87,7 @@ describe("Test Suite - model/supervised/classifier/neural_network/learner/utilit "resources/data/Columnar/EmailTest.tsv", 0, 2, + 1, 1); }); @@ -95,6 +99,7 @@ describe("Test Suite - model/supervised/classifier/neural_network/learner/utilit "resources/data/Columnar/Email.tsv", 0, 2, + 1, 1); }); @@ -105,6 +110,7 @@ describe("Test Suite - model/supervised/classifier/neural_network/learner/utilit "resources/data/Columnar/EmailTest.tsv", 0, 2, + 1, 1); }); }); diff --git a/packages/dispatcher/test/utility/AppUtility.test.ts b/packages/dispatcher/test/utility/AppUtility.test.ts index b7bfc378a..d18b3697b 100644 --- a/packages/dispatcher/test/utility/AppUtility.test.ts +++ b/packages/dispatcher/test/utility/AppUtility.test.ts @@ -19,11 +19,13 @@ describe("Test Suite - utility/app_utility", () => { const filename: string = "resources/data/Columnar/Email.tsv"; const labelColumnIndex: number = 0; const textColumnIndex: number = 2; + const weightColumnIndex: number = 1; const linesToSkip: number = 1; exampleFunctionUtilityWithFilename( filename, labelColumnIndex, textColumnIndex, + weightColumnIndex, linesToSkip); }); it("Test.0001 exampleFunctionUtility()", function() { @@ -40,6 +42,8 @@ describe("Test Suite - utility/app_utility", () => { process.argv.push("0"); process.argv.push("--textColumnIndex"); process.argv.push("2"); + process.argv.push("--weightColumnIndex"); + process.argv.push("1"); process.argv.push("--linesToSkip"); process.argv.push("1"); exampleFunctionUtility(); diff --git a/packages/dispatcher/test/utility/utility.test.ts b/packages/dispatcher/test/utility/utility.test.ts index 74ee3b80f..a25470d66 100644 --- a/packages/dispatcher/test/utility/utility.test.ts +++ b/packages/dispatcher/test/utility/utility.test.ts @@ -1477,16 +1477,18 @@ describe("Test Suite - utility/Utility", () => { const filename: string = "resources/data/Columnar/Email.tsv"; const labelColumnIndex: number = 0; const textColumnIndex: number = 2; + const weightColumnIndex: number = 1; const lineIndexToStart: number = 1; const columnDelimiter: string = "\t"; const rowDelimiter: string = "\n"; const encoding: string = "utf8"; const lineIndexToEnd: number = -1; - const result: { "intents": string[], "utterances": string[] } = + const result: { "intents": string[], "texts": string[], "weights": number[] } = Utility.loadLabelTextColumnarFile( filename, labelColumnIndex, textColumnIndex, + weightColumnIndex, lineIndexToStart, columnDelimiter, rowDelimiter, @@ -1494,12 +1496,12 @@ describe("Test Suite - utility/Utility", () => { lineIndexToEnd); const intents: string[] = result.intents; - const utterances: string[] = - result.utterances; + const texts: string[] = + result.texts; assert.ok(intents.length === 601, `intents.length=${intents.length}`); - assert.ok(utterances.length === 601, - `utterances.length=${utterances.length}`); + assert.ok(texts.length === 601, + `utterances.length=${texts.length}`); }); it("Test.1001 loadLabelTextColumnarContent()", function() { Utility.toPrintDebuggingLogToConsole = UnitTestHelper.getDefaultUnitTestDebuggingLogFlag(); @@ -1511,27 +1513,33 @@ describe("Test Suite - utility/Utility", () => { Utility.loadFile(filename); const labelColumnIndex: number = 0; const textColumnIndex: number = 2; + const weightColumnIndex: number = 1; const lineIndexToStart: number = 1; const columnDelimiter: string = "\t"; const rowDelimiter: string = "\n"; const lineIndexToEnd: number = -1; - const result: { "intents": string[], "utterances": string[] } = + const result: { "intents": string[], "texts": string[], "weights": number[] } = Utility.loadLabelTextColumnarContent( fileContent, labelColumnIndex, textColumnIndex, + weightColumnIndex, lineIndexToStart, columnDelimiter, rowDelimiter, lineIndexToEnd); const intents: string[] = result.intents; - const utterances: string[] = - result.utterances; + const texts: string[] = + result.texts; + const weights: number[] = + result.weights; assert.ok(intents.length === 601, `intents.length=${intents.length}`); - assert.ok(utterances.length === 601, - `utterances.length=${utterances.length}`); + assert.ok(texts.length === 601, + `texts.length=${texts.length}`); + assert.ok(weights.length === 601, + `weights.length=${weights.length}`); }); it("Test.1100 loadEntityAnnotatedCorpusFile()", function() { diff --git a/packages/dispatcher/tsconfig.json b/packages/dispatcher/tsconfig.json index b4c5d7686..ba8817822 100644 --- a/packages/dispatcher/tsconfig.json +++ b/packages/dispatcher/tsconfig.json @@ -6,7 +6,12 @@ "outDir": "lib", "rootDir": "src", "strict": true, - "target": "es2017" + "target": "es2017", + "sourceMap": true, + "allowJs": false, + "traceResolution": false, + "noImplicitReturns": true, + "noFallthroughCasesInSwitch": true, }, "include": [ "src/**/*"