# XML-cleaning configuration: every key lives under the "clean." prefix.
# NOTE(review): key names match the options of Stanford CoreNLP's
# CleanXmlAnnotator -- confirm against the consumer that loads this file.
#
# Each property must sit on its own line: a .properties logical line ends at
# the line terminator, so entries sharing a line are read as ONE key
# ("clean.xmltags") whose value is the rest of the line.
#
# Values containing "|" are alternations of tag names (presumably treated as
# regular expressions by the consumer -- TODO confirm case sensitivity, since
# some lists are lower-case and others upper-case).

# --- Tags selecting / delimiting text ---------------------------------------
clean.xmltags = headline|dateline|text|post
clean.singlesentencetags = HEADLINE|DATELINE|SPEAKER|POSTER|POSTDATE
clean.sentenceendingtags = P|POST|QUOTE

# --- Turn / speaker tags ----------------------------------------------------
clean.turntags = TURN|POST|QUOTE
clean.speakertags = SPEAKER|POSTER

# --- Document-level metadata tags -------------------------------------------
clean.docidtags = DOCID
clean.datetags = DATETIME|DATE|DATELINE
clean.doctypetags = DOCTYPE

# Mappings are written as name=tag[attribute]; entries are comma-separated.
clean.docAnnotations = docID=doc[id],doctype=doc[type],docsourcetype=doctype[source]

# --- Section-level tags and mappings ----------------------------------------
clean.sectiontags = HEADLINE|DATELINE|POST
# A mapping without [..] names a bare tag (e.g. sectionDate=postdate);
# "[date|datetime]" lists two alternative attributes. The same name may be
# mapped more than once (sectionDate, author).
clean.sectionAnnotations = sectionID=post[id],sectionDate=post[date|datetime],sectionDate=postdate,author=post[author],author=poster

# --- Quote handling ---------------------------------------------------------
clean.quotetags = quote
clean.quoteauthorattributes = orig_author

# --- Token-level mappings ---------------------------------------------------
clean.tokenAnnotations = link=a[href],speaker=post[author],speaker=quote[orig_author]

# Backslashes are doubled because the .properties loader unescapes "\\" to "\";
# the loaded value is therefore:  \n|\*NL\*
clean.ssplitDiscardTokens = \\n|\\*NL\\*