#
# KEHOME/src/symbol.icn
# Nov/4/2005 Jul/22/2009

$include "keparam.h"

global VERBhint
global PS            # prompt string used by prompt() in myio.icn

# global variables for Merr
global yystate       # parser state
global yytoken       # parser token
global errors        # current no of errors = KERROR
global fName         # current file name = FNAME
global statestk      # state stack
global yychar        # token list
global yylineno      # current lineno
global yytext        # input string
global yylinenotab   # for per file counts
global yyfnametab    # for per file counts

#=============================#
# pattern matching:           #
#   TOKEN list -> SYMBOL list #
#=============================#
# NOTE: new grammar uses tokens
#   "w"  word
#   "x"  extended word which includes +-*:

# Parse-tree node: a type tag plus its payload.
record SYMBOL (
   stype,    # string
   svalue    # list (or string ???)
)

# Delimited list: separator, opening token, contents, closing token.
record m_BSE (
   mbse_separator,
   mbse_begin,
   mbse_list,
   mbse_end
)

#=========#
# methods #
#=========#
# token_htag(tokenlist)
# token_hend(tokenlist)
# token_harg(tokenlist)
# get_svalue(symbol)
# get_stype(symbol)
# is_null_symbol(x)
# find_stype(pattern,symbol)
# get_symbol(tokenlist)
# tsize(symbol)
# map_symbol(tsym,tok,finish)
# parse_file(fd,kformat,tokentype,ps,option)
# parse_line(line)
# unparse(symbol)                       # unparse.icn
# interpret_symbol(symbol,stype)        # sentence.icn
# interpret_line(line,kformat,dollar)   # symbol.icn
# parse_list(line,sep)
# parse_quantifier(line,sep)
# parse_value(line)
# parse_nv(line)
# parse_nrel(line,option)
# parse_relation(line,sep)
# parse_hierarchy(line,sep)

#--------------------------------------------#
#--------------------------------------------#
#--------------------------------------------#

procedure is_null_symbol(x)
#==========================
# Returns x when x is a SYMBOL record whose stype is "null";
# fails for every other value.
   if type(x) == "SYMBOL" then
      if x.stype == "null" then
         return x
   fail
end

#===================================================================#
#===================================================================#
# KFORMAT == "ku"
# SYMBOL(stype,svalue)
#===================================================================#

# Every m_* procedure below is a generator used during string scanning:
# it records its own name in the global yystate and suspends one result
# per alternative that matches at the current scanning position.
# The "\" after "suspend" is the non-null prefix operator; it keeps the
# expression on the following lines attached to the suspend.

# SYMBOL
procedure m_proplist()
#=====================
# proplist ::=
#   proposition
#   proposition proplist
   static PROPLIST
   initial {
      PROPLIST := TOKEN("w","propositionlist")
   }
   yystate := "m_proplist"
   suspend \
      SYMBOL("prop",[m_proposition()]) |
      SYMBOL("proplist",[m_proposition()] ||| [m_proplist()] ) #####|
      #####SYMBOL("grammar hint",match_token(PROPLIST))
end

# SYMBOL
procedure m_proposition()
#========================
# propname ::= phrase
# proposition ::=
#   propname ;
#   propname :: proposition
#   xml_version
#   xml_doctype
#   rdf_namespace
#   rdf_end
#   html_statement
#   context brace ;
#   context ;
#   sentence            # does NOT include brace
#   compound_sentence   # includes brace "and" "or" "not"
#   qword;              # "true"|"false"|"&null"|"proposition"
# brace ::=
#   { proplist }
   static s,PROP
   initial {
      s := ";"
      PROP := TOKEN("w","proposition")
   }
   yystate := "m_proposition"
   suspend \
      #m_xml_version() |        # <?xml n=v n=v?>
      #m_xml_doctype() |        # <!t n v>
      #m_rdf_namespace() |      # <rdf:RDF n=v ... >
      #m_rdf_end() |            # </rdf:RDF>
      ###m_html_statement() |   # <t n=v />
      #####m_iteration() |      # for ... { ... }
      SYMBOL("context sentence",[m_context(),m_pipeline(),=";"]) |
      SYMBOL("context",[m_context(),=";"]) |
      SYMBOL("compound sentence",m_compound_sentence()) |
      SYMBOL("sentence",m_sentence()) #|
      #####SYMBOL("wordproposition",[m_qword(),=";"]) #|
      #####SYMBOL("grammar hint",[match_token(PROP),=";"])
   # NOTE(review): the expression below is not joined to the suspend by "|".
   # As a separate statement it is evaluated only after the suspend is
   # exhausted and its result is discarded -- confirm whether it was meant
   # to be an alternative (m_sentence already offers "propname ;").
   ([m_propname(),=s])
end

# list
procedure m_context()
#====================
# context ::=
#   at nvobject
   yystate := "m_context"
   suspend \
      [[="A"],m_nvobject()]
end

# list
procedure m_pipeline()
#=====================
# pipeline ::= brace
#   brace | pipeline
   yystate := "m_pipeline"
   suspend \
      [m_brace()] |
      [m_brace()] ||| [="|"] ||| m_pipeline()
end

# list
procedure m_compound_sentence()
#==============================
# for now: treat "|" like ";"
# NOTE: sentence ::= clause ;
# conjunction ::= and | or
# compound_sentence ::=
#   brace ;
#   "not" brace ;
#   brace conjunction compound_sentence
#   "not" brace conjunction compound_sentence
# brace ::=
#   { proplist }
   yystate := "m_compound_sentence"
   suspend \
      [m_brace(),=";"] |
      [m_brace(),="|"] |
      [m_notbrace(),=";"] |
      [m_notbrace(),="|"] |
      [m_brace(),m_conjunction()] ||| m_compound_sentence() |
      [m_notbrace(),m_conjunction()] ||| m_compound_sentence()
end

# SYMBOL
procedure m_conjunction()
#========================
# NOTE: relverb includes "J"
# conjunction ::=
#   "and", "or"   # "j"
#   # "&"         # "&"
#   "|"           # "|"   # pipeline
#   "iff"         # "J"
#   "implies"     # "J"
#   "like"        # "J"   # NSM
   yystate := "m_conjunction"
   suspend \
      #SYMBOL("conjunction",[="&"]) |
      SYMBOL("conjunction",[="|"]) |
      SYMBOL("conjunction",[="j"]) |
      SYMBOL("conjunction",[="J"]) |
      SYMBOL("conjunction",[="~J"])
end

# SYMBOL
procedure m_sentence()
#=====================
# includes terminating semicolon (or ">" for HTML)
# does NOT include brace
# propname ::= phrase
# sentence ::=
#   propname ;
#   SENTENCE            # TOKEN("w","sentence")
#   html_statement
#   simple commands
#     kustatement
#     question
#     command
#     assignment
#     production        # product := producer
#   control statement
# control statement ::=
#   conditional
#   iteration
#   exit
#   group
#     group_begin
#     group_statement   <<== only via new parse_file()
#     ...
#     group_end
   static s,SENTENCE
   initial {
      s := ";"
      SENTENCE := TOKEN("w","sentence")
   }
   yystate := "m_sentence"
   suspend \
      m_exit() |              # exit; break
      m_command() |           # do ... of out with od from to done;
      m_conditional() |       # if then else fi
      m_quantification() |    # for quantifier
      m_iteration() |         # every while until when
      m_assignment() |        # let name = value; return suspend
      m_group_begin() |       # begin gtype gname ;
      m_group_end() |         # end gtype gname ;
      m_production() |        # product := sentence
      m_nameprop() |          # name :: proposition
      m_ku_statement() |      # subject verb object [pplist] [done] ;
      ##m_ho_statement() |    # phrase ;
      ##m_nrel_statement() |  # nest ;
      ([m_propname(),=s])     # propname ;
      #m_question() |         # embedded "?"
end

#----------------------------------------------#
# group
#   hierarchy,relation
#   triple,mcf,rdf,owl,...

# list
procedure m_group(kformat)
#=========================
   yystate := "m_group"
   suspend \
      m_group_begin() |
      m_group_end() |
      m_group_statement(kformat)
end

# GROUP
procedure m_group_begin()
#========================
# group_begin ::=
#   begin gtype gname ;   # parsed with kformat=ku
   yystate := "m_group_begin"
   suspend \
      GROUP("begin",[="Y",m_gtype(),m_phrase(),=";"])
end

# GROUP
procedure m_group_end()
#======================
# Note: ";" is parsed at this level
# group_end ::=
#   end gtype gname ;     # parsed with kformat= ho|nrel|...
   yystate := "m_group_end"
   suspend \
      GROUP("end",[="Z",m_gtype(),m_phrase(),=";"])
end

# GROUP
procedure m_group_tail(kformat)
#==============================
# group_tail ::=
#   m_group_end
#   m_group_statement
   /kformat := "ku"
   yystate := "m_group_tail"
   suspend \
      m_group_end() |
      m_group_statement(kformat)
end

# string
procedure m_gtype()
#==================
   yystate := "m_gtype"
   suspend \
      ="w"   # other word
end

# list
procedure m_gslist(kformat)
#==========================
# gslist ::=
#   group_statement
#   group_statement gslist
#
# kformat is optional and is handed to m_group_statement(), which only
# matches for "ho" or "nrel".  Called without it (the old interface) this
# procedure fails, exactly as before.
   yystate := "m_gslist"
   suspend \
      [m_group_statement(kformat)] |
      [m_group_statement(kformat)] ||| m_gslist(kformat)
end

# GROUP
procedure m_group_statement(kformat)
#===================================
# group_statement ::=
#   subject ;
# allow null and nested list in relation
# Fails for any kformat other than "ho" or "nrel".
   yystate := "m_group_statement"
   suspend \
      case kformat of {
         default: { fail }
         "ho":    { GROUP("ho",  [m_ho_statement()]) }
         "nrel":  { GROUP("nrel",[m_nrel_statement()]) }
      }
end

#===================================================================#
#===================================================================#
# hierarchy
# KFORMAT == "ho"
# SYMBOL("ho",svalue)
#===================================================================#
#===================================================================#

# GROUP
procedure m_ho_statement()
#=========================
   yystate := "m_ho_statement"   # recorded like every sibling matcher
   suspend \
      GROUP("ho",[m_phrase(),=";"])
end

#===================================================================#
#===================================================================#
# relation
# KFORMAT == "nrel"
# SYMBOL("nrel",svalue)
#===================================================================#
#===================================================================#

# GROUP
procedure m_nrel_statement()
#===========================
   yystate := "m_nrel_statement"   # recorded like every sibling matcher
   suspend \
      GROUP("nrel",[m_phraselist(),=";"])
end

#----------------------------------------------#
#----------------------------------------------#
#----------------------------------------------#

# SYMBOL
procedure m_exit()
#=================
# exit;    # exit KE
# break;   # exit every,while,until,when
   yystate := "m_exit"
   suspend \
      SYMBOL("exit",[="z",=";"])
end

# SYMBOL
procedure m_conditional()
#========================
# conditional ::=
#   "if" proplist "then" proplist "else" proplist "fi;"
#   "if" proplist "then" proplist "fi;"
#   "if" proplist "else" proplist "fi;"
#   "if" proplist "fi;"
#   "either" brace "or" brace "or" ...;
#   "all" brace "and" brace "and" ...;
   yystate := "m_conditional"
   suspend \
      SYMBOL("itef",[m_IF(),m_THEN(),m_ELSE(),m_FI()])
end

# SYMBOL
procedure m_IF()
#===============
   yystate := "m_IF"
   suspend \
      SYMBOL("if",[="I",m_proplist()])
end

# SYMBOL
procedure m_THEN()
#===============
# The second alternative matches an absent "then" part.
   yystate := "m_THEN"
   suspend \
      SYMBOL("then",[="T",m_proplist()]) |
      SYMBOL("then",[])
end

# SYMBOL
procedure m_ELSE()
#===============
# The second alternative matches an absent "else" part.
   yystate := "m_ELSE"
   suspend \
      SYMBOL("else",[="E",m_proplist()]) |
      SYMBOL("else",[])
end

# SYMBOL
procedure m_FI()
#===============
   yystate := "m_FI"
   suspend \
      SYMBOL("fi",[="F",=";"])
end

###### SYMBOL
#####procedure m_FI()
######===============
#####yystate := "m_FI"
#####suspend \
#####   SYMBOL("fi",[="F",=";"]) |
#####   SYMBOL("thenelse",[="T",m_proplist(),="E",m_proplist(),="F",=";"]) |
#####   SYMBOL("then",[="T",m_proplist(),="F",=";"]) |
#####   SYMBOL("else",[="E",m_proplist(),="F",=";"])
#####end

# SYMBOL
procedure m_iteration()
#======================
# NOTE: generator ::= variable verb object ";"
# iteration ::=
#   "every" generator   "{" proplist "};"   <== old format
#   "while" proposition "{" proplist "};"
#   "until" proposition "{" proplist "};"
#   "when"  proposition "{" proplist "};"   <== NOT implemented
   yystate := "m_iteration"
   suspend \
      SYMBOL("every",[ ="G",m_generator(),m_brace(),=";" ]) |
      SYMBOL("every",[ ="I",m_proposition(),m_brace(),=";" ])
end

# SYMBOL
procedure m_quantification()
#===========================
# NOTE: generator ::= variable verb object ";"
# quantification ::=
#   "for" Q generator "{" proplist "};"   <== new format
   yystate := "m_quantification"
   suspend \
      SYMBOL("for", [ ="g",="Q",m_generator(),m_brace(),=";" ])
end

# list
procedure m_generator_list()
#===========================
# generator_list ::=
#   generator
#   generator generator_list
#
# m_generator must be invoked here: without the parentheses the list
# would hold the procedure value itself and nothing would be matched.
   yystate := "m_generator_list"
   suspend \
      [m_generator()] |
      [m_generator()] ||| m_generator_list()
end

# list
procedure m_for()
#================
   yystate := "m_for"
   suspend \
      [="g"]
end

# list
procedure m_quantifier()
#=======================
   yystate := "m_quantifier"
   suspend \
      [="Q"]
end

# SYMBOL
procedure m_generator()
#======================
# generator ::=
#   variable genverb object;   # proposition format
#   variable pplist;           # qualified variable
# pplist ::=
#   from initial to final
#   is integer from ... to ...
#   of gdbmtable
#   is index of array
   yystate := "m_generator"
   suspend \
      SYMBOL("generator",[m_qword(),m_pplist(),=";"]) |
      SYMBOL("generator",[m_qword(),m_genverb(),m_object(),=";"])
end

#-------------------------------------------------------------------

# SYMBOL
procedure m_assignment()
#=======================
# "let nvobject" literally means "ke has nvobject"
# assignment ::=
#   let nvobject
#   vlet nvobject      # all views
#   unlet nvobject
#   return nvobject
#   suspend nvobject
   yystate := "m_assignment"
   suspend \
      SYMBOL("assign",[="r",m_phrase(),="S",m_phrase(),=";"]) |
      SYMBOL("assign",[="r",m_phrase(),=";"])
end

# string
procedure m_nvop()
#=================
# for get_symbol()
# nvop ::=
#   = | += | -= | *=
   yystate := "m_nvop"
   suspend \
      ="S"
end

# SYMBOL or string
procedure m_value()
#==================
# value ::=
#   angle     <htxt> RDF-style comment
#   phrase    includes quotes
#   bracket   set hfocus = [concept, ...]
#   brace     x has meaning = { proplist } for method,relation
#   ?         set parameter = ?   <= NO - in qword
   yystate := "m_value"
   suspend \
      #####m_html_statement() |
      SYMBOL("angle",m_ANGLE()) |
      m_bracket() |
      m_brace() |
      m_phrase()
end

# string
procedure m_kedo()
#=================
# kedo ::=
#   "do"   # ke do command pplist done
#   "!"    # sh do command pplist done
   yystate := "m_kedo"
   suspend \
      ="D" |
      ="!"
end

# SYMBOL
procedure m_command()
#====================
# command ::=
#   do cmdlist done;
#   do cmdlist pplist done;
#   do diverb pplist done;                  # isd,isi
#   vdo exec od {proposition list} done;
# do ::=
#   do    # current view
#   vdo   # all views
#   !     # sh command
#
# "do command ... done;"
# literally means "ke do command ... done;"
#
# NOTE:
#   relation,hierarchy blocks are
#   control structures not commands
   yystate := "m_command"
   suspend \
      #SYMBOL("Dcd",[m_do(),m_brace(),m_done()]) |
      #SYMBOL("Dcd",[m_do(),m_proplist(),m_done()]) |
      SYMBOL("Dcd",[m_do(),m_object(),m_done()]) |
      SYMBOL("Dcpd",[m_do(),m_object(),m_pplist(),m_done()]) #|
      ##SYMBOL("Dcpd",[m_do(),m_nvobject(),m_pplist(),m_done()])
end

# SYMBOL
procedure m_ku_statement()
#=========================
# Feb/25/2007 # subject HAS verbphrase list;
# Mar/28/2003 consider using nvobject for all objects
#             (old objects will be nvnulllist)
# kustatement ::=
#   wnunit;                             # WordNet definition
#   subject exists;                     # existence
#   subject is object;                  # identity
#   subject is object pplist;           # definition
#   subject inrel nvobject;             # n-ary relation
#   subject inho nvobject;              # ternary relation
#   subject brel nvobject;              # binary relation
#   subject urel nvobject;              # unary relation
#   subject has nvobject;               # attribute
#   subject do nvobject done;           # action
#   subject do nvobject pplist done;    # action
#   subject rel relverb object, ... ;   # binary relation
#   subject relverb object;             # binary relation
#   subject haspart partlist;           # part
#   partlist isapart object;            # part
#   verb verb verb;                     # verb properties
# relverb ::=
#   isa isa* isa**n iss iss* isu isu*
#   isc isc* isc**n isg isg* isp isp*
#   isd isi
#   nrel trel brel urel   # "L"
#   causes because        # "R"
#   isapart haspart       # "H"
#   ...
# pplist ::=
#   NULL
#   pp
#   pp pplist
# pp ::=
#   prep nvlist
#   prep {proposition list}
# prep ::=
#   of
#   with
#   out
#   od
#   from
#   to
   yystate := "m_ku_statement"
   suspend \
      SYMBOL("wnunit",m_wnunit()) |
      SYMBOL("sVo",  [m_subject(),m_verb(),  m_object(),[=";"]]) |
      SYMBOL("sVop", [m_subject(),m_verb(),  m_object(),m_pplist(),[=";"]]) |
      SYMBOL("sDad", [m_subject(),m_doverb(), m_object(),m_done()]) |
      SYMBOL("sDapd",[m_subject(),m_doverb(), m_object(),m_pplist(),m_done()]) |
      # SYMBOL("sVo",  [m_subject(),m_verb(),  m_nvobject(),[=";"]]) |
      # SYMBOL("sDad", [m_subject(),m_doverb(), m_nvobject(),m_done()]) |
      # SYMBOL("sDapd",[m_subject(),m_doverb(), m_nvobject(),m_pplist(),m_done()]) |
      #SYMBOL("gs",   [m_subject(),=";"]) |
      #SYMBOL("tail",m_group_tail()) |   # group_end | group_statement
      SYMBOL("vvv",  [m_verb(),m_verb(),m_verb(),=";"]) |
      SYMBOL("vvv",  [m_verb(),m_verb(),m_phrase(),=";"]) |
      SYMBOL("vvv",  [m_phrase(),m_verb(),m_verb(),=";"])
end

# SYMBOL
procedure m_question()
#=====================
# question ::=
#   assignment         qassignment (internal "?")
#   command            qcommand    (internal "?")
#   statement          qstatement  (internal "?")
#   if statement fi;   qstm
#   statement "?"      qstm   <<<=== OBSOLETE !!!
   yystate := "m_question"
   suspend \
      SYMBOL("qstm",[m_ku_statement(),m_qmark()])
end

#===================================================================
#===================================================================
#===================================================================
# basic stuff:
#   pvalue   # only excludes is,has,do,...
#   qword    # includes quote
#   nvphrase, nvlist
#   prepphrase, pplist
#   partphrase, partlist
#   htmlword, html_phrase, html_command, html_group
#
# qword isa string
# nvphrase isa SYMBOL; nvlist isa list
# prepphrase isa SYMBOL; PPLIST isa list; pplist isa SYMBOL
# partphrase isa SYMBOL; partlist isa list
# htmlword isa string
# HTMLPHRASE isa list; html_phrase isa list

################ Mar/6/2003 new simple phrase definition ###################

# list
procedure m_phraselist(sep)
#==========================
# phraselist ::=
#   phrase
#   phrase , phraselist
#
# sep is the token matched between phrases.  It is passed on to the
# recursive call so that a caller-supplied separator applies to every
# element of the list, not only to the first one.
   /sep := ListSeparator   # default "," set by init_word() in token.icn
   yystate := "m_phraselist"
   suspend \
      ([m_phrase()]) |
      ([m_phrase(),=sep] ||| m_phraselist(sep))
end

# PHRASE
procedure m_phrase()
#===================
# phrase ::=
#   lphrase
#   ""        # <<== do NOT like this
# use lphrase not sphrase
# NOTE: sphrase isa string (token.icn)
#       lphrase isa list   (symbol.icn)
   yystate := "m_phrase"   # was misspelled "m_lphase"
   suspend \
      PHRASE(m_lphrase())   # |   # blank-separated words
      # PHRASE(="")               # null string (for relations)
end

# list
procedure m_lphrase()
#====================
# do NOT use pword
# allow and,or,not
# phrase ::=
#   qword
#   qword lphrase
#
   yystate := "m_lphrase"
   suspend \
      ([m_qword()]) |
      ([m_qword()] ||| m_lphrase())
end

################ Mar/6/2003 new simple phrase definition ###################

################ Aug/13/2005 begin variable arrays ###################

# list
procedure m_aaphrase()
#=====================
# name[char][i] syntax for concept characteristics
# (associative array notation)
   yystate := "m_aaphrase"
   suspend \
      [AAPHRASE(m_phrase(),m_index_list())]
end

# list
procedure m_arrayval()
#=====================
# con[char][i] := arrayval;
# arrayval ::=
#   phrase ;
   yystate := "m_arrayval"
   suspend \
      ###SYMBOL("arrayval", [m_bracket(),=";"]) |   # con[char] := [phraselist]
      [m_phrase(),=";"]                             # con[char][i] := phrase
end

# list
procedure m_arrayref()
#=====================
# value := con[char][i];
# arrayref ::=
#   aaphrase ;
   yystate := "m_arrayref"
   suspend \
      #SYMBOL("arrayref",[m_phrase(),m_index(),=";"])
      [m_aaphrase(),=";"]
end

# list
procedure m_arrayset()
#=====================
# con[char][i] := value;
# arrayset ::=
#   aaphrase
   yystate := "m_arrayset"
   suspend \
      #SYMBOL("arrayset",[m_phrase(),m_index()])
      [m_aaphrase()]
end

# SYMBOL
procedure m_variable()
#=====================
# variable ::=
#   name
#   name[i]
#   aaphrase
   yystate := "m_variable"
   suspend \
      #SYMBOL("arrayref",[m_qword(),m_index()]) |
      #SYMBOL("variable",m_qword())
      SYMBOL("variable",m_aaphrase()) |
      SYMBOL("variable",m_phrase())
end

# list
procedure m_index_list()
#=======================
# index_list ::=
#   index
#   index index_list
   yystate := "m_index_list"
   suspend \
      [m_index()] |
      [m_index()] ||| m_index_list()
end

# SYMBOL
procedure m_index()
#==================
# index ::= [char]
# serious problem: BSE "unparses" index -- when,how ???
# I need to go back and try BSE again
# problem seems to be -- must wrap list around record when parsing
   yystate := "m_index"
   suspend \
      SYMBOL("index",[="[",m_phraselist(),="]"]) |
      SYMBOL("index",[="[[",m_phraselist(),="]]"])
      ###SYMBOL("index",m_bracket())
      ###SYMBOL("index",m_BSE(",",="[",m_phraselist(),="]"))
end

################ Aug/13/2005 end variable arrays ###################

# list
procedure m_nvlist()
#===================
# nvlist ::=
#   nvphrase
#   nvphrase sep nvlist
   static sep
   initial sep := ","
   yystate := "m_nvlist"
   suspend \
      ( [m_nvphrase()] ) |
      ( [m_nvphrase()] ||| [=sep] ||| m_nvlist() )
end

# list
procedure m_nvphrase_list()
#==========================
# sep ::= " "
   yystate := "m_nvphrase_list"
   suspend \
      [m_nvphrase()] |
      [m_nvphrase()] ||| m_nvphrase_list()
end

# NVPHRASE
procedure m_nvphrase()
#=====================
# nvphrase ::=
#   variable nvop value
#   variable
   yystate := "m_nvphrase"
   suspend \
      NVPHRASE("nv",[m_phrase(),m_nvop(),m_value()]) |
      NVPHRASE("nvnull",[m_phrase()])
end

#-------------------------------------------------------------------
#-------------------------------------------------------------------
# word phrase

# string
procedure m_qword()
#==================
# excludes most MKR special words (is,has,do, ...)
#
# qword ::=
#   e       exists
#   C       rdfs:Class
#   p       rdf:Property
#   $       dollar variable          <== not separator
#   /       hierarchy                <== not separator
#   .       dot variable             <== not separator
#   ?       form-based question      <== not separator
#   Q       quantifier (check semantics later)
#   w       includes quotes
#   x       includes +-*:
#   A       at   <== OK only required at beginning of proposition
#   g       group: hierarchy, relation, etc.
#   S       set,vset,unset
#   s t v   space,time,view
#   h r m   hierarchy,relation,method
#   u       unit
#   a       angle <...>
#   b       paren ( ... )
#   hlist qword   HTML command
#   + - *   arithmetic operator
#   & | ~   logical operator (not used for logic)
#   j       and,or
#   N       not,non
   yystate := "m_qword"
   suspend \
      #="G" |                # for
      ###m_dollarword() |    # includes embedded separator form ${w}
      ="$" |     # not separator
      ="/" |     # not separator, but distinguish use in HTML
      ="." |     # not separator
      ="a" |     # angle <...>
      ="b" |     # paren (...)
      ="Q" |     # quantifiers: a,some,the,any,all,either, no
      ="j" |     # conjunctions: and,or
      ="N" |     # complements: not,non
      ="?" |     # question variable
      ="w" |     # ordinary words (includes quotes "..." and '...')
      ="x" |     # extended words (includes +-*:)
      #="A" |    # at
      #="S" |    # =
      ="r" |     # return, suspend, let
      ="B"       # BinaryRelation
end

# string
procedure m_dollarword()
#=======================
# dollarword ::=
#   $ qword
#   $ { qword } qword
#   $ { qword }
#
# The closing "}" is matched with "=" like every other token; previously
# it was only concatenated as a literal, so the brace was never consumed
# from the subject.
   yystate := "m_dollarword"
   suspend \
      ="$"||m_qword() |
      ="$"||="{"||m_qword()||="}"||m_qword() |
      ="$"||="{"||m_qword()||="}"
end

# string
procedure m_dword()
#==================
   yystate := "m_dword"
   suspend \
      m_qword() |
      m_dollarword()
end

procedure m_pword()
#==================
# exclude all MKR special words
# pvalue is computed once: every character except SEPARATOR and the
# token letters listed below.
   static pvalue
   initial pvalue := &cset -- SEPARATOR --
      'iR' --     # is,isa,iss,isu,isd,...
      'HpS' --    # has,haspart,nrel,set,unset,return
      'Dd' --     # do,done
      'AP' --     # at,out,of,with,od,from,to
      'ITEF' --   # if,then,else,fi
      'G' --      # every,when,while
      'C' --      # causes
      'YZ' --     # begin,end
      'z'         # exit,break
   yystate := "m_pword"
   suspend \
      any(pvalue)
end

#-------------------------------------------------------------------
#-------------------------------------------------------------------
#-------------------------------------------------------------------
#-------------------------------------------------------------------
# preposition phrase

# SYMBOL
procedure m_pplist()
#===================
# important to use SYMBOL for blank-separated list
# pplist ::=
#   prepphrase
#   prepphrase pplist
   yystate := "m_pplist"
   suspend \
      SYMBOL("pplist",m_PPLIST())
end

# list
procedure m_PPLIST()
#===================
# important to use SYMBOL for blank-separated list
# pplist ::=
#   pp
#   pp pplist
   yystate := "m_PPLIST"
   suspend \
      ( [m_pp()] ) |
      ( [m_pp()] ||| m_PPLIST() )
end

# SYMBOL
procedure m_pp()
#===============
# NOTE: "at" nvlist is context, not prepphrase
# NOTE: allow prep {proposition list}
#   e.g.: they do say od {...} done
#   e.g.: do delete od {sentence} done
# prepphrase ::=
#   prep {proplist}
#   prep nvlist
# prep ::= "of" | "with" | "out" | "od" | "from" | "to"   # "in"
   yystate := "m_pp"
   suspend \
      SYMBOL("pp",[[="P"],m_brace()]) |
      SYMBOL("pp",[[="P"],m_nvlist()])
end

#-------------------------------------------------------------------
# part phrase

# list
procedure m_partlist()
#=====================
# partlist ::=
#   partphrase
#   partphrase sep partlist
   static sep
   initial sep := ","
   yystate := "m_partlist"
   suspend \
      #####( [m_partphrase()] ) |
      #####( [m_partphrase(),=sep]||| m_partlist() )
      m_nvobject()
end

# SYMBOL
procedure m_partphrase()
#=======================
# partphrase ::=
#   qphrase
#   qphrase prep qphrase
   yystate := "m_partphrase"
   suspend \
      SYMBOL("partpp",[m_phrase(),="P",m_phrase()]) |
      SYMBOL("part",  [m_phrase()])
end

###### string
#####procedure m_PARTPHRASE()
######=======================
###### important to use SYMBOL for blank-separated phrase
###### insert blank between words
###### requires special treatment in map_symbol() count
###### partphrase ::=
######   qphrase
######   qphrase prep qphrase
#####static psep
#####initial psep := dequote(PSEPARATOR)   # blank
#####suspend \
#####   (m_QPHRASE()||psep||="P"||psep||m_QPHRASE()) |
#####   (m_QPHRASE())
#####end

#===================================================================
#===================================================================
#===================================================================
# basic stuff:
#   bracket, brace, angle, paren
#
# bracket: [phrase, ...]
# brace:   {proposition; ...}
# angle:   <HTML command>
# paren:   (anything)

# SYMBOL
procedure m_bracket()
#====================
# bracket ::=
#   [ phraselist ]
#   [ nvphrase_list ]
#   [ ]
   static sep,size
   initial {
      sep := ","
      size := 0
   }
   yystate := "m_bracket"
   suspend \
      #SYMBOL("bracket",[ ="[", m_phraselist(), ="]"])
      #SYMBOL("bracket",m_BSE(",",="[",m_phraselist(),="]"))
      m_BSE(",",="[",m_phraselist(),="]") |
      m_BSE(",",="[",m_nvphrase_list(),="]") |
      m_BSE(",",="[",[],="]")
end

# SYMBOL
procedure m_brace()
#==================
# brace ::=
#   { proplist }
#   { }
   yystate := "m_brace"
   suspend \
      #SYMBOL("brace",[ ="{", m_proplist(), ="}" ])
      #SYMBOL("brace",m_BSE(" ",="{",m_proplist(),="}"))
      m_BSE(" ",="{",[m_proplist()],="}") |
      m_BSE(" ",="{",[],="}")
end

# SYMBOL
procedure m_notbrace()
#=====================
# notbrace ::= "not" brace
   yystate := "m_notbrace"
   suspend \
      SYMBOL("notbrace",[="N",m_brace()])
end

# SYMBOL
procedure m_ANGLE()
#==================
# angle ::=
#   < htxt >          # RDF-style comment
#   < phraselist >
#   < html_phrase >
   yystate := "m_ANGLE"
   suspend \
      #SYMBOL("angle",[="<",m_HTXT(),=">"]) |
      ###m_BSE(" ",[="<",m_HTXT(),=">"]) |
      (="a") |
      (="<>") |
      #####m_BSE(" ",="<",[m_htxt()],=">") |
      m_BSE(" ",="<",[],=">")
      #SYMBOL("angle",[="<",m_htxt(),=">"]) |
      #SYMBOL("angle",m_html_statement())
      ###SYMBOL("angle",[="<",m_phraselist(),=">"]) |
      ###SYMBOL("angle",[="<",m_HTMLPHRASE(),=">"]) |   # m_html_phrase() ???
      ###SYMBOL("angle",[="<",[],=">"])
end

##### SYMBOL
####procedure m_paren()
#####==================
##### paren ::= ( phraselist )
####yystate := "m_paren"
####suspend \
####   SYMBOL("paren",[="(",m_phraselist(),=")"])
####end

#===================================================================
#===================================================================
#===================================================================
# basic stuff:
#   subject, verb, object

# SYMBOL
procedure m_nameprop()
#=====================
# nameprop ::= propname "L" proposition   (see "name :: proposition" in m_sentence)
   yystate := "m_nameprop"
   suspend \
      SYMBOL("NameProp",[m_propname(),="L",m_proposition()])
end

# SYMBOL
procedure m_propname()
#=====================
# propname ::= phrase
   yystate := "m_propname"
   suspend \
      SYMBOL("propname",[m_phrase()])
end

# SYMBOL
procedure m_production()
#=======================
# production ::=
#   product := producer
#   product := object
#
# The two forms are alternatives and are now joined by "|".  Without it
# the second expression was a separate statement: it ran only after the
# suspend was exhausted, could move the scanning position, and its
# result was thrown away.
   yystate := "m_production"
   suspend \
      SYMBOL("production",[m_product(),="R",m_producer()]) |
      SYMBOL("production",[m_product(),="R",m_object()])
end

# list
procedure m_product()
#====================
# product ::=
#   arrayset
#   phrase
#   phrase pplist   # permit x of y
   yystate := "m_product"
   suspend \
      [SYMBOL("phrase",m_phrase())] |
      [SYMBOL("phrase_pplist",[m_phrase(),m_pplist()])] |
      [SYMBOL("arrayset",m_arrayset())]
end

# SYMBOL
procedure m_producer()
#=====================
# NOTE: includes ";"
# producer ::=
#   sentence
#   aaphrase
#   phrase
#   phrase pplist
   yystate := "m_producer"
   suspend \
      SYMBOL("producer",SYMBOL("phrase",m_phrase())) |
      SYMBOL("producer",SYMBOL("phrase_pplist",[m_phrase(),m_pplist()])) |
      SYMBOL("producer",SYMBOL("sentence",[m_sentence()])) |
      SYMBOL("producer",SYMBOL("arrayref",[m_arrayref()])) |
      SYMBOL("producer",SYMBOL("arrayval",[m_arrayval()])) #|
      ###SYMBOL("producer",SYMBOL("context",[m_context(),=";"])) |
      ###SYMBOL("producer",SYMBOL("proposition",[m_proposition()]))
end

# SYMBOL
procedure m_subject()
#====================
# brackets permitted
# verb etc. permitted
# subject ::=
#   phrase pplist
#   phraselist   # includes and,or,not
#   bselist      # [phraselist] or {propositionlist}
#   bracket      # [phraselist]
#   brace        # {propositionlist}
#   angle        # <anything>
#   partlist     # includes preposition
#   variable
   static sep
   initial sep := ""
   yystate := "m_subject"
   suspend \
      SYMBOL("subject",m_bracket()) |
      SYMBOL("subject",m_brace()) |
      SYMBOL("subject",m_ANGLE()) |
      SYMBOL("subject",m_phraselist()) |
      SYMBOL("subject",SYMBOL("phrase_pplist",[m_phrase(),m_pplist()])) |
      SYMBOL("subject",[m_variable()]) |
      ##SYMBOL("subject",[m_relverb()]) |   # cyc verb
      SYMBOL("subject",[="G"]) |            # cyc forall
      SYMBOL("subject",[="Q"])              # cyc Quantifier
end

# string
procedure m_qmark()
#==================
   yystate := "m_qmark"
   suspend \
      ="?"
end

# SYMBOL
procedure m_object()
#===================
# brackets permitted
# object ::=
#   bracket   # [phraselist]
#   brace     # {propositionlist}
#   angle     # <anything>
#   nvlist
#
# Every alternative is tagged "object"; the quantifier alternative used
# to carry the tag "subject", copied over from m_subject().
   static sep
   initial sep := ","
   yystate := "m_object"
   suspend \
      SYMBOL("object",m_bracket()) |
      SYMBOL("object",m_brace()) |
      SYMBOL("object",m_ANGLE()) |
      SYMBOL("object",m_nvlist()) |
      ##SYMBOL("object",[m_relverb()]) |   # cyc verb
      SYMBOL("object",[="G"]) |            # cyc forall
      SYMBOL("object",[="Q"])              # cyc Quantifier
end

# SYMBOL
procedure m_nvobject()
#=====================
# brackets permitted
# nvobject ::=
#   nvlist
#   bracket   # [nvlist]
#   brace     # {propositionlist}
#   angle     # <anything>
   static sep
   initial sep := ","
   yystate := "m_nvobject"
   suspend \
      SYMBOL("nvobject",m_bracket())|
      SYMBOL("nvobject",m_brace()) |
      SYMBOL("nvobject",m_ANGLE()) |
      SYMBOL("nvobject",m_nvlist())
end

# list
procedure m_cmdobject()
#======================
# cmdobject ::=
#   phraselist
   yystate := "m_cmdobject"
   suspend \
      m_phraselist()
end

# SYMBOL
procedure m_verb()
#=================
#####static VERBhint initial { VERBhint := TOKEN("R","verb") }
   yystate := "m_verb"
   suspend \
      SYMBOL("verb",="V") |
      SYMBOL("verb",="D")
end

# SYMBOL
procedure m_doverb()
#===================
# doverb ::=
#   "do" | "can" | "hdo" | "vdo" | "ido"   # x do action pplist done
#   not doverb
#   "!"                                    # sh do command pplist done
   yystate := "m_doverb"
   suspend \
      SYMBOL("verb",="ND") |
      SYMBOL("verb",="D") |
      SYMBOL("verb",="!")
end

# SYMBOL
procedure m_relverb()
#====================
# relverb ::=
#   hop
#   "not" hop
#   binaryrelation
#   "causes"
#   #####conj   # <<== not a verb!
#   ?
#####static VERBhint initial { VERBhint := TOKEN("R","verb") }
   yystate := "m_relverb"
   suspend \
      #####SYMBOL("verb",="N"||m_hop()) |
      #####SYMBOL("verb",m_hop()) |
      SYMBOL("verb",="B") |
      SYMBOL("verb",="C") |
      SYMBOL("verb",="J") |
      #####SYMBOL("verb",="j") |
      SYMBOL("verb",="~J") ##|
      ##SYMBOL("verb",m_qmark()) #####|
      #####SYMBOL("grammar hint",match_token(VERBhint))
end

# SYMBOL
procedure m_genverb()
#====================
# subject genverb object
# verb ::= "isa"|"isu" | "ismem"|"isalt" | "nrel"|"trel"|"brel"|"urel"
#####static VERBhint initial { VERBhint := TOKEN("R","verb") }
   yystate := "m_genverb"
   suspend \
      SYMBOL("verb",="V")
      #####SYMBOL("verb",="i") |
      #####SYMBOL("verb",="R") |
      #####SYMBOL("verb",="H") #####|   # nrel
      #####SYMBOL("grammar hint",match_token(VERBhint))
end

###### SYMBOL
#####procedure m_partverb()
######=====================
###### partverb ::= haspart | isapart
#####suspend \
#####   SYMBOL("verb",="H")
#####end

#-------------------------------------------------------------------

# string
procedure m_hop()
#================
# hierarchy verbs
# hop ::=
#   "RG"   e.g. "is either","is all","is any"       <<== NO !!!
#   "RQ"   e.g. "is a","is some","is the","is no"   <<== NO !!!
#   "lw"   e.g. "isa**n"
#   "l?"   e.g. "isa**?"
#   "R"    e.g. "isa*","isa+", "isc", ...
#   "R"    e.g. "ismem","isall", ...
#   "R"    e.g. "isalt","isany", ...
#   "i"    e.g. "is","isd","isi","isa","isu"
   yystate := "m_hop"
   suspend \
      ="V" |
      ="Vw" |
      ="V?"
      #####   ="R" | ="lw" | ="l?" | ="i"
end

#-------------------------------------------------------------------

# string
procedure m_do()
#===============
# NO optional ";" before "do"
# do ::=
#   do     # action
#   can    # "power" to do action
#   hdo    # hierarchy walk
#   vdo    # all views
#   ido    # interaction
#   !      # sh command
#   out product do
#
# NOTE(review): m_pplist() produces a SYMBOL record, and "||" is string
# concatenation, so the last alternative looks like it cannot yield a
# string as written -- confirm the intended result shape before relying
# on the "out product do" form.
   yystate := "m_do"
   suspend \
      ="D" |
      ="!" |
      m_pplist() || m_do()
end

# string
procedure m_done()
#=================
# NO optional ";" before "done"
# ';' without 'done' for error recovery
   yystate := "m_done"
   suspend \
      ="d;" |
      =";"
end

# string
procedure m_then()
#=================
# NO optional ";" before "then"
   yystate := "m_then"
   suspend \
      ="T"
end

# string
procedure m_else()
#=================
# NO optional ";" before "else"
   yystate := "m_else"
   suspend \
      ="E"
end

# string
procedure m_fi()
#=================
# NO optional ";" before "fi"
   yystate := "m_fi"
   suspend \
      ="F;"
end

#===================================================================#
#===================================================================#
# HTML command
#   html_command ::= <htag harg> | <htag>
#   html_group ::= htxt </htag>
#===================================================================#
#===================================================================#
# new simple code

# SYMBOL
procedure m_xml_version()
#========================
# <?xml version="1.0" encoding="UTF-8"?>
   yystate := "m_xml_version"
   suspend \
      SYMBOL("xml_version",[
         ="<?",
         ="w=",m_qword(),
         ="w=",m_qword(),
         ="?>"])
end

###### SYMBOL
#####procedure m_rdf_namespace()
######==========================
###### <rdf:RDF n=v ...
###### >   (closes the "<rdf:RDF n=v ... >" example of the disabled m_rdf_namespace)
#####yystate := "m_rdf_namespace"
#####suspend \
#####    SYMBOL("rdf_namespace",[
#####        ="<w",
#####        m_harg(),
#####        =">"])
#####end
#####
###### SYMBOL
#####procedure m_rdf_end()
######====================
###### </rdf:RDF>
###### NOTE: / is not SEPARATOR
#####yystate := "m_rdf_end"
#####suspend \
#####    SYMBOL("rdf_end",[="<",="/",="w",=">"])
#####end
#####
###### list
#####procedure m_namespace_list()
######===========================
#####yystate := "m_namespace_list"
#####suspend \
#####    [m_namespace()] |
#####    [m_namespace()] ||| m_namespace_list()
#####end
#####
###### NVPHRASE
#####procedure m_namespace()
######======================
#####yystate := "m_namespace"
#####suspend \
#####    NVPHRASE("nv",[="w",="S",m_qword()])
#####end

###### SYMBOL
#####procedure m_xml_doctype()
######========================
###### <!DOCTYPE name [ entity_list ]>
#####yystate := "m_xml_doctype"
#####suspend \
#####    SYMBOL("DOCTYPE",[="<",="M",m_qword(),
#####        ="[",m_xml_entity_list(),="]",
#####        ###m_BSE(" ",="[",m_xml_entity_list(),="]"),
#####        =">"])
#####end
#####
###### list
#####procedure m_xml_entity_list()
######============================
#####yystate := "m_xml_entity_list"
#####suspend \
#####    [m_xml_entity()] |
#####    [m_xml_entity()] ||| m_xml_entity_list()
#####end
#####
###### SYMBOL
#####procedure m_xml_entity()
######=======================
###### <!ENTITY name uri>
#####yystate := "m_xml_entity"
#####suspend \
#####    SYMBOL("ENTITY",[="<",="M",m_qword(),m_qword(),=">"])
#####end
#####
#####
###### SYMBOL
#####procedure m_html_comment()
######=========================
###### html_comment ::=
######     <!-- text -->
#####yystate := "m_html_comment"
#####suspend \
#####    SYMBOL("html_comment",[="<!--",m_htxt(),="-->"])
#####end

#================================================================#
#================================================================#
#================================================================#
# nested SYMBOL

# SYMBOL
procedure find_stype(pattern,symbol)
#===================================
# Depth-first search for the first SYMBOL whose stype equals pattern.
#   pattern : stype string to look for
#   symbol  : SYMBOL, or list/set whose elements are searched in turn
# Returns the matching SYMBOL; fails if none is found or if symbol
# is of any other type.
local sym,x
yystate := "find_stype"
case type(symbol) of {
default: { fail }
("list"|"set"): {
    every sym := !symbol do
        if x := find_stype(pattern,sym) then return x
    }
"SYMBOL": {
    if DEBUG=="STYPE" then write(mybug,"# INFO: find_stype: pattern <",pattern,"> stype <",symbol.stype,">")
    if symbol.stype == pattern then
        return symbol
    else
        return find_stype(pattern,symbol.svalue)  # descend into the value
    }
}
end

# string
procedure get_stype(symbol)
#==========================
# Returns the stype of a SYMBOL, or of the first element of a list
# (recursively).  Fails for any other type.
case type(symbol) of {
default:  { fail }
"list":   { return get_stype(symbol[1]) }
"SYMBOL": { return symbol.stype }
}
end

# list
procedure get_svalue(symbol)
#===========================
# Returns the svalue of a SYMBOL, or of the first element of a list
# (recursively).  Fails for any other type.
case type(symbol) of {
default:  { fail }
"list":   { return get_svalue(symbol[1]) }
"SYMBOL": { return symbol.svalue }
}
end

#===========================================================#
#===========================================================#
#===========================================================#
# parsing

# SYMBOL list
procedure get_symbol(fd,tok,kformat)
#===================================
# tok ::= tokenlist
#
# Parses a token list into a symbol.
#   fd      : not used in this procedure's body (kept for callers)
#   tok     : TOKEN or non-empty list of TOKEN
#   kformat : grammar selector; defaults to the global KFORMAT
# "ho" and "nrel" are delegated to parse_ho()/parse_nrel().  For the
# other formats the ttype codes of all tokens are concatenated into
# one string (tline), the grammar procedure selected by kformat is
# matched against it, and the match is mapped back onto the token
# values by map_symbol().  When the grammar does not match, the
# global error counters and the yy* globals are updated,
# syntax_error() is asked for a recovery symbol, and yyerror() is
# called if the result is still an "serror"/"error" SYMBOL.
# Returns the resulting symbol; fails (after logging) when tok is an
# empty list or has an unexpected type.
# The global KFORMAT is set to kformat while parsing and restored
# before returning.
local firstword
local saveKFORMAT
local merrmode
local t,efd
local tline,tsymbol,symbol
local m_symbol,tsym
local KHRV,prog
local line
local ke_kformat,param_kformat
local info,ierror,warning,error,serror
initial {
    statestk := list(1)
}

/kformat := KFORMAT
yystate := "get_symbol"
case kformat of {
"ho":   { return parse_ho(tok) }    # ho.icn
"nrel": { return parse_nrel(tok) }  # nrel.icn
default:{ }
}
# KFORMAT is saved in a local so that the early `fail` exits below
# (and nested calls that fail) cannot leave a stale entry behind and
# make an outer call restore the wrong format.
saveKFORMAT := KFORMAT
# KHRV := kformat||HOSEPARATOR||RELSEPARATOR||NVSEPARATOR
prog    := "get_symbol("||kformat||"): "
info    := "INFO: "||prog
warning := "WARNING: "||prog
error   := "ERROR: "||prog
ierror  := "Internal ERROR: "||prog
serror  := "Syntax ERROR: "
#if DEBUG == ("READ"|"LINE"|"WORD"|"TOKEN"|"SYMBOL") then writes_type(mylog,tok,info||"tokenlist")
case t := type(tok) of {
default: {
    writes_type_all(tok,ierror||"unexpected type tok")
    #return []
    fail
    }
"list": {
    if *tok = 0 then {
        writes_type_all(tok,ierror||"empty list tok")
        #return []
        fail
    }
    }                          # continue below
"TOKEN": { tok := [tok] }      # continue below
}
#####if *tok = 1 & tok[1].ttype==("X"|"W") then {
#####    return SYMBOL("error",tok[1].tvalue)
#####}
ke_kformat := unparse(get_char("attr","ke","kformat"))
param_kformat := KFORMAT
if DEBUG=="KFORMAT" then {
    writes_all([TypeComment||info,
        "ke_kformat<"||ke_kformat||"> ",
        "param_kformat<"||param_kformat||"> ",
        "kformat<"||kformat||">"])
}
KFORMAT := kformat
#####case kformat of {
case KFORMAT of {
#"mkr":  { m_symbol := m_proplist }
#"ku":   { m_symbol := m_proplist }
#"cu":   { m_symbol := m_proplist }
#"ho":   { m_symbol := m_proplist }
#"nrel": { m_symbol := m_proplist }
"mkr":  { m_symbol := m_proposition }
"ku":   { m_symbol := m_proposition }
#"cu":   { m_symbol := m_proposition }
#"mkr":  { m_symbol := m_ku_statement }
#"ku":   { m_symbol := m_ku_statement }
#"cu":   { m_symbol := m_ku_statement }
#"ho":   { m_symbol := m_group }
#"nrel": { m_symbol := m_group }
#"tail": { m_symbol := m_group }
"ho":   { m_symbol := m_group_tail }   # not reached: "ho" returned above
#"nrel": { m_symbol := m_group_tail }
"tail": { m_symbol := m_group_tail }
"nrel": { m_symbol := m_nest }  # allow null phrase, embedded list (not reached: "nrel" returned above)
#"hotail":{ m_symbol := m_ho_tail }
#"nreltail":{ m_symbol := m_nrel_tail }
"nv":   { m_symbol := m_nvphrase }
"list": { m_symbol := m_nest }  # allow null phrase, embedded list
"wordnet": { m_symbol := m_wnunit }  # wordnet definition
#"":     { m_symbol := m_proplist }
default: {
    writes_type(myerr,kformat,warning||"unknown kformat")
    writes_type(mylog,kformat,warning||"unknown kformat")
    kformat := "ku"
    m_symbol := m_proplist
    }
} # end case kformat

# no special meaning for words in HO and REL
if
#####(*CurrentGroup > 0) |
(kformat == ("ho"|"hounit"|"nrel")) then
    tok := horel_token(tok)  # token.icn

tline := ""
tsymbol := []
symbol := []
every t := !tok do {
    tline ||:= t.ttype
    put(tsymbol,t.tvalue)
    put(symbol,[t.tvalue])  # each token a list (if no match below)
}
if DEBUG == ("TLINE"|"TSYMBOL"|"TOKEN"|"SYMBOL") then writes_type_all(tline,info||"input tline")
####################################################
#&trace := 10
symbol := SYMBOL("serror",symbol)
if tline ? { ( tsym <- m_symbol(kformat) ) & pos(0) } then {
    # match
    if DEBUG == ("TLINE"|"TSYMBOL"|"TOKEN"|"SYMBOL") then writes_type_all(tsym,info||"parse result tsym")
    symbol := map_symbol(tsym,tok,*tok)
    if DEBUG == ("TLINE"|"TSYMBOL"|"TOKEN"|"SYMBOL") then writes_type_all(symbol,info||"parse result symbol")
} else {
    # no match
    if DEBUG == ("TLINE"|"TSYMBOL"|"TOKEN"|"SYMBOL") then writes_type_all(symbol,info||"parse error symbol")
    fName    := yyfnametab[image(FD)]    # myio.icn
    yylineno := yylinenotab[image(FD)]   # myio.icn
    yytext   := unparse(tok," ")         # unparse.icn
    yystate  := string(yystate)          # name of matching program
    yychar   := string(tline)            # token list
    KERROR +:= 1
    SERROR +:= 1
    merrmode := getenv("merrmode") | "ke"
    if DEBUG=="MERR" then {
        writes_type_all(merrmode,info||"merrmode")
    }
    case merrmode of {
    "merr": {
        # setup using file "err.state"
        # make command: merrmode=merr merr ke
        yystate := forcequote(yystate)
        yychar  := forcequote(yychar)
        # Merr setup file; open() fails if the file cannot be created,
        # and close() on a null value would be a run-time error.
        if efd := open("err.state","w") then {
            write(efd,"(",yystate,";",yychar,")")
            # yyerror() uses &errout
            close(efd)
        } else {
            writes_type(myerr,"err.state",error||"cannot open Merr setup file")
        }
        }
    ("ke"|"ksc"): {
        # run-time using yyerror(s)
        }
    default: { }
    }
    #======================================#
    # recover from parser "grammar" errors #
    #======================================#
    # NOTE(review): `line` is never assigned in this procedure, so
    # syntax_error() receives &null for it -- confirm that is intended.
    symbol := syntax_error(kformat,line,symbol,"error")
    case type(symbol) of {
    default: { writes_type_all(symbol,ierror||"unexpected type symbol") }
    "GROUP": { KERROR -:= 1 }
    "SYMBOL": {
        case symbol.stype of {
        "gs":      { KERROR -:= 1 }
        "wordnet": { KERROR -:= 1 }
        ("serror"|"error"): {
            writes(&errout,"# "||serror)
            statestk[1] := yystate
            yyerror("syntax error")  # yyerror.icn (generated by Merr)
            ##yyerror("syntax error("||kformat||")")  # yyerror.icn (generated by Merr)
            }
        } # end case stype
        }
    } # end case stype
    if DEBUG==("SYMBOL"|"TOKEN"|"PARSE") then {
        writes_type_all(yychar,info||"tokenlist")
    }
    #####case kformat of {
    case KFORMAT of {
    default: {
        # reset all completion counters
        NEWcomplete("_restart_")
        }
#####    "ho": {
#####        NEWcomplete("_restart_")
#####        if type(symbol) ~== "GROUP" then
#####            symbol := GROUP("hostm",symbol)
#####        writes_type_all(symbol,info||"symbol")
#####        }
#####    "nrel": {
#####        NEWcomplete("_restart_")
#####        if type(symbol) ~== "GROUP" then
#####            symbol := GROUP("nrelstm",symbol)
#####        writes_type_all(symbol,info||"symbol")
#####        }
    } # end case KFORMAT
} # end if tline ? { }
#&trace := 0
####################################################
if kformat==("nv") &    # do htm,html as ku
   *symbol=1 & *symbol[1]=1 then {
    writes_type(mylog,symbol,ierror||"null symbol")
    put(symbol[1],"")  # [[name],[""]]
}
#if DEBUG==("READ"|"LINE"|"WORD"|"TOKEN"|"SYMBOL") then writes_type(mylog,symbol,info||"symbol")
KFORMAT := saveKFORMAT
return symbol
end

# SYMBOL or list or string
procedure map_symbol(tsym,tok,finish)
#====================================
# tsym ::= SYMBOL
# tok  ::= list of TOKEN
#
# Rebuilds a parse result in terms of token values: the grammar is
# matched against a string of ttype codes, so every character of a
# matched string stands for one token of tok; this procedure walks
# tsym and replaces each such character by the tvalue of the
# corresponding token.
#   finish : when non-null this is the top-level call; the static
#            token cursor is reset and finish is taken as the number
#            of tokens.  Recursive calls omit it and share the cursor.
# Returns a string, list or record mirroring the shape of tsym.
local head,middle,tail
local s,t,x,y,z,val
local L,size,B,S,E
local mL,dL
static itoken, ntoken,ssize
local sline,tline,wline
local lookfor,found,ufound
static info,ierror,warning,hint
static SaveStack
initial {
    /SaveStack := []
    info    := "INFO: map_symbol: "
    ierror  := "Internal ERROR: map_symbol: "
    warning := "WARNING: map_symbol: "
    hint    := "grammar HINT: map_symbol: "
    itoken := 0
    ntoken := 0
}
##push(SaveStack,KFORMAT)
if \finish then {
    # initialize
    ntoken := finish
    itoken := 0
    if DEBUG == "MAP" then {
        writes_type(mybug,tsym,info||"tsym")
        writes_type(mybug,tok,info||"tok")
        writes_type(mylog,tsym,info||"tsym")
        writes_type(mylog,tok,info||"tok")
    }
    # check size
    if (ssize := tsize(tsym)) ~= (*tok) then {
        head   := TypeComment||ierror||"token size mismatch: tsize(tsym) <"
        middle := "> *tok <"
        tail   := ">\n"
        writes(myerr,head,ssize,middle,*tok,tail)
        writes(mylog,head,ssize,middle,*tok,tail)
        if DEBUG=="TOKEN" then {
            writes_type(myerr,tsym,info||"tsym")
            writes_type(myerr,tok,info||"tok")
            writes_type(mylog,tsym,info||"tsym")
            writes_type(mylog,tok,info||"tok")
        } else {
            sline := unparse(tsym)
            tline := token_type(tok)
            wline := token_value(tok)
            writes_type(myerr,sline,info||"symbol line")
            writes_type(myerr,tline,info||"token line")
            writes_type(myerr,wline,info||"word line")
            writes_type(mylog,sline,info||"symbol line")
            writes_type(mylog,tline,info||"token line")
            writes_type(mylog,wline,info||"word line")
        }
    }
}

case t := type(tsym) of {
"TOKEN": {
    # match_token() output - match tvalue
    itoken +:= 1
    lookfor := tsym.tvalue
    # tok[itoken] fails when the cursor has run past the token list;
    # report it the same way the "string" branch does instead of
    # silently returning a null value.
    if not (found := tok[itoken].tvalue) then {
        found := "<no more tokens>"
        writes_type(myerr,itoken,ierror||found||": itoken")
        writes_type(mylog,itoken,ierror||found||": itoken")
    }
    ###if found == lookfor then {
    ###    writes_all([TypeComment||hint,lookfor])
    ###} else {
    ###    writes_all([TypeComment||hint,found])
    ###}
    ###ufound := unique_name(found)
    ###writes_all([TypeComment||"generic name <",found,
    ###    "> replaced by unit name <",ufound,">\n"])
    #####return ufound
    return found
    }
"null": {
    writes_type(myerr,tsym,ierror||"unexpected null: tsym")
    writes_type(mylog,tsym,ierror||"unexpected null: tsym")
    return ""  # map to empty string
    }
default: {
    writes_type(myerr,tsym,ierror||"unexpected type("||t||") tsym")
    writes_type(mylog,tsym,ierror||"unexpected type("||t||") tsym")
    return "(UNEXPECTED TYPE("||t||") "||unparse(tsym)||")"
    }
"string": {
    s := ""
    every x := ! tsym do {
        if x == PSEPARATOR then {
            s ||:= PSEPARATOR  # possibly added during phrase parsing
        } else {
            itoken +:= 1
            if itoken > ntoken then {
                val := "<no more tokens>"
                writes_type(myerr,itoken,ierror||val||": itoken")
                writes_type(mylog,itoken,ierror||val||": itoken")
            } else {
                val := tok[itoken].tvalue
            }
            s ||:= val
        }
    } # end every x
    return s
    }
"list": {
    x := []
    every y := ! tsym do {
        z := map_symbol(y,tok)
        if type(z)=="string" & z=="," then {
            case KFORMAT of {
            ("ho"|"hounit"): {
                # keep comma
                #####put(x,z)  # (problem: always kformat=ku)
                }
            default: {
                # delete comma
                }
            } # end case KFORMAT
        } else if type(z)=="string" & z==";" then {
            put(x,z)  # keep semicolon (need proposition terminator)
        } else {
            put(x,z)
        }
    }
    return x
    }
"SYMBOL": {
    case tsym.stype of {
    default: {
        return SYMBOL(tsym.stype, map_symbol(tsym.svalue,tok))
        }
    #####"htxt": { return htxt_map_symbol(tsym,tok) }
    }
    }
"PROPOSITION":{ return prop_map_symbol(tsym,tok) }         # proplist.icn
"NEST":       { return nest_map_symbol(tsym,tok) }         # nest.icn
"PHRASE":     { return phrase_map_symbol(tsym,tok) }       # symbol.icn
"BSE":        { return bse_map_symbol(tsym,tok) }          # bselist.icn
"m_BSE":      { return bse_map_symbol(mbse2bse(tsym),tok) } # bselist.icn
"NVPHRASE":   { return nv_map_symbol(tsym,tok) }           # nvlist.icn
"VOPHRASE":   { return vo_map_symbol(tsym,tok) }           # binrel.icn
"AAPHRASE":   { return aa_map_symbol(tsym,tok) }           # array.icn
"GROUP":      { return group_map_symbol(tsym,tok) }        # begin.icn
"HOUNIT":     { return ho_map_symbol(tsym,tok) }           # hwalk.icn
"RELUNIT":    { return nrel_map_symbol(tsym,tok) }         # relation.icn
#####"HTMLUNIT": { return html_map_symbol(tsym,tok) }  # html.icn
#####"HTMLTEXT": { return html_map_symbol(tsym,tok) }  # html.icn
} # end case t
##KFORMAT := pop(SaveStack)
end

# integer
procedure tsize(symbol)
#======================
# count number of tokens in a symbol list
# called by map_symbol() in symbol.icn
#
# A string counts one token per character other than PSEPARATOR; a
# list is the sum of its elements; a SYMBOL is the size of its
# svalue; a TOKEN is the size of its ttype; the other record types
# are delegated to their own *_tsize() procedure.  null and unknown
# types are logged and counted as 0.
local t,ntoken,x
local L,s,B,S,E
static info,ierror,slevel
initial {
    info   := "INFO: tsize: "
    ierror := "Internal ERROR: tsize: "
    slevel := 0     # recursion depth, used in TSIZE debug output only
}
if DEBUG == "TSIZE" then {
    writes_type(mybug,symbol,info||"slevel <"||slevel||"> symbol")
}
slevel +:= 1
case t := type(symbol) of {
"TOKEN": {
    # match_token() output
    ntoken := tsize(symbol.ttype)
    }
"null": {
    writes_type(myerr,symbol,ierror||"unexpected null: symbol")
    writes_type(mylog,symbol,ierror||"unexpected null: symbol")
    ntoken := 0
    }
default: {
    writes_type(myerr,symbol,ierror||"unexpected type("||t||") symbol")
    writes_type(mylog,symbol,ierror||"unexpected type("||t||") symbol")
    ntoken := 0
    }
"string": {
    ntoken := 0
    every x := ! symbol do
        if x == PSEPARATOR then {
            # do NOT count (possibly added during phrase parsing)
        } else {
            ntoken +:= 1
        }
    }
"list": {
    ntoken := 0
    every ntoken +:= tsize(!symbol)
    }
"SYMBOL":  { ntoken := tsize(symbol.svalue) }         # symbol.icn
"NEST":    { ntoken := nest_tsize(symbol) }           # symbol.icn
"PHRASE":  { ntoken := phrase_tsize(symbol) }         # symbol.icn
"BSE":     { ntoken := bse_tsize(symbol) }            # bselist.icn
"m_BSE":   { ntoken := bse_tsize(mbse2bse(symbol)) }  # bselist.icn
"NVPHRASE":{ ntoken := nv_tsize(symbol) }             # nvlist.icn
"VOPHRASE":{ ntoken := vo_tsize(symbol) }             # binrel.icn
"AAPHRASE":{ ntoken := aa_tsize(symbol) }             # array.icn
"GROUP":   { ntoken := group_tsize(symbol) }          # begin.icn
"HOUNIT":  { ntoken := ho_tsize(symbol) }             # method.icn
"RELUNIT": { ntoken := nrel_tsize(symbol) }           # relation.icn
#####"HTMLUNIT":{ ntoken := html_tsize(symbol) }  # html.icn
#####"HTMLTEXT":{ ntoken := html_tsize(symbol) }  # html.icn
} # end case t
slevel -:= 1
if DEBUG == "TSIZE" then writes_type(mybug,ntoken,info||"slevel <"||slevel||"> ntoken")
return ntoken
end

#================================================================#
#================================================================#
# LINE
procedure get_line(fd)
#=====================
# Generates the lines read from fd, skipping lines that are exactly
# "" or "#".  fd defaults to FD when FD is non-null, otherwise myin.
local line
# `\FD` is required: a plain `FD | myin` always yields FD first, so a
# null FD would be assigned and myin never tried.
/fd := \FD | myin
while line := read(fd) do {
    if line == ("" | "#") then next
    suspend line
}
fail
end

# WORD list
procedure get_wordList(fd,line)
#==============================
# Collects every result of get_word(fd,line) into one list.
# A result that is itself a list is appended element-wise; any other
# result is appended as a single element.
local wlist,w
/fd := \FD | myin
wlist := []
# the result of a bare `wlist ||| ...` is discarded, so the list has
# to be updated explicitly
every w := get_word(fd,line) do
    if type(w) == "list" then wlist |||:= w else put(wlist,w)
return wlist
end

# TOKEN list
procedure get_tokenList(fd,wlist)
#================================
# Collects every result of get_token(wlist) into a list.
local tlist
/fd := \FD | myin
tlist := []
every put(tlist, get_token(wlist))
return tlist
end

# SYMBOL list
procedure get_symbolList(fd,tlist)
#=================================
# Collects the result of get_symbol(fd,tlist) into a list.
# A result that is itself a list is appended element-wise; any other
# result is appended as a single element.
local slist,sym
/fd := \FD | myin
slist := []
# get_symbol() takes (fd,tok,kformat): the token list must be passed
# as the second argument, and the result must be stored explicitly.
every sym := get_symbol(fd,tlist) do
    if type(sym) == "list" then slist |||:= sym else put(slist,sym)
return slist
end

#================================================================#
# list
procedure parse_nv(line)
#=======================
# name = value
#
# Splits line at its first "=".
# Returns [name,"=",value] with name and value passed through
# trimws(); [line] when there is no "="; [] when line is empty or
# starts with "#".
local nvlist,i,name,nvop,value
if *line = 0 then return []
if line[1] == "#" then return []  # comment
if i := upto('=',line) then {
    name  := trimws(line[1:i])
    nvop  := line[i]
    value := trimws(line[i+1:0])
    nvlist := [name,nvop,value]
} else {
    nvlist := [line]
}
return nvlist
end

# string (or empty list)
procedure parse_value(line)
#==========================
# Runs parse_nv() on "x = "||line and returns its third element.
# Returns [] when line is empty or starts with "#".
local saveKFORMAT,nvlist,value
if *line = 0 then return []
if line[1] == "#" then return []  # comment
nvlist := parse_nv("x = "||line)
if DEBUG == "VALUE" then writes_type(mybug,nvlist,"INFO: parse_value: nvlist")
value := nvlist[3]
return value
end

#--------------------------------------------------#
# list
procedure parse_quantifier(line,sep)
#===================================
# quantifier - word
# SYMBOL("nrel",[quantifier,"-",word])
#
# Calls parse_nrel(line,"list") with the global RELSEPARATOR
# temporarily set to sep (default dequote(QSEPARATOR)) and returns
# its result.  Returns [] when line is empty or starts with "#".
local saveRELSEPARATOR,symbol,tuple
static info
initial {
    info := "INFO: parse_quantifier: "
}
/sep := dequote(QSEPARATOR)
if *line = 0 then return []
if line[1] == "#" then return []  # comment
saveRELSEPARATOR := RELSEPARATOR; RELSEPARATOR := sep
symbol := parse_nrel(line,"list")
RELSEPARATOR := saveRELSEPARATOR
if DEBUG == "QUANTIFIER" then writes_type(mybug,symbol,info||"symbol")
tuple := symbol
return tuple
end

#------------------------------------------------------------------#
#------------------------------------------------------------------#
# string
procedure interpret_line(line,kformat,dollar)
#============================================
# interpret_line() called by new_concept() in concept.icn
# NOTE: parse_file() indirectly calls
#       get_symbol() & interpret_symbol()
#
#   line    : line to interpret; returned unchanged when empty
#   kformat : value given to the global KFORMAT during the
#             parse_file() call (default "ku")
#   dollar  : "replace" runs replace_dollar_line() on the unparsed
#             line first; anything else (default "ignore") does not
# Returns the (possibly dollar-replaced) unparsed line.
local infd,continue
local uline,rline
local symbol,sline
local saveKFORMAT
static info,head,tail,error
initial {
    info  := "INFO: interpret_line: "
    error := "ERROR: interpret_line: "
    head  := " ## interpret_line: "
    tail  := " ##"
    #####init_parser()
}
/kformat := "ku"
/dollar := "ignore"
##yystate := "interpret_line"
uline := unparse(line)
if *line = 0 then return line
case dollar of {
"replace": { rline := replace_dollar_line(uline) }
default:   { rline := uline }
}
if DEBUG == "INTERPRET_LINE" then writes_type_all(rline,info||"rline")
saveKFORMAT := KFORMAT; KFORMAT := kformat
parse_file(rline)
KFORMAT := saveKFORMAT
return rline
end

# SYMBOL list
procedure parse_line(line)
#=========================
# called from ifthen() in ifthen.icn
# parse single line
# parse external representation of KnowledgeUnit
# do NOT substitute dollar and dot variables
# SYMBOL(stype,svalue)
#
# Trims line, fails if it is empty or a "#" comment, collects the
# results of get_token(line) into a token list and returns
# get_symbol(myin,tokenlist).
local infd,continue
local token,tokenlist,symbollist
local mline,rline
local linetoken,tline,symbol,linesymbol
local lineword
local info,head,tail
yystate := "parse_line"
info := "INFO: parse_line("||KFORMAT||"): "
head := " ## parse_line("||KFORMAT||"): "
tail := " ##"
infd := myin
case KFORMAT of {
default: { continue := "no" }
"ku":    { continue := "yes" }
"cu":    { continue := "yes" }
"ho":    { continue := "no" }
##"hounit":{ continue := "no" }
"nrel":  { continue := "no" }
"nv":    { continue := "no" }
##"html": { continue := "yes" }
##"htm":  { continue := "yes" }
##"xml":  { continue := "yes" }
##"rdf":  { continue := "yes" }
##"mcf":  { continue := "no" }
##"nt":   { continue := "no" }
} # end case KFORMAT
if DEBUG==("YES"|"PARSE") then {
    writes_type(mybug,line,info||"line")
}
if DEBUG=="TIME" then write(mybug,"## ",&time,head,"trimcomment",tail)
#line := trimcomment(line)  # <== in prompt()
if DEBUG=="TIME" then write(mybug,"## ",&time,head,"trimws",tail)
line := trimws(line)
if *line = 0 then fail
if line[1] == "#" then fail  # comment
if DEBUG=="TIME" then write(mybug,"## ",&time,head,"map to lower case",tail)
mline := line
case KCASE of {
("LOWER"|"lower"): { mline := map(mline) }
#("UPPER"|"upper"): { mline := map(mline,&lcase,&ucase) }  # <<== won't work !!!
}
if DEBUG=="PARSE" then writes_type(mybug,mline,info||"mline")
if DEBUG=="TIME" then write(mybug,"## ",&time,head,"remove HTML",tail)
rline := mline
case REMOVEHTML of {
("yes"|"YES"): {
    #####rline := remove_html(rline)  # <<== NO -- SAVEHREF in map_token()
    if *rline=0 then fail
    if rline[1] == "#" then fail  # comment
    }
}
if DEBUG=="PARSE" then writes_type(mybug,rline,info||"rline")
if DEBUG=="TIME" then write(mybug,"## ",&time,head,"get_word",tail)
# new
# NOTE(review): the tokens are taken from `line`, not from the
# case-mapped `rline`, so KCASE only affects the debug output above;
# confirm that this is intended.
tokenlist := []
while token := get_token(line) do put(tokenlist,token)
symbollist := get_symbol(infd,tokenlist)
return symbollist
end
#