00001 <?php
00002
/**
 * Parser for commON documents (a CSV-based record notation).
 *
 * The raw content is first tokenized into CSV records, then interpreted as a
 * sequence of sections introduced by "&&recordList", "&&dataset" or
 * "&&linkage". Parsing happens in the constructor; results and errors are
 * then available through the getters, and getRdfN3() serializes the parsed
 * records as N3.
 */
class CommonParser
{
  /** @var string Raw content to parse; line breaks are normalized to "\r" by csvParser(). */
  private $content = "";

  /** @var array List of CSV records, each one a list of string cells. */
  private $csvRecords = array();

  /**
   * @var array List of parsed records. Each record maps a "&property" name to
   *            a list of statements array("value" => string, "reify" => ""|array).
   */
  private $commonRecords = array();

  /** @var array Linkage schema with optional "description", "properties" and "types" keys. */
  private $commonLinkageSchema = array();

  /** @var array List of error messages collected while parsing. */
  private $errors = array();

  /**
   * Parses the given content immediately (CSV pass, then commON pass).
   *
   * @param string $content Raw commON/CSV text.
   */
  public function __construct($content)
  {
    $this->content = (string) $content;

    $this->csvParser();
    $this->commonParser();
  }

  /**
   * @return array The CSV records produced by the CSV pass.
   */
  public function getCsvRecords()
  {
    return($this->csvRecords);
  }

  /**
   * @return array The records produced by the commON pass.
   */
  public function getCommonRecords()
  {
    return($this->commonRecords);
  }

  /**
   * @return array The linkage schema parsed from the "&&linkage" section, if any.
   */
  public function getLinkageSchema()
  {
    return($this->commonLinkageSchema);
  }

  /**
   * @return array|false The list of error messages, or FALSE when no error was recorded.
   */
  public function getErrors()
  {
    if(count($this->errors) == 0)
    {
      return(FALSE);
    }

    return($this->errors);
  }

  /**
   * Returns the character of the content at the given offset, or "" when the
   * offset is outside the content. Keeps look-ahead/look-behind reads from
   * touching invalid (or negative, i.e. wrapping) string offsets.
   *
   * @param int $offset Zero-based offset in $this->content.
   * @return string One character, or "".
   */
  private function charAt($offset)
  {
    if($offset < 0 || $offset >= strlen($this->content))
    {
      return("");
    }

    return($this->content[$offset]);
  }

  /**
   * Splits the content into CSV records and stores them in $this->csvRecords.
   *
   * Rules implemented here:
   *  - every run of CR/LF characters is collapsed into one "\r" record
   *    separator (this also applies inside quoted values);
   *  - cells are separated by ","; a cell may be wrapped in double quotes, in
   *    which case a doubled quote ("") stands for one literal quote;
   *  - an opening quote may be preceded by one space after a comma, and a
   *    closing quote may be followed by one space before the separator;
   *  - a trailing record that is not terminated by a line break is kept.
   *
   * On malformed input an error message is recorded and parsing stops.
   */
  private function csvParser()
  {
    $normalized = preg_replace("/[\r\n]+/", "\r", $this->content);

    if($normalized !== NULL)
    {
      $this->content = $normalized;
    }

    $length = strlen($this->content);

    // Cells of the record currently being read.
    $record = array();

    // Offset of the first character of the current cell.
    $fieldStart = 0;

    // Offset of the closing quote of the current cell; -1 while the cell is unquoted.
    $quotedEnd = -1;

    $inDoubleQuotes = FALSE;

    for($i = 0; $i < $length; $i++)
    {
      $char = $this->content[$i];

      if($inDoubleQuotes)
      {
        if($char !== '"')
        {
          continue;
        }

        if($this->charAt($i + 1) === '"')
        {
          // Escaped quote: skip its second half.
          $i++;
          continue;
        }

        $inDoubleQuotes = FALSE;
        $quotedEnd = $i;

        // Only a separator (or the end of the content) may follow a closing
        // quote, optionally after a single space.
        $follower = $this->charAt($i + 1);

        if($follower === " ")
        {
          $follower = $this->charAt($i + 2);
        }

        if($follower !== "" && $follower !== "," && $follower !== "\r")
        {
          array_push($this->errors, "CSV parser: A comma or a return carrier is expected after an un-escaped double quotes.");
          return;
        }

        continue;
      }

      if($char === '"')
      {
        $previous = $this->charAt($i - 1);

        // A quote outside a quoted value is only legal as the first character
        // of a cell.
        if($i == 0 || $previous === "," || $previous === "\r" ||
           ($previous === " " && $this->charAt($i - 2) === ","))
        {
          $inDoubleQuotes = TRUE;
          $fieldStart = $i + 1;
          continue;
        }

        if($previous === " ")
        {
          array_push($this->errors, "CSV parser: An un-escaped double quote has been detected.");
        }
        else
        {
          array_push($this->errors, "CSV parser: An un-escaped double quote has been detected (around: '... ".str_replace(array("\n", "\r"), " ", substr($this->content, max(0, $i - 5), 10))." ... (char #$i)').");
        }

        return;
      }

      if($char === "," || $char === "\r")
      {
        $fieldEnd = ($quotedEnd >= 0 ? $quotedEnd : $i);

        array_push($record, str_replace('""', '"', (string) substr($this->content, $fieldStart, $fieldEnd - $fieldStart)));

        $fieldStart = $i + 1;
        $quotedEnd = -1;

        if($char === "\r")
        {
          array_push($this->csvRecords, $record);
          $record = array();
        }
      }
    }

    if($inDoubleQuotes)
    {
      array_push($this->errors, "CSV parser: A quoted value is not terminated by a closing double quote.");
      return;
    }

    // Flush the last record when the content does not end with a line break.
    if($fieldStart < $length || count($record) > 0)
    {
      $fieldEnd = ($quotedEnd >= 0 ? $quotedEnd : $length);

      array_push($record, str_replace('""', '"', (string) substr($this->content, $fieldStart, $fieldEnd - $fieldStart)));
      array_push($this->csvRecords, $record);
    }
  }

  /**
   * Interprets the CSV records as a commON document.
   *
   * Rows whose first cell starts with "&&" switch the current section. The
   * first row of a "record" or "linkage" section (and, inside "linkage", any
   * row whose first cell starts with "&") is a structure row naming the
   * columns; the following rows are data rows. Rows of the "dataset" section
   * are not interpreted. Blank rows are skipped.
   *
   * Results go to $this->commonRecords and $this->commonLinkageSchema; on a
   * structural error a message is recorded and parsing stops.
   */
  private function commonParser()
  {
    $currentSection = "";

    // Identifier of the record currently being assembled.
    $currentRecord = "";

    // Record currently being assembled.
    $commonRecord = array();

    // Column names declared by the latest structure row.
    $recordStructure = array();

    // TRUE when the next non-blank row has to be a structure row.
    $shouldBeRecordDescription = FALSE;

    foreach($this->csvRecords as $record)
    {
      if($this->isBlankRecord($record))
      {
        continue;
      }

      $firstCell = (isset($record[0]) ? (string) $record[0] : "");

      if(substr($firstCell, 0, 2) == "&&")
      {
        switch($firstCell)
        {
          case "&&recordList":
            $currentSection = "record";
            $shouldBeRecordDescription = TRUE;
          break;

          case "&&dataset":
            $currentSection = "dataset";
          break;

          case "&&linkage":
            $currentSection = "linkage";
            $shouldBeRecordDescription = TRUE;
          break;

          default:
            array_push($this->errors, "commON Parser: Unknown section $firstCell");
            return;
        }

        continue;
      }

      if(!$shouldBeRecordDescription && $currentSection == "linkage" && substr($firstCell, 0, 1) == "&")
      {
        // A linkage schema holds several tables, each with its own structure row.
        $shouldBeRecordDescription = TRUE;
      }

      if($shouldBeRecordDescription)
      {
        $recordStructure = array();

        foreach($record as $property)
        {
          if($property != "" && substr($property, 0, 1) != "&")
          {
            array_push($this->errors, "commON Parser: A record structure property has been defined without starting with '&' ($property)");
            return;
          }

          // Empty cells are kept as placeholders so that column positions
          // stay aligned with the data rows.
          array_push($recordStructure, $property);
        }

        if(count($recordStructure) <= 0)
        {
          array_push($this->errors, "commON Parser: No properties defined for this record structure");
          return;
        }

        $shouldBeRecordDescription = FALSE;
        continue;
      }

      if($currentSection == "record")
      {
        $record = $this->fitRecordToStructure($record, $recordStructure, "commON Parser: Too many properties defined for the record according to the record structure");

        if($record === FALSE)
        {
          return;
        }

        $this->addRecordRow($record, $recordStructure, $commonRecord, $currentRecord);
      }
      elseif($currentSection == "linkage")
      {
        $record = $this->fitRecordToStructure($record, $recordStructure, "commON Parser: Too many properties defined for the record according to the linkage schema record structure");

        if($record === FALSE)
        {
          return;
        }

        if($this->addLinkageRow($record, $recordStructure))
        {
          $shouldBeRecordDescription = TRUE;
        }
      }
    }

    // Flush the record still being assembled; nothing is added when no
    // record was parsed at all.
    if(count($commonRecord) > 0)
    {
      array_push($this->commonRecords, $commonRecord);
    }
  }

  /**
   * Tells whether every cell of a CSV record is empty.
   *
   * @param array $record List of cells.
   * @return bool TRUE when the record has no non-empty cell.
   */
  private function isBlankRecord(array $record)
  {
    foreach($record as $value)
    {
      if((string) $value !== "")
      {
        return(FALSE);
      }
    }

    return(TRUE);
  }

  /**
   * Aligns a data row with its structure row: pads missing trailing cells
   * with "" and rejects rows that have more cells than declared columns.
   *
   * @param array  $record          Cells of the data row.
   * @param array  $recordStructure Column names of the structure row.
   * @param string $tooManyMessage  Error recorded when the row is too long.
   * @return array|false The padded row, or FALSE after recording the error.
   */
  private function fitRecordToStructure(array $record, array $recordStructure, $tooManyMessage)
  {
    if(count($record) > count($recordStructure))
    {
      array_push($this->errors, $tooManyMessage);
      return(FALSE);
    }

    return(array_pad($record, count($recordStructure), ""));
  }

  /**
   * Splits a cell on the "|" multi-value separator.
   *
   * @param string $cell Raw cell content.
   * @return array List with one string per value (a single element when the
   *               cell has no separator).
   */
  private function splitValues($cell)
  {
    $cell = (string) $cell;

    if(strpos($cell, "|") === FALSE)
    {
      return(array($cell));
    }

    return(explode("|", $cell));
  }

  /**
   * Merges one data row of the "record" section into the record being built.
   *
   * A non-empty "&id" cell that differs from the current identifier starts a
   * new record (the previous one is appended to $this->commonRecords).
   * Columns named "&attribute&reifiedAttribute" attach their values to the
   * most recent statement of "&attribute". Any other non-empty cell adds one
   * statement per "|"-separated value.
   *
   * @param array  $record          Cells, already aligned with the structure.
   * @param array  $recordStructure Column names.
   * @param array  $commonRecord    Record being assembled (updated in place).
   * @param string $currentRecord   Identifier of that record (updated in place).
   */
  private function addRecordRow(array $record, array $recordStructure, array &$commonRecord, &$currentRecord)
  {
    foreach($recordStructure as $key => $rs)
    {
      // Placeholder column: nothing to read.
      if($rs == "")
      {
        continue;
      }

      $cell = (string) $record[$key];

      if($rs == "&id")
      {
        if($cell !== "" && $cell !== $currentRecord)
        {
          if($currentRecord !== "")
          {
            // A new identifier closes the previous record.
            array_push($this->commonRecords, $commonRecord);
            $commonRecord = array();
          }

          $currentRecord = $cell;
          $commonRecord[$rs] = array(array("value" => $cell, "reify" => ""));
        }

        continue;
      }

      if($cell === "")
      {
        continue;
      }

      $reifiedAttribute = $this->getReifiedAttribute($rs);

      if($reifiedAttribute !== FALSE)
      {
        $attribute = $reifiedAttribute["attribute"];

        if(!isset($commonRecord[$attribute]))
        {
          continue;
        }

        $last = count($commonRecord[$attribute]) - 1;

        if($last < 0)
        {
          continue;
        }

        if(!isset($commonRecord[$attribute][$last]["reify"]) || !is_array($commonRecord[$attribute][$last]["reify"]))
        {
          $commonRecord[$attribute][$last]["reify"] = array();
        }

        // NOTE(review): every "|"-separated value is attached to the latest
        // statement of the attribute, like the single-value case. Confirm
        // against the commON specification whether values should instead be
        // paired positionally with a multi-valued attribute.
        foreach($this->splitValues($cell) as $v)
        {
          $commonRecord[$attribute][$last]["reify"][$reifiedAttribute["reifiedAttribute"]][] = $v;
        }

        continue;
      }

      foreach($this->splitValues($cell) as $v)
      {
        $commonRecord[$rs][] = array("value" => $v, "reify" => "");
      }
    }
  }

  /**
   * Stores one data row of the "linkage" section in the linkage schema.
   *
   * The target table depends on the current structure row: "properties" when
   * it has an "&attributeList" column, "types" when it has a "&typeList"
   * column, otherwise the row describes the schema itself ("description").
   *
   * @param array $record          Cells, already aligned with the structure.
   * @param array $recordStructure Column names.
   * @return bool TRUE when the row was the schema description, in which case
   *              the next row has to be a structure row.
   */
  private function addLinkageRow(array $record, array $recordStructure)
  {
    if(in_array("&attributeList", $recordStructure, TRUE))
    {
      $table = "properties";
    }
    elseif(in_array("&typeList", $recordStructure, TRUE))
    {
      $table = "types";
    }
    else
    {
      $table = "description";
    }

    if(!isset($this->commonLinkageSchema[$table]) || !is_array($this->commonLinkageSchema[$table]))
    {
      $this->commonLinkageSchema[$table] = array();
    }

    if($table == "description")
    {
      foreach($recordStructure as $key => $rs)
      {
        $this->commonLinkageSchema["description"][$rs] = $this->splitValues($record[$key]);
      }

      return(TRUE);
    }

    $row = array();

    foreach($recordStructure as $key => $rs)
    {
      $row[$rs] = $this->splitValues($record[$key]);
    }

    array_push($this->commonLinkageSchema[$table], $row);

    return(FALSE);
  }

  /**
   * Splits a column name of the form "&attribute&reifiedAttribute".
   *
   * @param string $attribute Column name.
   * @return array|false array("attribute" => "&attribute",
   *                     "reifiedAttribute" => "&reifiedAttribute"), or FALSE
   *                     when the name has no second "&".
   */
  private function getReifiedAttribute($attribute)
  {
    $attribute = (string) $attribute;

    // Too short to hold a second "&"; also avoids an out-of-range search offset.
    if(strlen($attribute) < 2)
    {
      return(FALSE);
    }

    $pos = strpos($attribute, "&", 1);

    if($pos === FALSE)
    {
      return(FALSE);
    }

    return(array("attribute" => substr($attribute, 0, $pos), "reifiedAttribute" => substr($attribute, $pos)));
  }

  /**
   * Serializes the parsed records as N3.
   *
   * Record types and properties are mapped through the linkage schema when a
   * mapping exists. Unmapped types become owl:Thing and unmapped properties
   * are minted as $baseOntology followed by the property name without its
   * leading "&". Reified values are emitted as rdf:Statement blank nodes
   * after all the record triples.
   *
   * @param string $baseInstance URI prefix prepended to record identifiers.
   * @param string $baseOntology URI prefix used for unmapped properties.
   * @return string The N3 document.
   */
  public function getRdfN3($baseInstance, $baseOntology)
  {
    $n3 = "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n\n";

    // Reification statements are collected apart and appended at the end.
    $n3ReificationStatements = "";

    foreach($this->commonRecords as $record)
    {
      $type = array();

      if(isset($record["&type"]) && is_array($record["&type"]))
      {
        foreach($record["&type"] as $rt)
        {
          $t = $this->getLinkedType(isset($rt["value"]) ? $rt["value"] : "");

          array_push($type, ($t === "" ? "http://www.w3.org/2002/07/owl#Thing" : $t));
        }
      }

      if(count($type) == 0)
      {
        array_push($type, "http://www.w3.org/2002/07/owl#Thing");
      }

      $recordId = $baseInstance.(isset($record["&id"][0]["value"]) ? $record["&id"][0]["value"] : "");

      $n3 .= "\n";

      foreach($type as $t)
      {
        $n3 .= "<".$recordId."> a <".$t."> .\n";
      }

      foreach($record as $property => $values)
      {
        $property = (string) $property;

        if($property == "&id" || $property == "&type" || !is_array($values))
        {
          continue;
        }

        $p = $this->getLinkedProperty($property);

        if($p === "")
        {
          // No mapping: mint the property in the base ontology.
          $p = $baseOntology.substr($property, 1);
        }

        foreach($values as $value)
        {
          if(!is_array($value) || !isset($value["value"]))
          {
            continue;
          }

          $object = "\"\"\"".$this->escapeN3($value["value"])."\"\"\"";

          $n3 .= "<".$recordId."> <".$p."> ".$object." .\n";

          if(!isset($value["reify"]) || !is_array($value["reify"]))
          {
            continue;
          }

          $statementNode = "_:".md5($recordId.$p.$value["value"]);

          foreach($value["reify"] as $reifiedAttribute => $reiValues)
          {
            if(!is_array($reiValues))
            {
              continue;
            }

            $reifiedAttribute = (string) $reifiedAttribute;

            $rp = $this->getLinkedProperty($reifiedAttribute);

            if($rp === "")
            {
              $rp = $baseOntology.substr($reifiedAttribute, 1);
            }

            foreach($reiValues as $reiValue)
            {
              $n3ReificationStatements .= $statementNode." a rdf:Statement ;\n";
              $n3ReificationStatements .= " rdf:subject <".$recordId."> ;\n";
              $n3ReificationStatements .= " rdf:predicate <".$p."> ;\n";
              $n3ReificationStatements .= " rdf:object ".$object." ;\n";
              $n3ReificationStatements .= " <".$rp."> \"\"\"".$this->escapeN3($reiValue)."\"\"\" .\n\n";
            }
          }
        }
      }
    }

    return($n3.$n3ReificationStatements);
  }

  /**
   * Looks a name up in one table of the linkage schema.
   *
   * Only the first value of the list column of each row is compared with the
   * name (a leading "&" of the name is ignored).
   *
   * @param string $table      Schema table: "properties" or "types".
   * @param string $listColumn Column holding the names: "&attributeList" or "&typeList".
   * @param string $target     Name to look for.
   * @return string First "&mapTo" value of the matching row, or "" when there is none.
   */
  private function getLinkedResource($table, $listColumn, $target)
  {
    $target = (string) $target;

    if(substr($target, 0, 1) == "&")
    {
      $target = (string) substr($target, 1);
    }

    if(!isset($this->commonLinkageSchema[$table]) || !is_array($this->commonLinkageSchema[$table]))
    {
      return("");
    }

    foreach($this->commonLinkageSchema[$table] as $row)
    {
      if(isset($row[$listColumn][0]) && (string) $row[$listColumn][0] === $target)
      {
        return(isset($row["&mapTo"][0]) ? (string) $row["&mapTo"][0] : "");
      }
    }

    return("");
  }

  /**
   * @param string $targetAttribute Attribute name, with or without its leading "&".
   * @return string URI the attribute is mapped to by the linkage schema, or "".
   */
  private function getLinkedProperty($targetAttribute)
  {
    return($this->getLinkedResource("properties", "&attributeList", $targetAttribute));
  }

  /**
   * @param string $targetType Type name, with or without a leading "&".
   * @return string URI the type is mapped to by the linkage schema, or "".
   */
  private function getLinkedType($targetType)
  {
    return($this->getLinkedResource("types", "&typeList", $targetType));
  }

  /**
   * Escapes a literal for inclusion in an N3 string: backslashes first, then
   * double and single quotes.
   *
   * @param string $literal Raw literal.
   * @return string Escaped literal.
   */
  private function escapeN3($literal)
  {
    $literal = str_replace("\\", "\\\\", $literal);

    return str_replace(array('"', "'"), array('\\"', "\\'"), $literal);
  }

}
01172
01173 ?>