CommonParser.php
Go to the documentation of this file.
<?php

/**
 * Parser for commON documents: a CSV serialization that can carry a list of
 * records ("&&recordList"), a dataset description ("&&dataset") and a linkage
 * schema ("&&linkage").
 *
 * The constructor runs two passes over the content:
 *   1. csvParser()    splits the raw text into rows of cells ($csvRecords);
 *   2. commonParser() interprets these rows as commON sections and fills
 *                     $commonRecords, $dataset and $commonLinkageSchema.
 *
 * Parsing problems are never thrown; they are collected and exposed through
 * getCsvErrors() and getCommonErrors().
 *
 * Every attribute value is stored as a "statement":
 *   array("value" => string, "reify" => "" | array(reifiedAttribute => array(values)))
 */
class CommonParser
{
  /**
   * Raw CSV content. Line breaks are normalized to "\r" by csvParser().
   *
   * NOTE(review): kept public because this property used to be created
   * dynamically (hence publicly readable); make it private once it is
   * confirmed that no caller reads it.
   */
  public $content = "";

  /**
   * Dataset description: attribute name => list of statements.
   *
   * NOTE(review): public for the same backward-compatibility reason as $content.
   */
  public $dataset = array();

  /** Rows extracted from the CSV content; each row is a list of cell strings. */
  private $csvRecords = array();

  /** Parsed records; each record maps an attribute name to a list of statements. */
  private $commonRecords = array();

  /**
   * Linkage schema. Possible keys: "description", "prefixes", "types" and
   * "properties".
   */
  private $commonLinkageSchema = array();

  /** Error messages produced by the CSV pass. */
  private $csvErrors = array();

  /** Error messages produced by the commON pass. */
  private $commonErrors = array();

  /**
   * Parse a commON document.
   *
   * @param string $content Raw content of the commON (CSV) file.
   */
  public function __construct($content)
  {
    $this->content = (string) $content;

    // Parse the CSV file
    $this->csvParser();

    // Parse the commON records (runs even when the CSV pass reported an error,
    // so that whatever rows could be extracted are still interpreted)
    $this->commonParser();
  }

  /**
   * @return array Rows of the CSV file, each one being a list of cell strings.
   */
  public function getCsvRecords()
  {
    return ($this->csvRecords);
  }

  /**
   * @return array Parsed commON records.
   */
  public function getCommonRecords()
  {
    return ($this->commonRecords);
  }

  /**
   * @return array Parsed linkage schema (empty array when the file defines none).
   */
  public function getLinkageSchema()
  {
    return ($this->commonLinkageSchema);
  }

  /**
   * @return array Parsed dataset description (empty array when the file defines none).
   */
  public function getDataset()
  {
    return ($this->dataset);
  }

  /**
   * @return array|bool List of CSV parsing error messages, or FALSE when there is none.
   */
  public function getCsvErrors()
  {
    if(count($this->csvErrors) == 0)
    {
      return (FALSE);
    }

    return ($this->csvErrors);
  }

  /**
   * @return array|bool List of commON parsing error messages, or FALSE when there is none.
   */
  public function getCommonErrors()
  {
    if(count($this->commonErrors) == 0)
    {
      return (FALSE);
    }

    return ($this->commonErrors);
  }

  /**
   * Split $this->content into rows of cells and store them in $this->csvRecords.
   *
   * A row looks like:
   *
   *   Array
   *   (
   *     [0] => info:lib:am:2009-02-18:maria_francisca_abad_garcia
   *     [1] => Person
   *     [2] => Maria Francisca Abad-Garcia
   *     [3] =>
   *     [4] => http://authorclaim.org/profile/pab1/
   *     [5] => Acceso Abierto y revistas médicas españolas
   *     [6] => http://eprints.rclis.org/11490/1/open_acces_Medicina_Cl%C3%ADnica_2006_versi%C3%B3n_aceptada_del_autor.pdf
   *   )
   *
   * Each key refers to a column description such as:
   *
   *   Array
   *   (
   *     [0] => &id
   *     [1] => &type
   *     [2] => &prefLabel
   *     [3] => &homepage
   *     [4] => &authorClaimsPage
   *     [5] => &isAuthorOfTitle
   *     [6] => &isAuthorOfTitle&prefURL
   *   )
   *
   * Key/value pairs can be recreated by binding the keys, such as: "&type -> Person"
   *
   * Cells may be wrapped in double quotes; inside quotes a double quote is
   * escaped by doubling it. Parsing stops at the first error, which is
   * appended to $this->csvErrors.
   */
  private function csvParser()
  {
    /* Index pointing to the beginning of the current cell in the content */
    $startRecord = 0;

    /* Index of the double quote that closed the current cell (NULL for an unquoted cell) */
    $closingQuote = NULL;

    /* The row being assembled */
    $record = array();

    /* Tells if we are inside a quoted cell (necessary for proper escaping) */
    $inDoubleQuotes = FALSE;

    // Remove all extra carriage returns. We normalize with "\r"
    $this->content = (string) preg_replace("/[\r\n]+/", "\r", $this->content);

    // Rows are only emitted when their terminating "\r" is read: make sure the
    // last row has one, otherwise it would be silently dropped.
    if($this->content != "" && substr($this->content, -1) != "\r")
    {
      $this->content .= "\r";
    }

    $length = strlen($this->content);

    for($i = 0; $i < $length; $i++)
    {
      $char = $this->content[$i];

      if($inDoubleQuotes)
      {
        // Inside double quotes we take everything until the closing double quote.
        if($char != '"')
        {
          continue;
        }

        $next = $this->charAt($i + 1);

        if($next == '"')
        {
          // Escaped double quote: skip its second half
          $i++;
          continue;
        }

        $inDoubleQuotes = FALSE;
        $closingQuote = $i;

        // A closing double quote must be followed by a comma or a line break,
        // optionally preceded by a single space.
        $afterNext = $this->charAt($i + 2);
        $endsCell = ($next == "," || $next == "\r");
        $endsCellAfterSpace = ($next == " " && ($afterNext == "," || $afterNext == "\r"));

        if(!$endsCell && !$endsCellAfterSpace)
        {
          array_push($this->csvErrors,
            "CSV parser (001): A comma or a return carrier is expected after an un-escaped double quotes.");
          return;
        }

        continue;
      }

      if($char == "," || $char == "\r")
      {
        // End of a cell. For a quoted cell the value stops at its closing quote.
        $endRecord = ($closingQuote !== NULL ? $closingQuote : $i);
        $closingQuote = NULL;

        array_push($record,
          str_replace('""', '"', (string) substr($this->content, $startRecord, ($endRecord - $startRecord))));

        $startRecord = $i + 1;

        if($char == "\r")
        {
          // End of the row: add it to the rows list
          array_push($this->csvRecords, $record);
          $record = array();
        }
      }
      elseif($char == '"')
      {
        $previous = $this->charAt($i - 1);

        // A quoted cell can only start at the beginning of the content, right
        // after a comma or a line break, or after a comma followed by one space.
        if($i == 0 || $previous == "," || $previous == "\r"
           || ($previous == " " && $this->charAt($i - 2) == ","))
        {
          $inDoubleQuotes = TRUE;
          $startRecord = $i + 1;
        }
        elseif($previous == " ")
        {
          array_push($this->csvErrors, "CSV parser (002): An un-escaped double quote has been detected.");
          return;
        }
        else
        {
          array_push($this->csvErrors, "CSV parser (003): An un-escaped double quote has been detected (around: '... "
            . str_replace(array ("\n", "\r"), " ", substr($this->content, max(0, $i - 5), 10)) . " ... (char #$i)').");
          return;
        }
      }
    }
  }

  /**
   * Character of $this->content at $index, or "" when $index is out of range.
   *
   * Guards against negative offsets, which PHP would otherwise resolve from
   * the end of the string.
   *
   * @param int $index Zero-based offset in $this->content.
   *
   * @return string One character, or the empty string.
   */
  private function charAt($index)
  {
    if($index < 0 || $index >= strlen($this->content))
    {
      return ("");
    }

    return ($this->content[$index]);
  }

  /**
   * Interpret the CSV rows as commON sections.
   *
   * Rows whose first cell starts with "&&" switch the current section. The
   * first row after a section marker defines the record structure (one
   * "&attribute" per column, empty columns allowed); the following rows are
   * data rows for that structure. In the linkage section any row starting with
   * "&" defines a new structure.
   *
   * Parsing stops at the first error, which is appended to $this->commonErrors;
   * in that case the record being assembled is not archived and linkage
   * prefixes are not expanded.
   */
  private function commonParser()
  {
    /* Current section being processed: "record", "dataset" or "linkage" */
    $currentSection = "";

    /* ID of the record being assembled */
    $currentRecord = "";

    /* The record being assembled */
    $commonRecord = array();

    /* Column layout used to bind the cells of the data rows to attributes */
    $recordStructure = array();

    /* Tells if the next row has to be read as a record structure */
    $shouldBeRecordDescription = FALSE;

    foreach($this->csvRecords as $record)
    {
      // If we have a blank line, we skip it and continue
      if($this->isBlankRecord($record))
      {
        continue;
      }

      $firstCell = (string) $record[0];

      // Change the section pointer.
      if(strncmp($firstCell, "&&", 2) == 0)
      {
        switch($firstCell)
        {
          case "&&recordList":
            $currentSection = "record";
          break;

          case "&&dataset":
            $currentSection = "dataset";
          break;

          case "&&linkage":
            $currentSection = "linkage";
          break;

          default:
            // Report the problem instead of returning a string nobody reads.
            array_push($this->commonErrors, "commON Parser (008): Unknown section ($firstCell)");
            return;
        }

        $shouldBeRecordDescription = TRUE;
        continue;
      }

      if($shouldBeRecordDescription === FALSE && $currentSection == "linkage" && strncmp($firstCell, "&", 1) == 0)
      {
        // We are expecting the description of a new linkage structure
        $shouldBeRecordDescription = TRUE;
      }

      if($shouldBeRecordDescription)
      {
        $recordStructure = array();

        // Define the record structure for the next rows to parse
        foreach($record as $property)
        {
          if($property != "" && $property[0] != "&")
          {
            array_push($this->commonErrors,
              "commON Parser (001): A record structure property has been defined without starting with '&' ($property)");
            return;
          }

          // Empty columns are kept in the structure: we consider that the data
          // publisher needed them for its own spreadsheet layout. This ensures
          // that "padding" columns present on every row do not raise the "Too
          // many properties defined" error, and keeps the parser compatible
          // with spreadsheet software such as Excel.
          array_push($recordStructure, $property);
        }

        if(count($recordStructure) <= 0)
        {
          array_push($this->commonErrors, "commON Parser (002): No properties defined for this record structure");
          return;
        }

        $shouldBeRecordDescription = FALSE;
        continue;
      }

      // Depending on the section being processed, we populate different structures
      switch($currentSection)
      {
        // We are parsing the dataset description
        case "dataset":
          if(!$this->fitRecordToStructure($record, $recordStructure, "003", "record structure"))
          {
            return;
          }

          foreach($recordStructure as $key => $rs)
          {
            // We simply skip empty recordStructure columns.
            if($rs == "")
            {
              continue;
            }

            if($rs == "&id")
            {
              // Set the ID
              $this->dataset["&id"] = array( array ("value" => $record[$key], "reify" => "") );
            }
            else
            {
              $this->addAttributeValue($this->dataset, $rs, $record[$key]);
            }
          }
        break;

        // We are parsing a record.
        case "record":
          if(!$this->fitRecordToStructure($record, $recordStructure, "004", "record structure"))
          {
            return;
          }

          // First pass: detect a change of record. This is done before any
          // other column is read so that attributes located before the "&id"
          // column are attached to the new record, not to the previous one.
          foreach($recordStructure as $key => $rs)
          {
            if($rs != "&id")
            {
              continue;
            }

            $id = (string) $record[$key];

            // An empty ID means the row continues the current record
            if($id === "" || $id === $currentRecord)
            {
              continue;
            }

            if($currentRecord != "")
            {
              // Archive the record before processing the next one
              array_push($this->commonRecords, $commonRecord);

              // Reinitialize the record structure
              $commonRecord = array();
            }

            // Change the reference and set the ID
            $currentRecord = $id;
            $commonRecord[$rs] = array( array ("value" => $id, "reify" => "") );
          }

          // Second pass: bind the other columns
          foreach($recordStructure as $key => $rs)
          {
            if($rs == "" || $rs == "&id")
            {
              continue;
            }

            $this->addAttributeValue($commonRecord, $rs, $record[$key]);
          }
        break;

        // We are parsing a linkage schema
        case "linkage":
          if(in_array("&attributeList", $recordStructure, TRUE))
          {
            if(!$this->fitRecordToStructure($record, $recordStructure, "005", "linkage schema record structure"))
            {
              return;
            }

            $this->addLinkageRow("properties", $recordStructure, $record);
          }
          elseif(in_array("&prefixList", $recordStructure, TRUE))
          {
            // Prefix rows never raised the "too many properties" error: we only
            // pad them so that every column of the structure can be read.
            $record = array_pad($record, count($recordStructure), "");

            $this->addLinkageRow("prefixes", $recordStructure, $record);
          }
          elseif(in_array("&typeList", $recordStructure, TRUE))
          {
            if(!$this->fitRecordToStructure($record, $recordStructure, "006", "linkage schema record structure"))
            {
              return;
            }

            $this->addLinkageRow("types", $recordStructure, $record);
          }
          else
          {
            // Description of the linkage schema.
            if(!$this->fitRecordToStructure($record, $recordStructure, "007", "linkage schema record structure"))
            {
              return;
            }

            if(!isset($this->commonLinkageSchema["description"]) || !is_array($this->commonLinkageSchema["description"]))
            {
              $this->commonLinkageSchema["description"] = array();
            }

            foreach($recordStructure as $key => $rs)
            {
              // The list separator is looked for in the cell value (it used to
              // be looked for in the attribute name, unlike every other branch).
              $this->commonLinkageSchema["description"][$rs] = explode("|", (string) $record[$key]);
            }

            // The description holds on a single row: the next row is a structure
            $shouldBeRecordDescription = TRUE;
          }
        break;
      }
    }

    // Archive the last record; nothing is archived when no record was read.
    if(count($commonRecord) > 0)
    {
      array_push($this->commonRecords, $commonRecord);
    }

    // Fix the types and properties mappings with the prefixes, if any have been defined.
    $this->expandLinkagePrefixes();
  }

  /**
   * Tells if every cell of a CSV row is empty.
   *
   * @param array $record Cells of the row.
   *
   * @return bool TRUE when the row holds no value at all.
   */
  private function isBlankRecord(array $record)
  {
    foreach($record as $value)
    {
      if($value != "")
      {
        return (FALSE);
      }
    }

    return (TRUE);
  }

  /**
   * Make a data row match the number of columns of the current structure.
   *
   * Missing trailing cells are added as empty strings. A row holding more
   * cells than the structure is an error.
   *
   * @param array  $record          Row to adjust (modified in place).
   * @param array  $recordStructure Current column layout.
   * @param string $errorCode       Three digits code used in the error message.
   * @param string $structureLabel  How the structure is named in the error message.
   *
   * @return bool FALSE when the row has too many cells (an error has been
   *              appended to $this->commonErrors), TRUE otherwise.
   */
  private function fitRecordToStructure(array &$record, array $recordStructure, $errorCode, $structureLabel)
  {
    if(count($recordStructure) < count($record))
    {
      array_push($this->commonErrors,
        "commON Parser ($errorCode): Too many properties defined for the record according to the $structureLabel. "
        . "Please make sure that you don't have empty cells in ending columns for your records, that are "
        . "not defined in the attribute definition line.");

      return (FALSE);
    }

    // Pad the record with empty properties values
    $record = array_pad($record, count($recordStructure), "");

    return (TRUE);
  }

  /**
   * Bind one cell to an attribute of a record (or of the dataset).
   *
   * A cell can hold several values separated by "|"; each one becomes a
   * statement. Attributes of the form "&attribute&reifiedAttribute" are
   * handed to addReificationValue().
   *
   * @param array  $target    Record to populate (modified in place).
   * @param string $attribute Column name, starting with "&".
   * @param string $rawValue  Content of the cell.
   */
  private function addAttributeValue(array &$target, $attribute, $rawValue)
  {
    $rawValue = (string) $rawValue;

    // Check if it is a reification attribute
    $reifiedAttribute = $this->getReifiedAttribute($attribute);

    if($reifiedAttribute !== FALSE)
    {
      $this->addReificationValue($target, $reifiedAttribute["attribute"], $reifiedAttribute["reifiedAttribute"],
        $rawValue);
      return;
    }

    if($rawValue === "")
    {
      return;
    }

    if(!isset($target[$attribute]) || !is_array($target[$attribute]))
    {
      $target[$attribute] = array();
    }

    // explode() returns the value itself when there is no "|" in it
    foreach(explode("|", $rawValue) as $v)
    {
      array_push($target[$attribute], array ("value" => $v, "reify" => ""));
    }
  }

  /**
   * Attach a reification value to the latest statement of an attribute.
   *
   * Nothing is done when the reified attribute has no statement yet.
   *
   * @param array  $target           Record to populate (modified in place).
   * @param string $attribute        Attribute being reified (e.g. "&isAuthorOfTitle").
   * @param string $reifiedAttribute Reification attribute (e.g. "&prefURL").
   * @param string $rawValue         Content of the cell.
   */
  private function addReificationValue(array &$target, $attribute, $reifiedAttribute, $rawValue)
  {
    if(!isset($target[$attribute]) || !is_array($target[$attribute]))
    {
      return;
    }

    if(strpos($rawValue, "|") !== FALSE)
    {
      // NOTE(review): legacy layout kept as-is. Lists of reification values are
      // stored in a flat "reify" list next to the statements of the attribute,
      // without the name of the reification attribute. How each value should be
      // bound to a statement is not visible from this file -- to be confirmed
      // against the commON specification.
      if(!isset($target[$attribute]["reify"]) || !is_array($target[$attribute]["reify"]))
      {
        $target[$attribute]["reify"] = array();
      }

      foreach(explode("|", $rawValue) as $v)
      {
        array_push($target[$attribute]["reify"], $v);
      }

      return;
    }

    if($rawValue === "")
    {
      return;
    }

    // Index of the latest statement (the legacy "reify" entry is not a statement)
    $reificationStatementId = count($target[$attribute]) - (isset($target[$attribute]["reify"]) ? 1 : 0) - 1;

    if($reificationStatementId < 0 || !isset($target[$attribute][$reificationStatementId])
       || !is_array($target[$attribute][$reificationStatementId]))
    {
      return;
    }

    // "reify" is initialized with an empty string: it has to be turned into an
    // array explicitly, writing a key into a string is not possible.
    if(!isset($target[$attribute][$reificationStatementId]["reify"])
       || !is_array($target[$attribute][$reificationStatementId]["reify"]))
    {
      $target[$attribute][$reificationStatementId]["reify"] = array();
    }

    if(!isset($target[$attribute][$reificationStatementId]["reify"][$reifiedAttribute])
       || !is_array($target[$attribute][$reificationStatementId]["reify"][$reifiedAttribute]))
    {
      $target[$attribute][$reificationStatementId]["reify"][$reifiedAttribute] = array();
    }

    array_push($target[$attribute][$reificationStatementId]["reify"][$reifiedAttribute], $rawValue);
  }

  /**
   * Append a row to one of the lists of the linkage schema.
   *
   * Each column of the structure becomes a key of the row; its value is the
   * list of the "|" separated values of the cell.
   *
   * @param string $bucket          "properties", "prefixes" or "types".
   * @param array  $recordStructure Current column layout.
   * @param array  $record          Cells of the row, already fitted to the structure.
   */
  private function addLinkageRow($bucket, array $recordStructure, array $record)
  {
    if(!isset($this->commonLinkageSchema[$bucket]) || !is_array($this->commonLinkageSchema[$bucket]))
    {
      $this->commonLinkageSchema[$bucket] = array();
    }

    $row = array();

    foreach($recordStructure as $key => $rs)
    {
      $row[$rs] = explode("|", (string) $record[$key]);
    }

    array_push($this->commonLinkageSchema[$bucket], $row);
  }

  /**
   * Replace the "prefix:" part of the "&mapTo" value of every type and
   * property of the linkage schema by the URI bound to that prefix.
   *
   * Values starting with "http://" are left untouched. Nothing is done when
   * the linkage schema defines no prefix.
   */
  private function expandLinkagePrefixes()
  {
    if(empty($this->commonLinkageSchema["prefixes"][0]["&prefixList"]))
    {
      return;
    }

    foreach(array ("types", "properties") as $bucket)
    {
      if(!isset($this->commonLinkageSchema[$bucket]) || !is_array($this->commonLinkageSchema[$bucket]))
      {
        continue;
      }

      foreach($this->commonLinkageSchema[$bucket] as $index => $entry)
      {
        if(!isset($entry["&mapTo"][0]))
        {
          continue;
        }

        $mapTo = $entry["&mapTo"][0];

        if(substr($mapTo, 0, 7) == "http://")
        {
          continue;
        }

        $pos = strpos($mapTo, ":");

        if($pos === FALSE)
        {
          continue;
        }

        $prefix = substr($mapTo, 0, $pos);

        foreach($this->commonLinkageSchema["prefixes"] as $pref)
        {
          if(isset($pref["&prefixList"][0]) && isset($pref["&mapTo"][0]) && $pref["&prefixList"][0] === $prefix)
          {
            $this->commonLinkageSchema[$bucket][$index]["&mapTo"][0] = str_replace($prefix . ":",
              $pref["&mapTo"][0], $this->commonLinkageSchema[$bucket][$index]["&mapTo"][0]);
          }
        }
      }
    }
  }

  /**
   * Split a reification attribute into its two parts.
   *
   * "&isAuthorOfTitle&prefURL" gives "&isAuthorOfTitle" and "&prefURL".
   *
   * @param string $attribute Column name.
   *
   * @return array|bool array("attribute" => ..., "reifiedAttribute" => ...), or
   *                    FALSE when there is no second "&" in the attribute.
   */
  private function getReifiedAttribute($attribute)
  {
    $attribute = (string) $attribute;

    // The search starts after the leading "&"
    if(strlen($attribute) > 1 && ($pos = strpos($attribute, "&", 1)) !== FALSE)
    {
      return (array ("attribute" => substr($attribute, 0, $pos),
                     "reifiedAttribute" => substr($attribute, $pos)));
    }

    return (FALSE);
  }

  /**
   * Serialize the parsed records in RDF/N3.
   *
   * Types and attributes are mapped with the linkage schema when possible.
   * Unknown types become owl:Thing; unknown attributes are created on the fly
   * under $baseOntology. Values starting with "@@" are references to external
   * records (the rest of the value is the URI), values starting with "@" are
   * references to records of $baseInstance, other values are literals.
   * Reification statements use the RDF reification vocabulary
   * (http://www.w3.org/TR/REC-rdf-syntax/#reification) and are appended after
   * the description of the records.
   *
   * @param string $baseInstance Base URI of the records.
   * @param string $baseOntology Base URI of the attributes created on the fly.
   *
   * @return string The N3 document.
   */
  public function getRdfN3($baseInstance = "", $baseOntology = "")
  {
    // Serialized file content
    $n3 = "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n\n";

    // Serialization of the reification statements to be appended to the $n3 file content.
    $n3ReificationStatements = "";

    foreach($this->commonRecords as $record)
    {
      // Map the type(s)
      $type = array();

      if(isset($record["&type"]) && is_array($record["&type"]))
      {
        foreach($record["&type"] as $rt)
        {
          if(!is_array($rt) || !isset($rt["value"]))
          {
            continue;
          }

          // Check in the linkage schema for the type
          $t = $this->getLinkedType($rt["value"]);

          // If the type doesn't exist, we simply use the generic owl:Thing type
          array_push($type, ($t == "" ? "http://www.w3.org/2002/07/owl#Thing" : $t));
        }
      }

      if(count($type) == 0)
      {
        array_push($type, "http://www.w3.org/2002/07/owl#Thing");
      }

      // Get the ID of the record
      $recordId = $baseInstance . (isset($record["&id"][0]["value"]) ? $record["&id"][0]["value"] : "");

      // Serialize the type(s) used to define the record
      $n3 .= "\n";

      foreach($type as $t)
      {
        $n3 .= "<" . $recordId . "> a <" . $t . "> .\n";
      }

      // Map properties / values of the record
      foreach($record as $property => $values)
      {
        $property = (string) $property;

        // Make sure we don't process twice the ID and the TYPE
        if($property === "&id" || $property === "&type" || !is_array($values))
        {
          continue;
        }

        // Check if this attribute is part of the linkage schema
        $p = $this->getLinkedProperty($property);

        if($p == "")
        {
          // If the attribute to be converted is not part of the linkage schema, then we
          // simply create a "on-the-fly" attribute by using the $baseOntology URI.
          $p = $baseOntology . substr($property, 1);
        }

        foreach($values as $value)
        {
          // Skip what is not a statement (e.g. the legacy flat "reify" list)
          if(!is_array($value) || !isset($value["value"]))
          {
            continue;
          }

          // "@@" has to be tested before "@", otherwise it can never match
          if(substr($value["value"], 0, 2) == "@@")
          {
            // The value is an external record reference
            $object = "<" . substr($value["value"], 2) . ">";
          }
          elseif(substr($value["value"], 0, 1) == "@")
          {
            // The value is a reference to a record of this dataset
            $object = "<" . $baseInstance . substr($value["value"], 1) . ">";
          }
          else
          {
            // The value is a literal
            $object = "\"\"\"" . $this->escapeN3($value["value"]) . "\"\"\"";
          }

          $n3 .= "<" . $recordId . "> <" . $p . "> " . $object . " .\n";

          // Check if there is some statements to reify
          if(!isset($value["reify"]) || !is_array($value["reify"]))
          {
            continue;
          }

          foreach($value["reify"] as $reifiedAttribute => $reiValues)
          {
            $rp = $this->getLinkedProperty($reifiedAttribute);

            if($rp == "")
            {
              $rp = $baseOntology . substr($reifiedAttribute, 1);
            }

            // @TODO: Check if "@" or "@@"
            foreach((array) $reiValues as $reiValue)
            {
              $n3ReificationStatements .= "_:" . md5($recordId . $p . $value["value"]) . " a rdf:Statement ;\n";

              $n3ReificationStatements .= " rdf:subject <" . $recordId . "> ;\n";
              $n3ReificationStatements .= " rdf:predicate <" . $p . "> ;\n";

              // Same object as the reified triple (reference or literal)
              $n3ReificationStatements .= " rdf:object " . $object . " ;\n";
              $n3ReificationStatements .= " <" . $rp . "> \"\"\"" . $this->escapeN3($reiValue) . "\"\"\" .\n\n";
            }
          }
        }
      }
    }

    return ($n3 . $n3ReificationStatements);
  }

  /**
   * Get the property an attribute is mapped to by the linkage schema.
   *
   * @param string $targetAttribute Attribute name, with or without the leading "&".
   *
   * @return string The "&mapTo" value of the attribute, or an empty string
   *                when the attribute is not part of the linkage schema.
   */
  public function getLinkedProperty($targetAttribute)
  {
    return ($this->getLinkedResource("properties", "&attributeList", $targetAttribute));
  }

  /**
   * Get the type a record type is mapped to by the linkage schema.
   *
   * @param string $targetType Type name, with or without the leading "&".
   *
   * @return string The "&mapTo" value of the type, or an empty string when
   *                the type is not part of the linkage schema.
   */
  public function getLinkedType($targetType)
  {
    return ($this->getLinkedResource("types", "&typeList", $targetType));
  }

  /**
   * Look for a name in one of the lists of the linkage schema.
   *
   * @param string $bucket  "properties" or "types".
   * @param string $listKey Column holding the names ("&attributeList" or "&typeList").
   * @param string $target  Name to look for, with or without the leading "&".
   *
   * @return string The first "&mapTo" value of the first matching row, or an
   *                empty string when nothing matches.
   */
  private function getLinkedResource($bucket, $listKey, $target)
  {
    $target = (string) $target;

    // Remove the processing character if it is present at the beginning of the name
    if(substr($target, 0, 1) == "&")
    {
      $target = substr($target, 1);
    }

    if(isset($this->commonLinkageSchema[$bucket]) && is_array($this->commonLinkageSchema[$bucket]))
    {
      foreach($this->commonLinkageSchema[$bucket] as $entry)
      {
        if(isset($entry[$listKey][0]) && isset($entry["&mapTo"][0]) && (string) $entry[$listKey][0] === $target)
        {
          return ($entry["&mapTo"][0]);
        }
      }
    }

    // Not found, return an empty string
    return ("");
  }

  /**
   * Escape a literal so it can be written between N3 quotes.
   *
   * Backslashes are doubled first, then double and single quotes are
   * prefixed with a backslash.
   *
   * @param string $literal Value to escape.
   *
   * @return string Escaped value.
   */
  private function escapeN3($literal)
  {
    $literal = str_replace("\\", "\\\\", (string) $literal);

    return str_replace(array ('"', "'"), array ('\\"', "\\'"), $literal);
  }
}
