BibtexParser.php
Go to the documentation of this file.
<?php

include_once("BibtexItem.php");

/**
 * Minimal BibTeX reader.
 *
 * The constructor receives the raw text of a .bib file, normalizes it and
 * parses it immediately. Every entry found is stored as a BibtexItem in the
 * public $items array, in the order in which the entries appear.
 *
 * Parsing is best-effort: a truncated entry (no "{" after the type, or no ","
 * after the ID) stops the parser and keeps the entries collected so far.
 */
class BibtexParser
{
    /** Parsed entries (BibtexItem instances), in file order. */
    public $items = array();

    /** Normalized content of the bibtex file. */
    private $fileContent = "";

    /** Byte offset of the parser inside $fileContent. Only ever moves forward. */
    private $cursor = 0;

    /**
     * Normalizes the given bibtex text and parses it right away.
     *
     * @param string $content Raw content of a bibtex file.
     */
    function __construct($content)
    {
        $this->fileContent = $content;

        // Tabs and line breaks are deleted outright (not turned into a space).
        $this->fileContent = str_replace(array ("\t", "\r", "\n"), "", $this->fileContent);

        // Collapse every run of two or more whitespace characters into one space.
        $this->fileContent = preg_replace("#\s\s+#", " ", $this->fileContent);

        // Fix potential bibtex format issues
        $this->fixFormatIssues();

        $this->parse();
    }

    function __destruct() { }

    /**
     * Walks over every "@" entry of the normalized content and appends one
     * BibtexItem per entry to $items.
     */
    private function parse()
    {
        while ($this->nextItem() !== false)
        {
            $type = $this->getItemType();
            if ($type === false)
            {
                // No "{" after the "@": the rest of the input is not an entry.
                break;
            }

            $id = $this->getItemID();
            if ($id === false)
            {
                // No "," after the ID: the entry is truncated.
                break;
            }

            $item = new BibtexItem();
            $item->addType($type);
            $item->addID($id);

            while (($property = $this->getItemProperty()) !== false)
            {
                $item->addProperty($property[0], $property[1]);
            }

            $this->items[] = $item;
        }
    }

    /**
     * Moves the cursor just past the next "@".
     *
     * @return int|false The new cursor position, or false when no "@" is left.
     *                   The cursor is left untouched in that case.
     */
    private function nextItem()
    {
        $at = strpos($this->fileContent, "@", $this->cursor);

        if ($at === false)
        {
            return false;
        }

        $this->cursor = $at + 1;

        return $this->cursor;
    }

    /**
     * Reads the entry type located between the cursor and the next "{" and
     * moves the cursor just past that "{".
     *
     * @return string|false Lower-cased type with all spaces removed, or false
     *                      (cursor untouched) when there is no "{" ahead.
     */
    private function getItemType()
    {
        $end = strpos($this->fileContent, "{", $this->cursor);

        if ($end === false)
        {
            return false;
        }

        $type = strtolower(substr($this->fileContent, $this->cursor, $end - $this->cursor));

        // Move the cursor
        $this->cursor = $end + 1;

        return str_replace(" ", "", $type);
    }

    /**
     * Reads the entry ID located between the cursor and the next "," and
     * moves the cursor just past that ",".
     *
     * @return string|false The ID exactly as written (it is not trimmed), or
     *                      false (cursor untouched) when there is no "," ahead.
     */
    private function getItemID()
    {
        $end = strpos($this->fileContent, ",", $this->cursor);

        if ($end === false)
        {
            return false;
        }

        $id = substr($this->fileContent, $this->cursor, $end - $this->cursor);

        // Move the cursor
        $this->cursor = $end + 1;

        return $id;
    }

    /**
     * Reads the next "key = value" pair of the current entry.
     *
     * @return array|false array(key, value) with the key lower-cased and
     *                     stripped of spaces, or false when the entry is
     *                     finished or nothing parsable is left. Unquoted
     *                     values are stripped of spaces as well; quoted or
     *                     braced values are returned as written.
     */
    private function getItemProperty()
    {
        // Blanks in front of a key are dropped from the key anyway, so they can
        // be skipped up front. This lets the "}" test below see a closing brace
        // that is preceded by a space.
        $this->cursor += strspn($this->fileContent, " ", $this->cursor);

        // Nothing left to read.
        if (!isset($this->fileContent[$this->cursor]))
        {
            return false;
        }

        // First, check if we reached the end of the bib item.
        if ($this->fileContent[$this->cursor] == "}")
        {
            // Move the cursor
            $this->cursor += 1;
            return false;
        }

        // Last field of an entry with an unquoted value and no trailing comma,
        // e.g. "year = 2000}". \G anchors the match at the cursor. The closing
        // brace is left in place so the next call ends the entry.
        $pattern = '/\G([^=,{}"]*)=([^,{}"]*)\}/';

        if (preg_match($pattern, $this->fileContent, $matches, 0, $this->cursor))
        {
            $this->cursor += strlen($matches[0]) - 1;

            return array (strtolower(str_replace(" ", "", $matches[1])), str_replace(" ", "", $matches[2]));
        }

        // Shortest "key = value," candidate starting at the cursor.
        $pattern = '/(.*)?[\s]*=(.*),/U';

        if (!preg_match($pattern, $this->fileContent, $matches, 0, $this->cursor))
        {
            // Always report "no more properties" explicitly; an implicit NULL
            // here would keep the caller's !== false loop running forever.
            return false;
        }

        // Neither quote nor brace in the candidate: unquoted (e.g. integer) value.
        if (strpos($matches[0], '"') === false && strpos($matches[0], '{') === false)
        {
            // Move the cursor
            $this->cursor += strlen($matches[0]);

            return array (strtolower(str_replace(" ", "", $matches[1])), str_replace(" ", "", $matches[2]));
        }

        // Quoted or braced value. The value ends at the first of:
        // (1) ",  => ["]{1}[\s]*[,]{1}
        // (2) },  => [\}]{1}[\s]*[,]{1}
        // (3) "}  => ["]{1}[\s]*[\}]{1}
        $pattern = '/(.*)[\s]*=[\s]*["\{]{1}(.*)(["]{1}[\s]*[,]{1}|[\}]{1}[\s]*[,]{1}|["]{1}[\s]*[\}]{1}){1}/U';

        if (preg_match($pattern, $this->fileContent, $matches, 0, $this->cursor))
        {
            // Move the cursor
            $this->cursor += strlen($matches[0]);

            return array (strtolower(str_replace(" ", "", $matches[1])), $matches[2]);
        }

        return false;
    }

    /**
     * Rewrites the ending of bibtex items from "} }" to "},}" so that the last
     * braced field of every entry is terminated by a comma, which is what
     * getItemProperty() looks for.
     */
    private function fixFormatIssues()
    {
        // "} }" directly followed by the next "@", or by the end of the content.
        $pattern = '/(((\}[\s]*\}[\s]*)@)|(\}[\s]*\}[\s]*$))/U';

        if (!preg_match_all($pattern, $this->fileContent, $matches))
        {
            return;
        }

        // Each distinct ending only needs to be replaced once.
        foreach (array_unique($matches[0]) as $ending)
        {
            $replaceWith = str_replace(" ", "", $ending);
            $replaceWith = str_replace("}}", "},}", $replaceWith);

            $this->fileContent = str_replace($ending, $replaceWith, $this->fileContent);
        }
    }
}
