DavidW
23rd of October 2006 (Mon), 11:50
Finally, I think I've nailed the problem with EE failing to read most metadata (shutter speed, aperture, time, date etc.) in files saved from Photoshop CS2.
The problem is somewhat similar to this bug (http://photography-on-the.net/forum/showthread.php?t=224121) which Pekka has already incorporated the fix for in 2.01. CS2 writes the XMP metadata in a slightly different format - instead of <tag>data</tag>, it uses tag="data" for exif:, tiff: and aux: values.
I've attached a fixed version of ee_extract_xmp_data - if Pekka's original regex fails to produce a result, it uses ereg() against regexp2 to attempt to extract the data. I've tested it on my installation and it seems to work, though I regard it as somewhat scrappy programming.
There's an argument that a new parser should be written from scratch, but I needed this working today and I haven't got much time to mess, so it was a case of patching the code that was there.
Pekka - are you aware that the original code uses preg_match(), which is a PCRE function? PCRE is a PHP extension and may not be installed - though I expect it is installed in most people's PHP. The code I've added uses ereg(), which doesn't require PCRE, but I haven't bothered to rewrite the original regexes in POSIX extended regex form (rather than Perl regex form), which is what would be needed to switch the original set of tests over to ereg().
The function below completely replaces the original version of function ee_extract_xmp_data() in SCRIPT_editor_functions.php - it's easier to cut and paste the whole function rather than try to list the edits as they are extensive.
function ee_extract_xmp_data ($filename,$printout=0) {
    // Very straightforward one-purpose utility function which reads an image
    // file and pulls some EXIF/TIFF/DC/Photoshop values out of its embedded
    // XMP/XAP packet (as written by Adobe Photoshop/CS).
    //
    // $filename  path of the image file to scan
    // $printout  when non-zero, the parsed values are also printed as HTML
    //
    // Returns an array of array("item" => name, "value" => value) entries.
    // Values taken from the regex table are strings ("" when the tag was not
    // found); the two "bag" entries hold whatever read_xmp_bag() returns.
    //
    // code by Pekka Saarinen http://photography-on-the.net
    // CS2 attribute-form fallback (regexp2) by DavidW

    // Read the whole file in one call instead of capturing readfile() output
    // through an output buffer. A read failure is treated as "no metadata".
    $source = @file_get_contents($filename);
    if ($source === FALSE) $source = "";

    // Cut out the XMP packet. Both markers must be present, and the closing
    // marker is searched for after the opening one; otherwise we would slice
    // arbitrary bytes from the file (strpos() FALSE behaves like offset 0).
    $xmpdata = "";
    $xmpdata_start = strpos($source,"<x:xmpmeta");
    if ($xmpdata_start === FALSE) $xmpdata_start = strpos($source,"<x:xapmeta");
    if ($xmpdata_start !== FALSE) {
        $xmpdata_end = strpos($source,"</x:xmpmeta>",$xmpdata_start);
        if ($xmpdata_end === FALSE) $xmpdata_end = strpos($source,"</x:xapmeta>",$xmpdata_start);
        if ($xmpdata_end !== FALSE) {
            // +12 keeps the closing tag (both closing tags are 12 characters long)
            $xmpdata = substr($source,$xmpdata_start,$xmpdata_end-$xmpdata_start+12);
        }
    }
    $source = ""; // the full file contents are no longer needed

    $xmp_parsed = array();

    // "regexp"  matches the element form   <tag>data</tag>   (PCRE, with delimiters)
    // "regexp2" matches the attribute form tag="data" written by Photoshop CS2
    //           (no delimiters; "" when the tag has no attribute form)
    $regexps = array(
        array("name" => "DC creator", "regexp" => "/<dc:creator>\s*<rdf:Seq>\s*<rdf:li>.+<\/rdf:li>\s*<\/rdf:Seq>\s*<\/dc:creator>/", "regexp2" => ""), // All dc: tags don't need a different regex
        array("name" => "TIFF camera model", "regexp" => "/<tiff:Model>.+<\/tiff:Model>/", "regexp2" => "tiff:Model=\"([^\"]+)\""),
        array("name" => "TIFF maker", "regexp" => "/<tiff:Make>.+<\/tiff:Make>/", "regexp2" => "tiff:Make=\"([^\"]+)\""),
        array("name" => "EXIF exposure time", "regexp" => "/<exif:ExposureTime>.+<\/exif:ExposureTime>/", "regexp2" => "exif:ExposureTime=\"([^\"]+)\""),
        array("name" => "EXIF shutterspeed value", "regexp" => "/<exif:ShutterSpeedValue>.+<\/exif:ShutterSpeedValue>/", "regexp2" => "exif:ShutterSpeedValue=\"([^\"]+)\""),
        array("name" => "EXIF f number", "regexp" => "/<exif:FNumber>.+<\/exif:FNumber>/", "regexp2" => "exif:FNumber=\"([^\"]+)\""),
        array("name" => "EXIF aperture value", "regexp" => "/<exif:ApertureValue>.+<\/exif:ApertureValue>/", "regexp2" => "exif:ApertureValue=\"([^\"]+)\""),
        array("name" => "EXIF exposure program", "regexp" => "/<exif:ExposureProgram>.+<\/exif:ExposureProgram>/", "regexp2" => "exif:ExposureProgram=\"([^\"]+)\""),
        array("name" => "EXIF iso speed ratings", "regexp" => "/<exif:ISOSpeedRatings>\s*<rdf:Seq>\s*<rdf:li>.+<\/rdf:li>\s*<\/rdf:Seq>\s*<\/exif:ISOSpeedRatings>/", "regexp2" => ""),
        array("name" => "EXIF datetime original", "regexp" => "/<exif:DateTimeOriginal>.+<\/exif:DateTimeOriginal>/", "regexp2" => "exif:DateTimeOriginal=\"([^\"]+)\""),
        array("name" => "EXIF exposure bias value", "regexp" => "/<exif:ExposureBiasValue>.+<\/exif:ExposureBiasValue>/", "regexp2" => "exif:ExposureBiasValue=\"([^\"]+)\""),
        array("name" => "EXIF metering mode", "regexp" => "/<exif:MeteringMode>.+<\/exif:MeteringMode>/", "regexp2" => "exif:MeteringMode=\"([^\"]+)\""),
        array("name" => "EXIF focal lenght", "regexp" => "/<exif:FocalLength\>.+\<\/exif:FocalLength>/", "regexp2" => "exif:FocalLength=\"([^\"]+)\""),
        array("name" => "AUX lens", "regexp" => "/<aux:Lens>.+<\/aux:Lens>/", "regexp2" => "aux:Lens=\"([^\"]+)\""),
        array("name" => "DC rights", "regexp" => "/<dc:rights>\s*<rdf:Alt>\s*<rdf:li xml:lang=['\"]x\-default['\"]>.+<\/rdf:li>\s*<\/rdf:Alt>\s*<\/dc:rights>/", "regexp2" => ""),
        array("name" => "DC description", "regexp" => "/<dc:description>\s*<rdf:Alt>\s*<rdf:li xml:lang=['\"]x\-default['\"]>.+<\/rdf:li>\s*<\/rdf:Alt>\s*<\/dc:description>/", "regexp2" => ""),
        array("name" => "DC title", "regexp" => "/<dc:title>\s*<rdf:Alt>\s*<rdf:li xml:lang=['\"]x\-default['\"]>.+<\/rdf:li>\s*<\/rdf:Alt>\s*<\/dc:title>/", "regexp2" => ""),
        array("name" => "PHOTOSHOP headline", "regexp" => "/<photoshop:Headline>.+<\/photoshop:Headline>/", "regexp2" => "photoshop:Headline=\"([^\"]+)\""),
        array("name" => "PHOTOSHOP city", "regexp" => "/<photoshop:City>.+<\/photoshop:City>/", "regexp2" => "photoshop:City=\"([^\"]+)\""),
        array("name" => "PHOTOSHOP state", "regexp" => "/<photoshop:State>.+<\/photoshop:State>/", "regexp2" => "photoshop:State=\"([^\"]+)\""),
        array("name" => "PHOTOSHOP country", "regexp" => "/<photoshop:Country>.+<\/photoshop:Country>/", "regexp2" => "photoshop:Country=\"([^\"]+)\""),
        array("name" => "PHOTOSHOP category", "regexp" => "/<photoshop:Category>.+<\/photoshop:Category>/", "regexp2" => "photoshop:Category=\"([^\"]+)\""),
        array("name" => "PHOTOSHOP credit", "regexp" => "/<photoshop:Credit>.+<\/photoshop:Credit>/", "regexp2" => "photoshop:Credit=\"([^\"]+)\""),
        array("name" => "PHOTOSHOP authors position", "regexp" => "/<photoshop:AuthorsPosition>.+<\/photoshop:AuthorsPosition>/", "regexp2" => "photoshop:AuthorsPosition=\"([^\"]+)\"")
    );

    foreach ($regexps as $k) {
        $name = $k["name"];
        $xmp_item = "";

        $r = array();
        if (preg_match($k["regexp"], $xmpdata, $r)) {
            // element form: whole match, the tags are removed by strip_tags() below
            $xmp_item = $r[0];
        }
        elseif ($k["regexp2"] != "") {
            // Attribute form. Uses preg_match() rather than ereg(): PCRE is
            // already required by the test above, and ereg() no longer exists
            // in PHP 7+. Entries without an attribute form are skipped so that
            // no empty pattern is ever compiled.
            $s = array();
            $attr_regexp = "/" . str_replace("/", "\\/", $k["regexp2"]) . "/";
            if (preg_match($attr_regexp, $xmpdata, $s)) {
                $xmp_item = $s[1]; // [1] to retrieve bracketed expression from regex
            }
        }

        if ($name == "EXIF datetime original") {
            // turn the ISO style "2006-10-23T11:50:00Z" into "2006-10-23 11:50:00"
            $xmp_item = str_replace("Z","",$xmp_item);
            $xmp_item = str_replace("T"," ",$xmp_item);
        }
        if ($name == "AUX lens") {
            // drop spaces and every "m" (e.g. the "mm" unit) from the lens string
            $xmp_item = str_replace(" ","",$xmp_item);
            $xmp_item = str_replace("m","",$xmp_item);
        }
        array_push($xmp_parsed,array("item" => $name, "value" => strip_tags($xmp_item)));
    }

    // multi-valued tags are delegated to read_xmp_bag()
    $xmp_supplemental_categories = read_xmp_bag ($xmpdata,"<photoshop:SupplementalCategories>", "</photoshop:SupplementalCategories>");
    array_push($xmp_parsed,array("item" => "PHOTOSHOP supplemental categories", "value" => $xmp_supplemental_categories));
    $xmp_keywords = read_xmp_bag ($xmpdata,"<dc:subject>", "</dc:subject>");
    array_push($xmp_parsed,array("item" => "DC keywords", "value" => $xmp_keywords));
    //$xmp_author = read_xmp_bag ($xmpdata,"<dc:creator>", "</dc:creator>");
    //array_push($xmp_parsed,array("item" => "DC creator", "value" => @$xmp_author[0]));

    if ($printout != 0) {
        foreach ($xmp_parsed as $k) {
            $item = $k["item"];
            $value = $k["value"];
            if (!is_array($value)) {
                print "<br><span style=\"color: #990000;\"><b>" . $item . ":</b></span> " . $value;
            } else {
                print "<br><b>" . $item . ":</b> ";
                ee_print_array($value);
            }
        }
    }

    return ($xmp_parsed);
}
Feedback would be appreciated from anyone. I've heard a few hints of metadata reading failure with EE2 - if all your photos are being read with a 'default' time and date (possibly in 2002), this could be your issue.
Pekka - if you're happy with the code from a security point of view, can this be incorporated into 2.02? It's heaps better than keying in all the metadata by hand!
David
The problem is somewhat similar to this bug (http://photography-on-the.net/forum/showthread.php?t=224121) which Pekka has already incorporated the fix for in 2.01. CS2 writes the XMP metadata in a slightly different format - instead of <tag>data</tag>, it uses tag="data" for exif:, tiff: and aux: values.
I've attached a fixed version of ee_extract_xmp_data - if Pekka's original regex fails to produce a result, it uses ereg() against regexp2 to attempt to extract the data. I've tested it on my installation and it seems to work, though I regard it as somewhat scrappy programming.
There's an argument that a new parser should be written from scratch, but I needed this working today and I haven't got much time to mess, so it was a case of patching the code that was there.
Pekka - are you aware that the original code uses preg_match(), which is a PCRE function? PCRE is a PHP extension and may not be installed - though I expect it is installed in most people's PHP. The code I've added uses ereg(), which doesn't require PCRE, but I haven't bothered to rewrite the original regexes in POSIX extended regex form (rather than Perl regex form), which is what would be needed to switch the original set of tests over to ereg().
The function below completely replaces the original version of function ee_extract_xmp_data() in SCRIPT_editor_functions.php - it's easier to cut and paste the whole function rather than try to list the edits as they are extensive.
function ee_extract_xmp_data ($filename,$printout=0) {
    // Very straightforward one-purpose utility function which reads an image
    // file and pulls some EXIF/TIFF/DC/Photoshop values out of its embedded
    // XMP/XAP packet (as written by Adobe Photoshop/CS).
    //
    // $filename  path of the image file to scan
    // $printout  when non-zero, the parsed values are also printed as HTML
    //
    // Returns an array of array("item" => name, "value" => value) entries.
    // Values taken from the regex table are strings ("" when the tag was not
    // found); the two "bag" entries hold whatever read_xmp_bag() returns.
    //
    // code by Pekka Saarinen http://photography-on-the.net
    // CS2 attribute-form fallback (regexp2) by DavidW

    // Read the whole file in one call instead of capturing readfile() output
    // through an output buffer. A read failure is treated as "no metadata".
    $source = @file_get_contents($filename);
    if ($source === FALSE) $source = "";

    // Cut out the XMP packet. Both markers must be present, and the closing
    // marker is searched for after the opening one; otherwise we would slice
    // arbitrary bytes from the file (strpos() FALSE behaves like offset 0).
    $xmpdata = "";
    $xmpdata_start = strpos($source,"<x:xmpmeta");
    if ($xmpdata_start === FALSE) $xmpdata_start = strpos($source,"<x:xapmeta");
    if ($xmpdata_start !== FALSE) {
        $xmpdata_end = strpos($source,"</x:xmpmeta>",$xmpdata_start);
        if ($xmpdata_end === FALSE) $xmpdata_end = strpos($source,"</x:xapmeta>",$xmpdata_start);
        if ($xmpdata_end !== FALSE) {
            // +12 keeps the closing tag (both closing tags are 12 characters long)
            $xmpdata = substr($source,$xmpdata_start,$xmpdata_end-$xmpdata_start+12);
        }
    }
    $source = ""; // the full file contents are no longer needed

    $xmp_parsed = array();

    // "regexp"  matches the element form   <tag>data</tag>   (PCRE, with delimiters)
    // "regexp2" matches the attribute form tag="data" written by Photoshop CS2
    //           (no delimiters; "" when the tag has no attribute form)
    $regexps = array(
        array("name" => "DC creator", "regexp" => "/<dc:creator>\s*<rdf:Seq>\s*<rdf:li>.+<\/rdf:li>\s*<\/rdf:Seq>\s*<\/dc:creator>/", "regexp2" => ""), // All dc: tags don't need a different regex
        array("name" => "TIFF camera model", "regexp" => "/<tiff:Model>.+<\/tiff:Model>/", "regexp2" => "tiff:Model=\"([^\"]+)\""),
        array("name" => "TIFF maker", "regexp" => "/<tiff:Make>.+<\/tiff:Make>/", "regexp2" => "tiff:Make=\"([^\"]+)\""),
        array("name" => "EXIF exposure time", "regexp" => "/<exif:ExposureTime>.+<\/exif:ExposureTime>/", "regexp2" => "exif:ExposureTime=\"([^\"]+)\""),
        array("name" => "EXIF shutterspeed value", "regexp" => "/<exif:ShutterSpeedValue>.+<\/exif:ShutterSpeedValue>/", "regexp2" => "exif:ShutterSpeedValue=\"([^\"]+)\""),
        array("name" => "EXIF f number", "regexp" => "/<exif:FNumber>.+<\/exif:FNumber>/", "regexp2" => "exif:FNumber=\"([^\"]+)\""),
        array("name" => "EXIF aperture value", "regexp" => "/<exif:ApertureValue>.+<\/exif:ApertureValue>/", "regexp2" => "exif:ApertureValue=\"([^\"]+)\""),
        array("name" => "EXIF exposure program", "regexp" => "/<exif:ExposureProgram>.+<\/exif:ExposureProgram>/", "regexp2" => "exif:ExposureProgram=\"([^\"]+)\""),
        array("name" => "EXIF iso speed ratings", "regexp" => "/<exif:ISOSpeedRatings>\s*<rdf:Seq>\s*<rdf:li>.+<\/rdf:li>\s*<\/rdf:Seq>\s*<\/exif:ISOSpeedRatings>/", "regexp2" => ""),
        array("name" => "EXIF datetime original", "regexp" => "/<exif:DateTimeOriginal>.+<\/exif:DateTimeOriginal>/", "regexp2" => "exif:DateTimeOriginal=\"([^\"]+)\""),
        array("name" => "EXIF exposure bias value", "regexp" => "/<exif:ExposureBiasValue>.+<\/exif:ExposureBiasValue>/", "regexp2" => "exif:ExposureBiasValue=\"([^\"]+)\""),
        array("name" => "EXIF metering mode", "regexp" => "/<exif:MeteringMode>.+<\/exif:MeteringMode>/", "regexp2" => "exif:MeteringMode=\"([^\"]+)\""),
        array("name" => "EXIF focal lenght", "regexp" => "/<exif:FocalLength\>.+\<\/exif:FocalLength>/", "regexp2" => "exif:FocalLength=\"([^\"]+)\""),
        array("name" => "AUX lens", "regexp" => "/<aux:Lens>.+<\/aux:Lens>/", "regexp2" => "aux:Lens=\"([^\"]+)\""),
        array("name" => "DC rights", "regexp" => "/<dc:rights>\s*<rdf:Alt>\s*<rdf:li xml:lang=['\"]x\-default['\"]>.+<\/rdf:li>\s*<\/rdf:Alt>\s*<\/dc:rights>/", "regexp2" => ""),
        array("name" => "DC description", "regexp" => "/<dc:description>\s*<rdf:Alt>\s*<rdf:li xml:lang=['\"]x\-default['\"]>.+<\/rdf:li>\s*<\/rdf:Alt>\s*<\/dc:description>/", "regexp2" => ""),
        array("name" => "DC title", "regexp" => "/<dc:title>\s*<rdf:Alt>\s*<rdf:li xml:lang=['\"]x\-default['\"]>.+<\/rdf:li>\s*<\/rdf:Alt>\s*<\/dc:title>/", "regexp2" => ""),
        array("name" => "PHOTOSHOP headline", "regexp" => "/<photoshop:Headline>.+<\/photoshop:Headline>/", "regexp2" => "photoshop:Headline=\"([^\"]+)\""),
        array("name" => "PHOTOSHOP city", "regexp" => "/<photoshop:City>.+<\/photoshop:City>/", "regexp2" => "photoshop:City=\"([^\"]+)\""),
        array("name" => "PHOTOSHOP state", "regexp" => "/<photoshop:State>.+<\/photoshop:State>/", "regexp2" => "photoshop:State=\"([^\"]+)\""),
        array("name" => "PHOTOSHOP country", "regexp" => "/<photoshop:Country>.+<\/photoshop:Country>/", "regexp2" => "photoshop:Country=\"([^\"]+)\""),
        array("name" => "PHOTOSHOP category", "regexp" => "/<photoshop:Category>.+<\/photoshop:Category>/", "regexp2" => "photoshop:Category=\"([^\"]+)\""),
        array("name" => "PHOTOSHOP credit", "regexp" => "/<photoshop:Credit>.+<\/photoshop:Credit>/", "regexp2" => "photoshop:Credit=\"([^\"]+)\""),
        array("name" => "PHOTOSHOP authors position", "regexp" => "/<photoshop:AuthorsPosition>.+<\/photoshop:AuthorsPosition>/", "regexp2" => "photoshop:AuthorsPosition=\"([^\"]+)\"")
    );

    foreach ($regexps as $k) {
        $name = $k["name"];
        $xmp_item = "";

        $r = array();
        if (preg_match($k["regexp"], $xmpdata, $r)) {
            // element form: whole match, the tags are removed by strip_tags() below
            $xmp_item = $r[0];
        }
        elseif ($k["regexp2"] != "") {
            // Attribute form. Uses preg_match() rather than ereg(): PCRE is
            // already required by the test above, and ereg() no longer exists
            // in PHP 7+. Entries without an attribute form are skipped so that
            // no empty pattern is ever compiled.
            $s = array();
            $attr_regexp = "/" . str_replace("/", "\\/", $k["regexp2"]) . "/";
            if (preg_match($attr_regexp, $xmpdata, $s)) {
                $xmp_item = $s[1]; // [1] to retrieve bracketed expression from regex
            }
        }

        if ($name == "EXIF datetime original") {
            // turn the ISO style "2006-10-23T11:50:00Z" into "2006-10-23 11:50:00"
            $xmp_item = str_replace("Z","",$xmp_item);
            $xmp_item = str_replace("T"," ",$xmp_item);
        }
        if ($name == "AUX lens") {
            // drop spaces and every "m" (e.g. the "mm" unit) from the lens string
            $xmp_item = str_replace(" ","",$xmp_item);
            $xmp_item = str_replace("m","",$xmp_item);
        }
        array_push($xmp_parsed,array("item" => $name, "value" => strip_tags($xmp_item)));
    }

    // multi-valued tags are delegated to read_xmp_bag()
    $xmp_supplemental_categories = read_xmp_bag ($xmpdata,"<photoshop:SupplementalCategories>", "</photoshop:SupplementalCategories>");
    array_push($xmp_parsed,array("item" => "PHOTOSHOP supplemental categories", "value" => $xmp_supplemental_categories));
    $xmp_keywords = read_xmp_bag ($xmpdata,"<dc:subject>", "</dc:subject>");
    array_push($xmp_parsed,array("item" => "DC keywords", "value" => $xmp_keywords));
    //$xmp_author = read_xmp_bag ($xmpdata,"<dc:creator>", "</dc:creator>");
    //array_push($xmp_parsed,array("item" => "DC creator", "value" => @$xmp_author[0]));

    if ($printout != 0) {
        foreach ($xmp_parsed as $k) {
            $item = $k["item"];
            $value = $k["value"];
            if (!is_array($value)) {
                print "<br><span style=\"color: #990000;\"><b>" . $item . ":</b></span> " . $value;
            } else {
                print "<br><b>" . $item . ":</b> ";
                ee_print_array($value);
            }
        }
    }

    return ($xmp_parsed);
}
Feedback would be appreciated from anyone. I've heard a few hints of metadata reading failure with EE2 - if all your photos are being read with a 'default' time and date (possibly in 2002), this could be your issue.
Pekka - if you're happy with the code from a security point of view, can this be incorporated into 2.02? It's heaps better than keying in all the metadata by hand!
David