From 8486c2101cec0b218107d21b98f52fb0d341b9b8 Mon Sep 17 00:00:00 2001 From: codycraven Date: Fri, 29 Jun 2012 15:55:38 -0400 Subject: [PATCH] Updated script to support namespacing of Open Graph protocol. The old version of the script was limited to only parsing for the og namespace. This revision adds support for parsing for any namespaces desired, which unfortunately breaks the expected response for prior implementations; however, it is a necessary improvement. As a note, the old implementation failed to work on sites following current Open Graph standards from http://ogp.me due to modifications in the namespace prefix declaration. This revision forgoes the forcing of prefix detection from jsdom since the Open Graph standard has changed at least three times that I am aware of. --- README.md | 32 ++++++++++++++-------- lib/ogp.js | 77 +++++++++++++++++++++++++++++----------------------- package.json | 5 ++-- 3 files changed, 66 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index b85cb69..bff4e97 100644 --- a/README.md +++ b/README.md @@ -23,35 +23,43 @@ var jsdom = require('jsdom'), url = 'http://www.imdb.com/title/tt0068646/' jsdom.env({ html: url, done: function(error, window) { - - // 3. pass window as the only argument to ogp.parse method - var ogData = ogp.parse(window) - - // 5. Profit! - console.log('Open Graph data', ogData) + // 3. pass window as the only argument to ogp.parse method + var ogData = ogp.parse(window) + + // ogp.parse can also parse Open Graph properties for any namespaces desired. + // + // To match og and fb namespaces the following could be done: + // var ogData = ogp.parse(window, 'og fb') + // or: + // var ogData = ogp.parse(window, ['og', 'fb']) + + // 5. Profit! 
+ console.log('Open Graph data', ogData) }}) ``` This will put next structure into stdout: ```javascript -{ url: 'http://www.imdb.com/title/tt0068646/', - title: 'The Godfather (1972)', - type: 'video.movie', - image: 'http://ia.media-imdb.com/images/M/MV5BMTIyMTIxNjI5NF5BMl5BanBnXkFtZTcwNzQzNDM5MQ@@._V1._SX97_SY140_.jpg', - site_name: 'IMDb' } +{ 'og:url': 'http://www.imdb.com/title/tt0068646/', + 'og:title': 'The Godfather (1972)', + 'og:type': 'video.movie', + 'og:image': 'http://ia.media-imdb.com/images/M/MV5BMTIyMTIxNjI5NF5BMl5BanBnXkFtZTcwNzQzNDM5MQ@@._V1._SX97_SY140_.jpg', + 'og:site_name': 'IMDb' } ``` In case if some of OpenGraph tags were presented multiple times (few image tags for example, output structure field related to that tag will be converted into array: ```javascript -{ image: ['image1.png', 'image2.png']} +{ 'og:image': ['image1.png', 'image2.png']} ``` ## Credits Written and maintained by [Yury Proshchenko](mailto:spect.man@gmail.com). +Multiple namespace support by [Cody Craven](http://github.com/codycraven). 
+
 ## License
 
 The MIT License
diff --git a/lib/ogp.js b/lib/ogp.js
index 6944fc8..fd72c4b 100644
--- a/lib/ogp.js
+++ b/lib/ogp.js
@@ -1,35 +1,53 @@
-exports.parse = function(window) {
-  var ns
-  for (var i = 0; i < window.document.documentElement.attributes.length; ++i) {
-    var attr = window.document.documentElement.attributes[i]
-    if (attr.nodeValue.toLowerCase() !== 'http://opengraphprotocol.org/schema/') continue
-
-    ns = attr.nodeName.substring(6)
-    if (ns) break
-  }
-
-  if (!ns) return {}
-
-  var result = {},
-      metaTags = window.document.getElementsByTagName('meta')
-
-  for (var i = 0; i < metaTags.length; ++i) {
-    var tag = metaTags[i],
-        propertyAttr = tag.attributes['property']
-
-    if (!propertyAttr || propertyAttr.nodeValue.substring(0, ns.length) !== ns)
-      continue
-
-    var property = tag.attributes['property'].nodeValue.substring(ns.length+1),
-        content = tag.attributes['content'].nodeValue
-
-    if (!result[property])
-      result[property] = content
-    else if (result[property].push)
-      result[property].push(content)
-    else
-      result[property] = [result[property], content]
-  }
-
-  return result
+/**
+ * Parse Open Graph meta tags from a jsdom generated window.
+ *
+ * @param {Object} window Window whose document is searched for meta tags.
+ * @param {(string|string[])=} namespaces Property prefixes to collect, as an
+ *   array or a space and/or comma separated string. Defaults to ['og'].
+ * @return {Object} Full property name (e.g. 'og:title') mapped to its content;
+ *   a property found more than once is mapped to an array of its contents.
+ */
+exports.parse = function(window, namespaces) {
+  var result = {},
+    metaTags = window.document.getElementsByTagName('meta'),
+    propertyRegex = /^([a-z]+):/
+
+  // If undefined set as "og" namespace (OGP default).
+  if (!namespaces)
+    namespaces = ['og']
+  // If string convert to an array, accepts space and/or comma separated.
+  else if (typeof namespaces === 'string')
+    namespaces = namespaces.split(/[\s,]+/)
+
+  for (var i = 0; i < metaTags.length; ++i) {
+    var tag = metaTags[i],
+      propertyAttr = tag.attributes['property'],
+      contentAttr = tag.attributes['content']
+
+    // Skip meta tags lacking a property or a content attribute.
+    if (!propertyAttr || !contentAttr)
+      continue
+
+    // exec() yields null when the property has no "prefix:" part.
+    var property = propertyAttr.nodeValue,
+      matches = propertyRegex.exec(property)
+
+    // Verify OG property is found in our acceptable namespaces.
+    if (!matches || namespaces.indexOf(matches[1]) === -1)
+      continue
+
+    var content = contentAttr.nodeValue
+
+    // If the property does not already exist (even as ''), assign it.
+    if (!Object.prototype.hasOwnProperty.call(result, property))
+      result[property] = content
+    // Else if result[property] is already an array then push it.
+    else if (Array.isArray(result[property]))
+      result[property].push(content)
+    // Else convert string to array.
+    else
+      result[property] = [result[property], content]
+  }
+
+  return result
 }
diff --git a/package.json b/package.json
index 4ea7b6c..e4f9571 100644
--- a/package.json
+++ b/package.json
@@ -1,10 +1,11 @@
 {
   "name": "ogp",
   "description": "Parse Open Graph meta tags from jsdom generated DOM",
-  "version": "0.0.2",
+  "version": "1.0.0",
   "author": "Yury Proshchenko ",
   "contributors": [
-    { "name": "Yury Proshchenko", "email": "spect.man@gmail.com" }
+    { "name": "Yury Proshchenko", "email": "spect.man@gmail.com" },
+    { "name": "Cody Craven", "email": "cody@cravencode.com", "url": "https://github.com/codycraven" }
   ],
   "keywords": ["open graph", "opengraph", "ogp"],
   "main": "./lib/ogp.js",