# 193 lines, 5.8 KiB, Ruby
require 'zlib'
|
|
|
|
# Importer for Discogs data. Loads artists and releases from the XML dump
# files under tmp/data and scrapes/searches Discogs over HTTP.
#
# Nokogiri, open-uri, CGI, awesome_print (`ap`) and the model classes used
# below (Artist, Album, Track, ...) are expected to be loaded by the host
# application; this file itself only requires zlib.
class Discogs

  # Imports every <artist> element of tmp/data/discogs_artists.xml as an
  # Artist (status 1) and stores each name variation / alias as an
  # ArtistAlias pointing at it.
  def self.artists
    get_nodes('tmp/data/discogs_artists.xml', 'artist') do |node|
      name = node.css('name').first.text
      primary_image = node.css('images > image[type="primary"]').first

      artist = Artist.new(
        :name => name,
        :original_name => name,
        :pic_url => (primary_image.attr('uri') if primary_image),
        :status => 1
      )
      artist.save

      node.css('namevariations > name, aliases > name').each do |variation|
        ArtistAlias.new(
          :artist_id => artist.id,
          :name => variation.text
        ).save
      end
    end
  end

  # Imports every <release> element of tmp/data/discogs_releases.xml.
  # The first artist listed on the release (if any) becomes the main artist.
  def self.releases
    get_nodes('tmp/data/discogs_releases.xml', 'release') do |node|
      # Defining artist
      artist_name = first_text(node, 'artists > artist > name')
      main_artist = artist_name ? Artist.find_or_create_by_name(artist_name) : nil

      # save_album needs the album record as its second argument; calling it
      # with only (main_artist, node) raised ArgumentError for every release.
      # NOTE(review): the album is created without attributes because the
      # Album schema is not visible from this file - populate title etc. here
      # once the column names are confirmed.
      album = Album.new
      save_album main_artist, album, node
    end
  end

  # Persists +album+ together with its formats, genres, styles, tracks and
  # track credits, all read from the release element +node+.
  #
  # main_artist - Artist credited on tracks that list no artists of their
  #               own; may be nil, in which case the credit is saved with a
  #               nil artist_id and name.
  # album       - unsaved album record; it is saved here so its id can be
  #               referenced by the join records.
  # node        - Nokogiri element for one <release>.
  def self.save_album(main_artist, album, node)
    master = node.css('master').first
    album.master_id = master.attr('id') if master

    # Parsing image
    # unless node.css('images > image[type="primary"]').empty?
    #   album.pic_url = node.css('images > image[type="primary"]').first.attr('uri')
    #   album.has_pic = 1
    # else
    #   if node.css('images > image[type="secondary"]').empty?
    #     album.has_pic = 0
    #   else
    #     album.pic_url = node.css('images > image[type="secondary"]').first.attr('uri')
    #     album.has_pic = 1
    #   end
    # end

    album.save

    # Defining formats
    node.css('formats > format > descriptions > description').each do |description|
      release_format = ReleaseFormat.find_or_create_by_name(description.text)
      # NOTE(review): `hash` is also Object#hash, which never returns nil; if
      # the model does not expose a `hash` column accessor this branch never
      # runs - confirm against the ReleaseFormat model.
      if release_format.hash.nil?
        release_format.hash = description.text.scan(/\w/).join.downcase
        release_format.save
      end
      AlbumFormat.new(
        :album_id => album.id,
        :release_format_id => release_format.id
      ).save
    end

    # Defining genres
    node.css('genres > genre').each do |genre_node|
      genre = Genre.find_or_create_by_name(genre_node.text)
      AlbumGenre.new(
        :album_id => album.id,
        :genre_id => genre.id
      ).save
    end

    # Defining styles
    node.css('styles > style').each do |style_node|
      style = Style.find_or_create_by_name(style_node.text)
      AlbumStyle.new(
        :album_id => album.id,
        :style_id => style.id
      ).save
    end

    # Writing tracklist
    country = first_text(node, 'country') # same for every track of the release
    node.css('tracklist > track').each do |track_node|
      position = first_text(track_node, 'position')
      duration = first_text(track_node, 'duration')

      track = Track.new(
        :album_id => album.id,
        :name => first_text(track_node, 'title'),
        # Positions such as "A1" are stripped to alphanumerics and read as a
        # base-36 number.
        :position => (position.scan(/[a-zA-Z0-9]/).join.to_i(36) if position),
        :country => country,
        :length => (duration_to_length(duration) if duration)
      )
      track.save

      save_track_artists(track, track_node, main_artist)
    end
  end

  # Scrapes the discography table of the Discogs page at +url+ and returns
  # an array of hashes with :uri, :title and :year, one per row that looks
  # like an album. Scanning stops at the second "credit_header" row.
  # A failed download is treated as an empty page (best effort), so the
  # result is then an empty array. The list is also printed with `ap`.
  def self.get_master_albums(url)
    html = begin
      open(url,
           'User-Agent' => 'Haven Search Bot',
           'Cookie' => 'artist_layout=med; artist_limit=500') { |io| io.read }
    rescue StandardError
      '' # best effort: an unreachable page yields no albums
    end

    albums = []
    headers_seen = 0
    Nokogiri::HTML(html).css('table.discography > tbody > tr').each do |row|
      row_class = row.attr('class').to_s # rows without a class attribute are skipped, not fatal
      headers_seen += 1 if row_class == 'credit_header'
      break if headers_seen == 2
      next unless row_class =~ /^main/

      cells = row.css('td')
      year = cells[3] ? cells[3].text.scan(/\d/).join : ''
      next if year.empty?

      album_row = row.css('a.mr_toggler').length == 1 ||
                  cells[1].text =~ /\(.*?Album.*?\)/
      next unless album_row

      albums << {
        :uri => (row_class =~ /master/ ? 'master/' : 'release/') + row.attr('id').to_s[1, 100].to_s,
        :title => first_text(row, 'h4 > a'),
        :year => year
      }
    end

    ap albums
    albums # explicit, so the result does not depend on what `ap` returns
  end

  # Queries the Discogs search API for +name+ and returns the text of the
  # first artist result's <uri>, or nil when there is none.
  def self.search_for_artist(name)
    url = "http://api.discogs.com/search?q=#{CGI.escape(name)}&f=xml"

    body = open(url,
                'User-Agent' => 'Haven Import Bot',
                'Accept-Encoding' => 'gzip, deflate') do |stream|
      encodings = stream.content_encoding
      # Both advertised encodings are decoded; anything else is read as-is.
      if (encodings & %w[gzip x-gzip]).any?
        Zlib::GzipReader.new(stream).read
      elsif encodings.include?('deflate')
        Zlib::Inflate.inflate(stream.read)
      else
        stream.read
      end
    end

    first_text(
      Nokogiri::XML(body),
      'exactresults > result[type=artist] > uri, searchresults > result[type=artist] > uri'
    )
  end

  # NOTE: a bare `private` does not apply to methods defined with
  # `def self.`, so get_nodes and duration_to_length below remain publicly
  # callable. They are left that way so existing callers keep working; only
  # the helpers listed in private_class_method at the bottom are hidden.
  private

  # Streams the XML file +filename+ and yields the root element of a
  # stand-alone document built from each element named +nodename+, so the
  # whole dump is never parsed into one tree.
  def self.get_nodes(filename, nodename, &block)
    File.open(filename) do |file|
      Nokogiri::XML::Reader.from_io(file).each do |node|
        next unless node.name == nodename &&
                    node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT
        yield(Nokogiri::XML(node.outer_xml).root)
      end
    end
  end

  # Converts a colon-separated duration into seconds.
  #   "3:45"    => 225
  #   "1:02:03" => 3723
  #   "45"      => 2700 (a lone number is read as minutes)
  # Non-numeric parts count as 0.
  def self.duration_to_length(duration)
    parts = duration.split(':').map(&:to_i)
    parts << 0 if parts.size < 2 # keep the minutes-only reading of a lone number
    parts.inject(0) { |total, part| total * 60 + part }
  end

  # Text of the first element under +scope+ matching +selector+, or nil.
  def self.first_text(scope, selector)
    element = scope.css(selector).first
    element && element.text
  end

  # Saves the TrackArtist rows for one track: one row per artist listed on
  # the track (the first flagged as main), or a single row for +main_artist+
  # when the track lists none.
  def self.save_track_artists(track, track_node, main_artist)
    credits = track_node.css('artists > artist')

    if credits.empty?
      TrackArtist.new(
        :track_id => track.id,
        :artist_id => (main_artist.id if main_artist),
        :main => true,
        :name => (main_artist.name if main_artist)
      ).save
    else
      credits.each_with_index do |credit, index|
        artist_name = credit.css('name').first.text
        track_artist = Artist.find_or_create_by_name(artist_name)
        TrackArtist.new(
          :track_id => track.id,
          :artist_id => track_artist.id,
          :main => (index == 0),
          :join => first_text(credit, 'join'), # <join> is optional
          :name => artist_name
        ).save
      end
    end
  end

  private_class_method :first_text, :save_track_artists

end