require 'zlib'

# Imports Discogs data into the application's models and offers a couple of
# HTTP helpers for the Discogs site / API.
#
# Besides zlib this class relies on names that are expected to be loaded by
# the host application: Nokogiri, CGI, an `open` that accepts URLs, and the
# Artist / ArtistAlias / ReleaseFormat / AlbumFormat / Genre / AlbumGenre /
# Style / AlbumStyle / Track / TrackArtist models.
class Discogs
  # Reads every <artist> element of tmp/data/discogs_artists.xml and saves an
  # Artist plus one ArtistAlias per name variation / alias.
  def self.artists
    get_nodes('tmp/data/discogs_artists.xml', 'artist') do |node|
      name_node = node.css('name').first
      primary_image = node.css('images > image[type="primary"]').first

      artist = Artist.new(
        :name => name_node.text,
        :original_name => name_node.text,
        :pic_url => (primary_image.attr('uri') unless primary_image.nil?),
        :status => 1
      )
      artist.save

      node.css('namevariations > name, aliases > name').each do |variation|
        ArtistAlias.new(
          :artist_id => artist.id,
          :name => variation.text
        ).save
      end
    end
  end

  # Reads every <release> element of tmp/data/discogs_releases.xml, resolves
  # its first listed artist (if any) and hands the release to save_album.
  def self.releases
    get_nodes('tmp/data/discogs_releases.xml', 'release') do |node|
      # Defining artist
      artist_name = node.css('artists > artist > name').first
      main_artist = nil
      main_artist = Artist.find_or_create_by_name(artist_name.text) unless artist_name.nil?

      # save_album takes (main_artist, album, node); the previous two-argument
      # call raised ArgumentError for every release.
      # NOTE(review): the Album model is assumed from the album_id foreign keys
      # used in save_album. Only master_id is populated there; set any further
      # album attributes (title, year, ...) here once the schema is confirmed.
      save_album(main_artist, Album.new, node)
    end
  end

  # Saves +album+ and everything hanging off a release +node+: formats,
  # genres, styles, and the tracklist with per-track artists.
  #
  # main_artist - Artist used for tracks that list no artists of their own;
  #               may be nil.
  # album       - unsaved (or existing) album record; it is saved here.
  # node        - Nokogiri element for one <release>.
  def self.save_album(main_artist, album, node)
    master = node.css('master').first
    album.master_id = master.attr('id') unless master.nil?

    # Parsing image
    # unless node.css('images > image[type="primary"]').empty?
    #   album.pic_url = node.css('images > image[type="primary"]').first.attr('uri')
    #   album.has_pic = 1
    # else
    #   if node.css('images > image[type="secondary"]').empty?
    #     album.has_pic = 0
    #   else
    #     album.pic_url = node.css('images > image[type="secondary"]').first.attr('uri')
    #     album.has_pic = 1
    #   end
    # end

    album.save

    # Defining formats
    node.css('formats > format > descriptions > description').each do |description|
      release_format = ReleaseFormat.find_or_create_by_name(description.text)
      # NOTE(review): `hash` is read as a model attribute here; it shares its
      # name with Object#hash - confirm the model really overrides it.
      if release_format.hash.nil?
        release_format.hash = description.text.scan(/\w/).join().downcase
        release_format.save
      end
      AlbumFormat.new(
        :album_id => album.id,
        :release_format_id => release_format.id
      ).save
    end

    # Defining genres
    node.css('genres > genre').each do |g|
      genre = Genre.find_or_create_by_name(g.text)
      AlbumGenre.new(
        :album_id => album.id,
        :genre_id => genre.id
      ).save
    end

    # Defining styles
    node.css('styles > style').each do |s|
      style = Style.find_or_create_by_name(s.text)
      AlbumStyle.new(
        :album_id => album.id,
        :style_id => style.id
      ).save
    end

    # Writing tracklist
    # The country belongs to the release, so look it up once for all tracks.
    country = node.css('country').first

    node.css('tracklist > track').each do |t|
      title = t.css('title').first
      position = t.css('position').first
      duration = t.css('duration').first

      track = Track.new(
        :album_id => album.id,
        :name => (title.text unless title.nil?),
        # Positions such as "A1" are stored as a base-36 integer.
        :position => (position.text.scan(/[a-zA-Z0-9]/).join('').to_i(36) unless position.nil?),
        :country => (country.text unless country.nil?),
        :length => (duration_to_length(duration.text) unless duration.nil?)
      )
      track.save

      track_artists = t.css('artists > artist')
      if track_artists.empty?
        # No per-track credits: fall back to the release's main artist, which
        # may itself be missing, so both id and name are nil-guarded.
        TrackArtist.new(
          :track_id => track.id,
          :artist_id => (main_artist.id unless main_artist.nil?),
          :main => true,
          :name => (main_artist.name unless main_artist.nil?)
        ).save
      else
        track_artists.each_with_index do |a, i|
          credited_name = a.css('name').first.text
          join = a.css('join').first
          t_artist = Artist.find_or_create_by_name(credited_name)
          TrackArtist.new(
            :track_id => track.id,
            :artist_id => t_artist.id,
            :main => (i == 0),
            :join => (join.text unless join.nil?),
            :name => credited_name
          ).save
        end
      end
    end
  end

  # Scrapes an artist discography page and returns an array of hashes
  # ({ :uri, :title, :year }) for rows that look like albums. Scanning stops
  # at the second "credit_header" row. A failed download yields [].
  def self.get_master_albums(url)
    begin
      xml = open(url,
        'User-Agent' => 'Haven Search Bot',
        'Cookie' => 'artist_layout=med; artist_limit=500'
      ).read
    rescue
      # Best effort: treat an unreachable page as an empty discography.
      xml = ""
    end

    albums = []
    headers_seen = 0

    Nokogiri::HTML(xml).css('table.discography > tbody > tr').each do |row|
      # Rows without a class attribute would otherwise raise on nil.
      row_class = row.attr('class').to_s

      headers_seen += 1 if row_class == 'credit_header'
      break if headers_seen == 2

      next unless row_class.match(/^main.*/)

      cells = row.css('td')
      next if cells[1].nil? || cells[3].nil?

      year = cells[3].text.scan(/\d/).join('')
      next if year == ''

      looks_like_album = row.css('a.mr_toggler').length == 1 ||
                         cells[1].text.match(/\(.*?Album.*?\)/)
      next unless looks_like_album

      albums << {
        :uri => (row_class.match(/master/) ? 'master/' : 'release/') + row.attr('id').to_s[1, 100].to_s,
        :title => row.css('h4 > a').first.text,
        :year => year
      }
    end

    albums
  end

  # Queries the Discogs search API for +name+ and returns the URI text of the
  # first artist result (exact results first in document order), or nil.
  def self.search_for_artist(name)
    url = "http://api.discogs.com/search?q=#{CGI::escape(name)}&f=xml"

    body = open(url,
      'User-Agent' => 'Haven Import Bot',
      'Accept-Encoding' => 'gzip, deflate'
    ) do |stream|
      encodings = stream.content_encoding
      if encodings.include?('gzip') || encodings.include?('x-gzip')
        gz = Zlib::GzipReader.new(stream)
        begin
          gz.read
        ensure
          # finish releases the reader without closing +stream+, which the
          # surrounding open block closes itself.
          gz.finish
        end
      elsif encodings.include?('deflate')
        # We advertise deflate, so decode it rather than feeding it to gzip.
        Zlib::Inflate.inflate(stream.read)
      else
        stream.read
      end
    end

    first_uri = Nokogiri::XML(body).css(
      'exactresults > result[type=artist] > uri, searchresults > result[type=artist] > uri'
    ).first
    first_uri.text unless first_uri.nil?
  end

  # The helpers below are internal. A bare `private` has no effect on
  # `def self.` methods, so they were always publicly callable; they are left
  # public to avoid breaking any external caller.

  # Streams +filename+ and yields a standalone parsed element for every
  # opening element named +nodename+, so the whole dump is never loaded.
  def self.get_nodes(filename, nodename, &block)
    File.open(filename) do |file|
      Nokogiri::XML::Reader.from_io(file).each do |node|
        next unless node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT && node.name == nodename

        yield(Nokogiri::XML(node.outer_xml).root)
      end
    end
  end

  # Converts a duration string to seconds.
  #
  # "m:ss" gives m * 60 + ss. Three or more components ("h:mm:ss") are folded
  # left to right in base 60. For backward compatibility a single component
  # is still treated as minutes ("45" => 2700) and "" gives 0.
  def self.duration_to_length(duration)
    parts = duration.split(':')
    return parts[0].to_i * 60 + parts[1].to_i if parts.size < 3

    parts.inject(0) { |total, part| total * 60 + part.to_i }
  end
end