From 7582bd18a5c776be44a6df33216fbbf7c830c6c8 Mon Sep 17 00:00:00 2001 From: magnolia-fan Date: Mon, 26 Sep 2011 09:10:23 +0400 Subject: [PATCH] MB + Last.fm + Discogs = Import! --- app/controllers/artist_controller.rb | 5 +- app/controllers/import_controller.rb | 164 ++++++++------ app/models/music/album.rb | 2 +- .../20110926031740_prepare_to_mixed_import.rb | 11 + db/schema.rb | 4 +- lib/discogs.rb | 204 ++++++++++-------- lib/tasks/lastfm_import.rake | 2 + lib/tasks/music_artist_import.rake | 8 + 8 files changed, 246 insertions(+), 154 deletions(-) create mode 100644 db/migrate/20110926031740_prepare_to_mixed_import.rb create mode 100644 lib/tasks/music_artist_import.rake diff --git a/app/controllers/artist_controller.rb b/app/controllers/artist_controller.rb index ecc8c29..12fc340 100644 --- a/app/controllers/artist_controller.rb +++ b/app/controllers/artist_controller.rb @@ -88,8 +88,7 @@ class ArtistController < ApplicationController end @artist[:albums] = [] - artist.albums.with_format(:album).each do |album| - ap album + artist.albums.each do |album| if true # media_types.include? album.album_type.downcase.to_sym and album.status == 1 tmp_album = {id: album.id, name: album.name, year: album.year, pic: album.pic_url} album_tracks = [] @@ -103,7 +102,7 @@ class ArtistController < ApplicationController (track.bonus == 0 ? album_tracks : bonus_tracks) << tmp_track end tmp_album[:tracks] = {album: album_tracks, bonus: bonus_tracks} - @artist[:albums] << tmp_album + @artist[:albums] << tmp_album if (album_tracks+bonus_tracks).length > 0 end end diff --git a/app/controllers/import_controller.rb b/app/controllers/import_controller.rb index 8125fee..25daac4 100644 --- a/app/controllers/import_controller.rb +++ b/app/controllers/import_controller.rb @@ -1,5 +1,7 @@ require 'musicbrainz' +require 'discogs' require 'nokogiri' +require 'zlib' class ImportController < ApplicationController def self.importArtist name, dry_run = false @@ -46,70 +48,106 @@ class ImportController < ApplicationController :url => url ).save end - - brainz_artist.release_groups.each do |brainz_release_group| - # Saving album - begin - album_lastfm = LastFM::Album.get_info( :artist => lastfm_artist['artist']['name'], :album => brainz_release_group.title ) - album_image = album_lastfm['album']['image'][3]['#text'] - rescue - album_image = '' - end - album = Album.new - album.name = brainz_release_group.title - album.year = brainz_release_group.first_release_date.year - album.artist_id = artist.id - album.mbid = brainz_release_group.id - album.album_type = brainz_release_group.type - album.pic_url = album_image - album.has_pic = (album_image != '' and not album_image.nil?) - album.status = 0 - album.save - # Tracks from the first release - tracks_hashed = [] - brainz_release_group.releases.each_with_index do |brainz_release, i| - local_brainz_release = LocalBrainz::Release.new( - :mbid => brainz_release.id, - :title => brainz_release.title, - :status => brainz_release.status, - :date => brainz_release.date, - :country => brainz_release.country, - :format => brainz_release.format, - :album_id => album.id - ) - local_brainz_release.save - # Processing tracks - brainz_release.tracks.each do |brainz_track| - local_brainz_track = LocalBrainz::Track.new( - :position => brainz_track.position, - :recording_id => brainz_track.recording_id, - :title => brainz_track.title, - :length => brainz_track.length, - :release_id => local_brainz_release.id - ) - local_brainz_track.save - track_title = brainz_track.title.gsub(/\s\/\s\[.*?\]/, '') - if tracks_hashed.include? track_title.downcase.scan(/[a-z0-9]*/).join - next - end - tracks_hashed << track_title.downcase.scan(/[a-z0-9]*/).join - track = Track.new - track.name = track_title - track.album_id = album.id - track.position = brainz_track.position - track.length = brainz_track.length - track.country = brainz_release.country - track.bonus = (i == 0 ? 0 : 1) - track.live = track_title.downcase.include? 'live' - track.acoustic = track_title.downcase.include? 'acoustic' - track.mbid = brainz_track.recording_id - track.save - end - end - album.status = 1 - album.save unless dry_run - end + unless brainz_artist.urls[:discogs].nil? + Discogs.get_master_albums(brainz_artist.urls[:discogs]).each do |info| + begin + puts " * "+info[:title]+" ("+info[:year]+") -- http://api.discogs.com/"+info[:uri]+")" + stream = open("http://api.discogs.com/"+info[:uri]+"?f=xml", + 'User-Agent' => 'Haven Import Bot', + 'Accept-Encoding' => 'gzip, deflate' + ) + if (stream.content_encoding.empty?) + body = stream.read + else + body = Zlib::GzipReader.new(stream).read + end + # Creating album + album = Album.new( + :name => info[:title], + :artist_id => artist.id, + :year => info[:year], + :status => 1, + :master => true + ) + begin + album_lastfm = LastFM::Album.get_info( :artist => lastfm_artist['artist']['name'], :album => info[:title] ) + album_image = album_lastfm['album']['image'][3]['#text'] + rescue + album_image = '' + end + album.pic_url = album_image + album.has_pic = (album_image != '' and not album_image.nil?) + album.save + Discogs.save_album(artist, album, Nokogiri::HTML(body)) + end + sleep 1 + end + end + + # brainz_artist.release_groups.each do |brainz_release_group| + # # Saving album + # begin + # album_lastfm = LastFM::Album.get_info( :artist => lastfm_artist['artist']['name'], :album => '' ) + # album_image = album_lastfm['album']['image'][3]['#text'] + # rescue + # album_image = '' + # end + # album = Album.new + # album.name = brainz_release_group.title + # album.year = brainz_release_group.first_release_date.year + # album.artist_id = artist.id + # album.mbid = brainz_release_group.id + # album.album_type = brainz_release_group.type + # album.pic_url = album_image + # album.has_pic = (album_image != '' and not album_image.nil?) + # album.status = 0 + # album.save + # # Tracks from the first release + # tracks_hashed = [] + # brainz_release_group.releases.each_with_index do |brainz_release, i| + # local_brainz_release = LocalBrainz::Release.new( + # :mbid => brainz_release.id, + # :title => brainz_release.title, + # :status => brainz_release.status, + # :date => brainz_release.date, + # :country => brainz_release.country, + # :format => brainz_release.format, + # :album_id => album.id + # ) + # local_brainz_release.save + # # Processing tracks + # brainz_release.tracks.each do |brainz_track| + # local_brainz_track = LocalBrainz::Track.new( + # :position => brainz_track.position, + # :recording_id => brainz_track.recording_id, + # :title => brainz_track.title, + # :length => brainz_track.length, + # :release_id => local_brainz_release.id + # ) + # local_brainz_track.save + # track_title = brainz_track.title.gsub(/\s\/\s\[.*?\]/, '') + # if tracks_hashed.include? track_title.downcase.scan(/[a-z0-9]*/).join + # next + # end + # tracks_hashed << track_title.downcase.scan(/[a-z0-9]*/).join + # track = Track.new + # track.name = track_title + # track.album_id = album.id + # track.position = brainz_track.position + # track.length = brainz_track.length + # track.country = brainz_release.country + # track.bonus = (i == 0 ? 0 : 1) + # track.live = track_title.downcase.include? 'live' + # track.acoustic = track_title.downcase.include? 'acoustic' + # track.mbid = brainz_track.recording_id + # track.save + # end + # end + # album.status = 1 + # album.save unless dry_run + # end + artist.status = 1 rescue => e ap e.message diff --git a/app/models/music/album.rb b/app/models/music/album.rb index 57ff092..7683886 100644 --- a/app/models/music/album.rb +++ b/app/models/music/album.rb @@ -1,6 +1,6 @@ class Album < ActiveRecord::Base belongs_to :artist - has_many :tracks, :order => 'bonus ASC, position ASC', :dependent => :destroy + has_many :tracks, :conditions => ['length > 0'], :order => 'bonus ASC, position ASC', :dependent => :destroy has_many :album_formats has_many :release_formats, :through => :album_formats diff --git a/db/migrate/20110926031740_prepare_to_mixed_import.rb b/db/migrate/20110926031740_prepare_to_mixed_import.rb new file mode 100644 index 0000000..e1203a1 --- /dev/null +++ b/db/migrate/20110926031740_prepare_to_mixed_import.rb @@ -0,0 +1,11 @@ +class PrepareToMixedImport < ActiveRecord::Migration + def up + add_column :track_artists, :name, :string + add_column :albums, :master_id, :integer + end + + def down + remove_column :track_artists, :name + remove_column :albums, :master_id + end +end diff --git a/db/schema.rb b/db/schema.rb index 950b586..df7e079 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -11,7 +11,7 @@ # # It's strongly recommended to check this file into your version control system. -ActiveRecord::Schema.define(:version => 20110926002631) do +ActiveRecord::Schema.define(:version => 20110926031740) do create_table "album_formats", :force => true do |t| t.integer "album_id" @@ -49,6 +49,7 @@ ActiveRecord::Schema.define(:version => 20110926002631) do t.integer "has_pic" t.integer "status" t.boolean "master" + t.integer "master_id" end add_index "albums", ["artist_id"], :name => "index_albums_on_artist_id" @@ -182,6 +183,7 @@ ActiveRecord::Schema.define(:version => 20110926002631) do t.integer "artist_id" t.boolean "main" t.string "join" + t.string "name" end add_index "track_artists", ["artist_id"], :name => "index_track_artists_on_artist_id" diff --git a/lib/discogs.rb b/lib/discogs.rb index 80529d0..92a54c7 100644 --- a/lib/discogs.rb +++ b/lib/discogs.rb @@ -4,6 +4,7 @@ class Discogs self.get_nodes('tmp/data/discogs_artists.xml', 'artist') do |node| artist = Artist.new( :name => (node.css('name').first.text), + :original_name => (node.css('name').first.text), :pic_url => (node.css('images > image[type="primary"]').first.attr('uri') unless node.css('images > image[type="primary"]').empty?), :status => 1 ) @@ -26,96 +27,127 @@ class Discogs main_artist = Artist.find_or_create_by_name(node.css('artists > artist > name').first.text) end - # Creating album - album = Album.new( - :name => (node.css('title').first.text unless node.css('title').empty?), - :artist_id => (main_artist.id unless main_artist.nil?), - :year => (node.css('released').first.text.split('-').first.to_i unless node.css('released').empty?), - :status => 1, - :master => (not node.css('master_id').empty?) - ) - - # Parsing image - unless node.css('images > image[type="primary"]').empty? - album.pic_url = node.css('images > image[type="primary"]').first.attr('uri') - album.has_pic = 1 - else - album.has_pic = 0 - end - - album.save ## ## ## - - # Defining formats - formats = node.css('formats > format > descriptions > description').each do |f| - format = ReleaseFormat.find_or_create_by_name(f.text) - if format.hash.nil? - format.hash = f.text.scan(/\w/).join().downcase - format.save - end - AlbumFormat.new( - :album_id => album.id, - :release_format_id => format.id - ).save - end - - # Defining genres - unless node.css('genres > genre').empty? - node.css('genres > genre').each do |g| - genre = Genre.find_or_create_by_name(g.text) - AlbumGenre.new( - :album_id => album.id, - :genre_id => genre.id - ).save - end - end - - # Defining styles - unless node.css('styles > style').empty? - node.css('styles > style').each do |s| - style = Style.find_or_create_by_name(s.text) - AlbumStyle.new( - :album_id => album.id, - :style_id => style.id - ).save - end - end - - # Writing tracklist - unless node.css('tracklist > track').empty? - node.css('tracklist > track').each do |t| - track = Track.new( - :album_id => album.id, - :name => (t.css('title').first.text unless t.css('title').empty?), - :position => (t.css('position').first.text.to_i(36) unless t.css('position').empty?), - :country => (node.css('country').first.text unless node.css('country').empty?), - :length => (self.duration_to_length(t.css('duration').first.text) unless t.css('duration').empty?) - ) - track.save - - if t.css('artists > artist').empty? - TrackArtist.new( - :track_id => track.id, - :artist_id => (main_artist.id unless main_artist.nil?), - :main => true - ).save - else - t.css('artists > artist').each_with_index do |a, i| - t_artist = Artist.find_or_create_by_name(a.css('name').first.text) - TrackArtist.new( - :track_id => track.id, - :artist_id => t_artist.id, - :main => (i == 0), - :join => a.css('join').first.text - ).save - end - end - - end - end + self.save_album main_artist, node end end + def self.save_album main_artist, album, node + unless node.css('master').empty? + album.master_id = node.css('master').first.attr('id') + end + + # Parsing image + # unless node.css('images > image[type="primary"]').empty? + # album.pic_url = node.css('images > image[type="primary"]').first.attr('uri') + # album.has_pic = 1 + # else + # if node.css('images > image[type="secondary"]').empty? + # album.has_pic = 0 + # else + # album.pic_url = node.css('images > image[type="secondary"]').first.attr('uri') + # album.has_pic = 1 + # end + # end + + album.save ## ## ## + + # Defining formats + formats = node.css('formats > format > descriptions > description').each do |f| + format = ReleaseFormat.find_or_create_by_name(f.text) + if format.hash.nil? + format.hash = f.text.scan(/\w/).join().downcase + format.save + end + AlbumFormat.new( + :album_id => album.id, + :release_format_id => format.id + ).save + end + + # Defining genres + unless node.css('genres > genre').empty? + node.css('genres > genre').each do |g| + genre = Genre.find_or_create_by_name(g.text) + AlbumGenre.new( + :album_id => album.id, + :genre_id => genre.id + ).save + end + end + + # Defining styles + unless node.css('styles > style').empty? + node.css('styles > style').each do |s| + style = Style.find_or_create_by_name(s.text) + AlbumStyle.new( + :album_id => album.id, + :style_id => style.id + ).save + end + end + + # Writing tracklist + unless node.css('tracklist > track').empty? + node.css('tracklist > track').each do |t| + track = Track.new( + :album_id => album.id, + :name => (t.css('title').first.text unless t.css('title').empty?), + :position => (t.css('position').first.text.scan(/[a-zA-Z0-9]/).join('').to_i(36) unless t.css('position').empty?), + :country => (node.css('country').first.text unless node.css('country').empty?), + :length => (self.duration_to_length(t.css('duration').first.text) unless t.css('duration').empty?) + ) + track.save + + if t.css('artists > artist').empty? + TrackArtist.new( + :track_id => track.id, + :artist_id => (main_artist.id unless main_artist.nil?), + :main => true, + :name => main_artist.name + ).save + else + t.css('artists > artist').each_with_index do |a, i| + t_artist = Artist.find_or_create_by_name(a.css('name').first.text) + TrackArtist.new( + :track_id => track.id, + :artist_id => t_artist.id, + :main => (i == 0), + :join => a.css('join').first.text, + :name => a.css('name').first.text + ).save + end + end + + end + end + end + + def self.get_master_albums url + x = Nokogiri::HTML(open(url, + 'User-Agent' => 'Haven Search Bot', + 'Cookie' => 'artist_layout=med; artist_limit=500' + ).read) + albums = [] + split = 0 + x.css('table.discography > tbody > tr').each do |_| + if _.attr('class') == 'credit_header' + split+=1 + end + if split == 2 + break + end + if _.attr('class').match(/^main.*/) and _.css('td')[3].text.scan(/\d/).join('') != '' and _.css('a.mr_toggler').length == 1 + albums << { + :uri => (_.attr('class').match(/master/) ? 'master/' : 'release/')+_.attr('id')[1,100], + :title => _.css('h4 > a').first.text, + :year => _.css('td')[3].text.scan(/\d/).join('') + } + end + end + albums + end + private def self.get_nodes filename, nodename, &block diff --git a/lib/tasks/lastfm_import.rake b/lib/tasks/lastfm_import.rake index d380285..52c433e 100644 --- a/lib/tasks/lastfm_import.rake +++ b/lib/tasks/lastfm_import.rake @@ -2,6 +2,7 @@ namespace :lastfm do namespace :import do desc 'Import top 1000 artists from last.fm' task :top => :environment do + ActiveRecord::Base.logger = Logger.new('/dev/null') ImportController.parseLastfmXML('tmp/data/top1000.xml').each do |artist| puts artist[:name] + (artist[:mbid].empty? ? '' : ' (' + artist[:mbid] + ')') record = Artist.find_or_create_by_name(artist[:name]) @@ -19,6 +20,7 @@ namespace :lastfm do end end task :hyped => :environment do + ActiveRecord::Base.logger = Logger.new('/dev/null') ImportController.parseLastfmXML('tmp/data/hyped500.xml').each do |artist| puts artist[:name] + (artist[:mbid].empty? ? '' : ' (' + artist[:mbid] + ')') record = Artist.find_or_create_by_name(artist[:name]) diff --git a/lib/tasks/music_artist_import.rake b/lib/tasks/music_artist_import.rake new file mode 100644 index 0000000..679903e --- /dev/null +++ b/lib/tasks/music_artist_import.rake @@ -0,0 +1,8 @@ +namespace :music do + namespace :artist do + desc 'Imports artist' + task :import => :environment do + ImportController.importArtist('Jet') + end + end +end