From 89776db4036f1f3c24693cc69d85706af2fda5d1 Mon Sep 17 00:00:00 2001 From: magnolia-fan Date: Fri, 24 Jun 2011 13:36:44 +0400 Subject: [PATCH] Refactored artist import using mbrainz n lastfm gems. Improved stability --- Gemfile | 1 + Gemfile.lock | 12 +- app/controllers/artist_controller.rb | 6 +- app/controllers/import_controller.rb | 256 ++++++++++-------- .../20110624073136_add_mbid_to_track.rb | 9 + db/schema.rb | 3 +- lib/tasks/db_import.rake | 3 +- public/js/beathaven/ajax.js | 5 +- public/js/beathaven/search.js | 7 +- public/templates/search.html | 3 +- 10 files changed, 177 insertions(+), 128 deletions(-) create mode 100644 db/migrate/20110624073136_add_mbid_to_track.rb diff --git a/Gemfile b/Gemfile index fe2b0cd..4519cf3 100644 --- a/Gemfile +++ b/Gemfile @@ -9,3 +9,4 @@ gem 'awesome_print', :require => 'ap' gem 'delayed_job' gem 'lastfm', :git => 'git://github.com/magnolia-fan/ruby-lastfm.git' +gem 'musicbrainz-ruby', :git => 'git://github.com/magnolia-fan/musicbrainz-ruby.git' diff --git a/Gemfile.lock b/Gemfile.lock index 55485c2..8ade364 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,3 +1,11 @@ +GIT + remote: git://github.com/magnolia-fan/musicbrainz-ruby.git + revision: 17d743a270b02b77251e10357af3d12cdab5f3a9 + specs: + musicbrainz-ruby (0.4.0) + hashie (~> 1.0) + httparty (>= 0.7.3) + GIT remote: git://github.com/magnolia-fan/ruby-lastfm.git revision: 1c82d5616c99689162afa8df854d16ebaa604e23 @@ -47,6 +55,7 @@ GEM daemons erubis (2.6.6) abstract (>= 1.0.0) + hashie (1.0.0) httparty (0.7.8) crack (= 0.1.8) i18n (0.5.0) @@ -81,7 +90,7 @@ GEM thor (0.14.6) treetop (1.4.9) polyglot (>= 0.3.1) - tzinfo (0.3.27) + tzinfo (0.3.28) xml-simple (1.1.0) PLATFORMS @@ -91,6 +100,7 @@ DEPENDENCIES awesome_print delayed_job lastfm! + musicbrainz-ruby! pg rails (= 3.0.8) sqlite3 diff --git a/app/controllers/artist_controller.rb b/app/controllers/artist_controller.rb index 634590d..0316264 100644 --- a/app/controllers/artist_controller.rb +++ b/app/controllers/artist_controller.rb @@ -1,6 +1,7 @@ require 'open-uri' class ArtistController < ApplicationController + @@default_album_types = ['Album', 'Soundtrack'] def data data = {} name = params[:name].gsub('%20', ' ').gsub('+', ' ') @@ -15,6 +16,9 @@ class ArtistController < ApplicationController end render :json => {status: 'loading', pics: pics} return + elsif artist and artist.status == 2 + render :json => {status: 'loading_failed', pics: []} + return end unless artist results = ArtistController.musicBrainzExactSearch(name) @@ -42,7 +46,7 @@ class ArtistController < ApplicationController data['albums'] = [] albums = artist.albums albums.each do |album| - if album.album_type == 'Album' + if @@default_album_types.include? album.album_type tmp_album = {name: album.name, year: album.year, pic: album.pic_url} album_tracks = [] bonus_tracks = [] diff --git a/app/controllers/import_controller.rb b/app/controllers/import_controller.rb index 3b608f1..83af8c0 100644 --- a/app/controllers/import_controller.rb +++ b/app/controllers/import_controller.rb @@ -1,143 +1,159 @@ -class ImportController < ApplicationController - @@user_agent = 'BeatHaven.org' - @@lastfm_api_key = '04fda005dbf61a50af5abc3e90f111f2' - def self.importArtist name +require 'lastfm' +require 'musicbrainz' - # Get artist info - artist_mb_data = ArtistController.musicBrainzExactSearch(name).first - begin - artist_lastfm_xml = open( - 'http://ws.audioscrobbler.com/2.0/'+ - '?method=artist.getinfo&api_key='+ @@lastfm_api_key + - '&artist='+ URI.escape(name).gsub(/amp;/, '').gsub(/\&/, '%26').gsub(/\?/, '%3F') - ).read - artist_desc = artist_lastfm_xml.scan(/<\!\[CDATA\[?(.*)\]\]><\/summary>/m)[0][0] - artist_pic = artist_lastfm_xml.scan(/?(.*)<\/image>/)[0][0] - rescue - artist_desc = '' - artist_pic = '' - end +class ImportController < ApplicationController + + @@user_agent = 'BeatHaven.org' + + @@lastfm_api_key = '04fda005dbf61a50af5abc3e90f111f2' + @@lastfm_secret = '19e70e98b291e9f15d0516925945eb1b' + + @@brainz_login = 'magnolia_fan' + @@brainz_password = '111111' + + def self.importArtist name + + # Initializing gems + brainz = MusicBrainz::Client.new(@@brainz_login, @@brainz_password, @@user_agent) + lastfm = Lastfm.new(@@lastfm_api_key, @@lastfm_secret) - # Save artist artist = Artist.find_by_name(name) - #artist.name = artist_mb_data[:name] - artist.desc = artist_desc - artist.pic_url = artist_pic - artist.artist_type = artist_mb_data[:type] - artist.mbid = artist_mb_data[:mbid] - artist.save - # Get albums from MB - release_groups_mb_xml = open( - 'http://musicbrainz.org/ws/2/release-group/?artist='+ artist_mb_data[:mbid], - 'User-Agent' => @@user_agent - ).read - release_groups_mb_data = release_groups_mb_xml.scan(/(.+?)<\/title>/m) + begin - release_groups_mb_data.each do |mb_album| - - # Get album releases from MB - releases_mb_xml = open( - 'http://musicbrainz.org/ws/2/release/?release-group='+ mb_album[1], - 'User-Agent' => @@user_agent - ).read - releases_mb_data = releases_mb_xml.scan(/<release\sid=\"([a-f0-9-]+?)\">.*?<status>([a-zA-Z]+?)<\/status>.*?<date>([0-9-]+?)<\/date>.*?<country>([A-Z]+?)<\/country>/m) - releases_mb_data.each do |item| - if item[2].length == 4 - item[2] += '-12-28' - elsif item[2].length == 7 - item[2] += '-28' - end + # Get artist info + artist_mb_data = ArtistController.musicBrainzExactSearch(name).first + begin + artist_lastfm = lastfm.artist.get_info(name) + artist_mb = brainz.artist(artist_lastfm['mbid']) + rescue + return end + + # Save artist + artist.desc = artist_lastfm['bio']['summary'] + artist.pic_url = artist_lastfm['image'][3]['content'] + artist.artist_type = artist_mb['artist']['type'] + artist.mbid = artist_lastfm['mbid'] + + # Get albums from MB + release_groups_mb = brainz.release_group(nil, :artist => artist_lastfm['mbid'])['release_group_list']['release_group'] + release_groups_mb.each do |mb_album| - unless releases_mb_data.empty? - - # Sorting releases by date - releases_mb_data.sort! { |a, b| a[2].to_date <=> b[2].to_date } - main_release = releases_mb_data.shift - - # Get tracks from the first release and then exclude release - main_tracks_mb_xml = open( - 'http://musicbrainz.org/ws/2/release/'+ main_release[0] +'/?inc=recordings', - 'User-Agent' => @@user_agent - ).read - mb_tracks = main_tracks_mb_xml.scan(/<track>.*?<position>(\d+?)<\/position>.*?<recording\sid=\"([a-f0-9-]+?)\"><title>(.*?)<\/title>(<length>(.*?)<\/length>)?/m) - mb_tracks.map! do |item| - item << main_release[3] + # Get album releases from MB + releases_mb = brainz.release(nil, {'release-group' => mb_album['id']})['release_list']['release'] + if releases_mb.is_a? Hash + releases_mb = [releases_mb] end - - unless mb_tracks.empty? - - # Saving album - begin - album_lastm_xml = open( - 'http://ws.audioscrobbler.com/2.0/'+ - '?method=album.getinfo&api_key='+ @@lastfm_api_key + - '&artist='+ URI.escape(name).gsub(/amp;/, '').gsub(/\&/, '%26').gsub(/\?/, '%3F')+ - '&album='+ URI.escape(mb_album[2]).gsub(/amp;/, '').gsub(/\&/, '%26').gsub(/\?/, '%3F') - ).read - album_image = album_lastm_xml.scan(/<image\ssize=\"extralarge\">?(.*)<\/image>/)[0][0] - rescue - album_image = '' + releases_mb.delete_if { |a| a['date'].nil? } + releases_mb.each do |release| + if release['date'].length == 4 + release['date'] += '-12-28' + elsif release['date'].length == 7 + release['date'] += '-28' end - album = Album.new - album.name = mb_album[2] - album.year = main_release[2].to_date.year - album.artist_id = artist.id - album.mbid = mb_album[1] - album.album_type = mb_album[0] - album.pic_url = album_image - album.save - - # Creating hashed array for matching - mb_main_tracks_hashed = [] - mb_tracks.each do |mb_track| - unless mb_main_tracks_hashed.include? mb_track[2].downcase.scan(/[a-z0-9]*/).join - mb_main_tracks_hashed << mb_track[2].downcase.scan(/[a-z0-9]*/).join - mb_track << main_release[3] # Release country - mb_track << 0 # Bonus flag + end + + unless releases_mb.empty? or true + + # Sorting releases by date + releases_mb.sort! { |a, b| a['date'].to_date <=> b['date'].to_date } + main_release = releases_mb.shift + + # Get tracks from the first release and then exclude release + tracks_mb = [] + tracks_mb_r = brainz.release(main_release['id'], {'inc' => 'recordings'})['release']['medium_list']['medium'] + if tracks_mb_r.is_a? Hash + tracks_mb = tracks_mb_r['track_list']['track'] + elsif tracks_mb_r.is_a? Array + tracks_mb = tracks_mb_r.shift['track_list']['track'] + tracks_mb_r.each do |cd| + cd['track_list']['track'].map! { |mb_track| mb_track['bonus'] = 1 } + tracks_mb |= cd['track_list']['track'] end end + + unless tracks_mb.empty? + + # Saving album + begin + album_lastfm = lastfm.album.get_info(artist_lastfm['name'], mb_album['title']) + album_image = album_lastfm['image'][3]['content'] + rescue + album_image = '' + end - # Searching for tracks in other releases - releases_mb_data.each do |mb_release| - other_tracks_mb_xml = open( - 'http://musicbrainz.org/ws/2/release/'+ mb_release[0] +'/?inc=recordings', - 'User-Agent' => @@user_agent - ).read - mb_other_tracks = other_tracks_mb_xml.scan(/<track>.*?<position>(\d+?)<\/position>.*?<recording\sid=\"([a-f0-9-]+?)\"><title>(.*?)<\/title><length>(.*?)<\/length>/m) - mb_other_tracks.each do |item| - unless mb_main_tracks_hashed.include? item[2].downcase.scan(/[a-z0-9]*/).join - item << mb_release[3] # Release country - item << 1 # Bonus flag - mb_tracks << item + album = Album.new + album.name = mb_album['title'] + album.year = main_release['date'].to_date.year + album.artist_id = artist.id + album.mbid = mb_album['id'] + album.album_type = mb_album['type'] + album.pic_url = album_image + album.save + + # Creating hashed array for matching + mb_main_tracks_hashed = [] + tracks_mb.each do |mb_track| + unless mb_main_tracks_hashed.include? mb_track['recording']['title'].downcase.scan(/[a-z0-9]*/).join + mb_main_tracks_hashed << mb_track['recording']['title'].downcase.scan(/[a-z0-9]*/).join + mb_track['country'] = main_release['country'] unless main_release['country'].nil? + mb_track['bonus'] = 0 if mb_track['bonus'].nil? end end - end - # Saving tracks - mb_tracks.each do |mb_track| - unless mb_track[2] == '[silence]' or mb_track[2] == '[untitled]' - track = Track.new - track.name = mb_track[2] - track.album_id = album.id - track.position = mb_track[0] - track.length = mb_track[4] - track.country = mb_track[5] - track.bonus = mb_track[6] - track.live = mb_track[2].downcase.include? 'live' - track.acoustic = mb_track[2].downcase.include? 'acoustic' - track.save + # Searching for tracks in other releases + releases_mb.each do |mb_release| + + other_tracks_mb = [] + other_tracks_mb_r = brainz.release(mb_release['id'], {'inc' => 'recordings'})['release']['medium_list']['medium'] + if other_tracks_mb_r.is_a? Hash + other_tracks_mb = other_tracks_mb_r['track_list']['track'] + elsif other_tracks_mb_r.is_a? Array + other_tracks_mb_r.each do |cd| + other_tracks_mb |= cd['track_list']['track'] + end + end + + other_tracks_mb.each do |mb_track| + unless mb_main_tracks_hashed.include? mb_track['recording']['title'].downcase.scan(/[a-z0-9]*/).join + mb_track['country'] = mb_release['country'] unless mb_release['country'].nil? + mb_track['bonus'] = 1 + tracks_mb << mb_track + end + end end - end - end # unless mb_main_tracks.empty? + # Saving tracks + tracks_mb.each do |mb_track| + unless ['[silence]', '[untitled]'].include? mb_track['recording']['title'] + track = Track.new + track.name = mb_track['recording']['title'] + track.album_id = album.id + track.position = mb_track['position'] + track.length = mb_track['length'] unless mb_track['length'].nil? + track.length = mb_track['recording']['length'] unless mb_track['recording']['length'].nil? + track.country = mb_track['country'] + track.bonus = mb_track['bonus'] + track.live = mb_track['recording']['title'].downcase.include? 'live' + track.acoustic = mb_track['recording']['title'].downcase.include? 'acoustic' + track.mbid = mb_track['recording']['id'] + track.save + end + end + + end # unless mb_main_tracks.empty? - end # unless mb_releases.empty? + end # unless mb_releases.empty? - end # mb_albums.each do |mb_album| + end # mb_albums.each do |mb_album| + + artist.status = 1 + + rescue + artist.status = 2 + end - artist.status = 1 artist.save end # def self.importArtist name diff --git a/db/migrate/20110624073136_add_mbid_to_track.rb b/db/migrate/20110624073136_add_mbid_to_track.rb new file mode 100644 index 0000000..3f9f6f8 --- /dev/null +++ b/db/migrate/20110624073136_add_mbid_to_track.rb @@ -0,0 +1,9 @@ +class AddMbidToTrack < ActiveRecord::Migration + def self.up + add_column :tracks, :mbid, :string + end + + def self.down + remove_column :tracks, :mbid + end +end diff --git a/db/schema.rb b/db/schema.rb index 5968335..63032cd 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended to check this file into your version control system. -ActiveRecord::Schema.define(:version => 20110622204123) do +ActiveRecord::Schema.define(:version => 20110624073136) do create_table "albums", :force => true do |t| t.string "name" @@ -90,6 +90,7 @@ ActiveRecord::Schema.define(:version => 20110622204123) do t.datetime "updated_at" t.integer "length" t.string "country" + t.string "mbid" end create_table "users", :force => true do |t| diff --git a/lib/tasks/db_import.rake b/lib/tasks/db_import.rake index 748a5f8..dfc7cf2 100644 --- a/lib/tasks/db_import.rake +++ b/lib/tasks/db_import.rake @@ -3,7 +3,8 @@ require 'open-uri' namespace :db do desc 'Imports test data from MusicBrainz database' task :import => :environment do - ati = ['The Killers'] + + ati = ['Marilyn Manson'] ati.each do |name| ImportController.importArtist(name) #ap res = ArtistController.musicBrainzExactSearch(name) diff --git a/public/js/beathaven/ajax.js b/public/js/beathaven/ajax.js index 0674ded..3244891 100644 --- a/public/js/beathaven/ajax.js +++ b/public/js/beathaven/ajax.js @@ -17,8 +17,9 @@ var Ajax = { } else if (data.status == 'suggestions') { Search.hideSpinner(); Search.showSuggestions(data.values); - } else if (data.status == 'error') { - Ajax.load404Page(); + } else if (data.status == 'loading_failed') { + Search.hideSpinner(); + Search.showError(); } return false; } else { diff --git a/public/js/beathaven/search.js b/public/js/beathaven/search.js index 193a81f..3f82893 100644 --- a/public/js/beathaven/search.js +++ b/public/js/beathaven/search.js @@ -6,6 +6,7 @@ var Search = { $('#search-container input').attr('disabled', 'disabled').blur(); $('#search-container img').show(); $('.autocomplete-container').hide(); + $('.artist_loading').hide(); Search.hideSuggestions(); }, @@ -33,7 +34,7 @@ var Search = { }, showArtistPics: function(pics) { - $('.artist_loading, .artist_pics').show(); + $('.artist_loading.ok, .artist_pics').show(); for (var i = 0; i < pics.length; i++) { if (Search.pics.indexOf(pics[i]) === -1) { Search.pics.push(pics[i]); @@ -44,6 +45,10 @@ var Search = { '); } } + }, + + showError: function() { + $('.artist_loading.failed').show(); } } diff --git a/public/templates/search.html b/public/templates/search.html index 93199b3..dd6d895 100644 --- a/public/templates/search.html +++ b/public/templates/search.html @@ -4,7 +4,8 @@ <input type="submit" value="Search" id="search_button"/> </form> <img class="spinner" src="/images/loader.gif" alt=""/> - <div class="artist_loading">Artist info is loading for the first time now. Usually it takes less than a minute, please wait a bit.</div> + <div class="artist_loading failed">Something very bad happened while we tried out to load some info about this artist. How about some other one?</div> + <div class="artist_loading ok">Artist info is loading for the first time now. Usually it takes less than a minute, please wait a bit.</div> <div class="artist_pics"></div> <div class="suggestions"> <div>Misspelled?</div>