1
0
Fork 0

Refactored artist import to use the musicbrainz-ruby and lastfm gems. Improved stability.

This commit is contained in:
magnolia-fan 2011-06-24 13:36:44 +04:00
parent ecb47b27e7
commit 89776db403
10 changed files with 177 additions and 128 deletions

View File

@ -9,3 +9,4 @@ gem 'awesome_print', :require => 'ap'
gem 'delayed_job' gem 'delayed_job'
gem 'lastfm', :git => 'git://github.com/magnolia-fan/ruby-lastfm.git' gem 'lastfm', :git => 'git://github.com/magnolia-fan/ruby-lastfm.git'
gem 'musicbrainz-ruby', :git => 'git://github.com/magnolia-fan/musicbrainz-ruby.git'

View File

@ -1,3 +1,11 @@
GIT
remote: git://github.com/magnolia-fan/musicbrainz-ruby.git
revision: 17d743a270b02b77251e10357af3d12cdab5f3a9
specs:
musicbrainz-ruby (0.4.0)
hashie (~> 1.0)
httparty (>= 0.7.3)
GIT GIT
remote: git://github.com/magnolia-fan/ruby-lastfm.git remote: git://github.com/magnolia-fan/ruby-lastfm.git
revision: 1c82d5616c99689162afa8df854d16ebaa604e23 revision: 1c82d5616c99689162afa8df854d16ebaa604e23
@ -47,6 +55,7 @@ GEM
daemons daemons
erubis (2.6.6) erubis (2.6.6)
abstract (>= 1.0.0) abstract (>= 1.0.0)
hashie (1.0.0)
httparty (0.7.8) httparty (0.7.8)
crack (= 0.1.8) crack (= 0.1.8)
i18n (0.5.0) i18n (0.5.0)
@ -81,7 +90,7 @@ GEM
thor (0.14.6) thor (0.14.6)
treetop (1.4.9) treetop (1.4.9)
polyglot (>= 0.3.1) polyglot (>= 0.3.1)
tzinfo (0.3.27) tzinfo (0.3.28)
xml-simple (1.1.0) xml-simple (1.1.0)
PLATFORMS PLATFORMS
@ -91,6 +100,7 @@ DEPENDENCIES
awesome_print awesome_print
delayed_job delayed_job
lastfm! lastfm!
musicbrainz-ruby!
pg pg
rails (= 3.0.8) rails (= 3.0.8)
sqlite3 sqlite3

View File

@ -1,6 +1,7 @@
require 'open-uri' require 'open-uri'
class ArtistController < ApplicationController class ArtistController < ApplicationController
@@default_album_types = ['Album', 'Soundtrack']
def data def data
data = {} data = {}
name = params[:name].gsub('%20', ' ').gsub('+', ' ') name = params[:name].gsub('%20', ' ').gsub('+', ' ')
@ -15,6 +16,9 @@ class ArtistController < ApplicationController
end end
render :json => {status: 'loading', pics: pics} render :json => {status: 'loading', pics: pics}
return return
elsif artist and artist.status == 2
render :json => {status: 'loading_failed', pics: []}
return
end end
unless artist unless artist
results = ArtistController.musicBrainzExactSearch(name) results = ArtistController.musicBrainzExactSearch(name)
@ -42,7 +46,7 @@ class ArtistController < ApplicationController
data['albums'] = [] data['albums'] = []
albums = artist.albums albums = artist.albums
albums.each do |album| albums.each do |album|
if album.album_type == 'Album' if @@default_album_types.include? album.album_type
tmp_album = {name: album.name, year: album.year, pic: album.pic_url} tmp_album = {name: album.name, year: album.year, pic: album.pic_url}
album_tracks = [] album_tracks = []
bonus_tracks = [] bonus_tracks = []

View File

@ -1,143 +1,159 @@
require 'lastfm'
require 'musicbrainz'
class ImportController < ApplicationController class ImportController < ApplicationController
@@user_agent = 'BeatHaven.org' @@user_agent = 'BeatHaven.org'
@@lastfm_api_key = '04fda005dbf61a50af5abc3e90f111f2' @@lastfm_api_key = '04fda005dbf61a50af5abc3e90f111f2'
@@lastfm_secret = '19e70e98b291e9f15d0516925945eb1b'
@@brainz_login = 'magnolia_fan'
@@brainz_password = '111111'
def self.importArtist name def self.importArtist name
# Get artist info # Initializing gems
artist_mb_data = ArtistController.musicBrainzExactSearch(name).first brainz = MusicBrainz::Client.new(@@brainz_login, @@brainz_password, @@user_agent)
begin lastfm = Lastfm.new(@@lastfm_api_key, @@lastfm_secret)
artist_lastfm_xml = open(
'http://ws.audioscrobbler.com/2.0/'+
'?method=artist.getinfo&api_key='+ @@lastfm_api_key +
'&artist='+ URI.escape(name).gsub(/amp;/, '').gsub(/\&/, '%26').gsub(/\?/, '%3F')
).read
artist_desc = artist_lastfm_xml.scan(/<summary><\!\[CDATA\[?(.*)\]\]><\/summary>/m)[0][0]
artist_pic = artist_lastfm_xml.scan(/<image\ssize=\"extralarge\">?(.*)<\/image>/)[0][0]
rescue
artist_desc = ''
artist_pic = ''
end
# Save artist
artist = Artist.find_by_name(name) artist = Artist.find_by_name(name)
#artist.name = artist_mb_data[:name]
artist.desc = artist_desc
artist.pic_url = artist_pic
artist.artist_type = artist_mb_data[:type]
artist.mbid = artist_mb_data[:mbid]
artist.save
# Get albums from MB begin
release_groups_mb_xml = open(
'http://musicbrainz.org/ws/2/release-group/?artist='+ artist_mb_data[:mbid],
'User-Agent' => @@user_agent
).read
release_groups_mb_data = release_groups_mb_xml.scan(/<release-group\stype=\"([a-zA-Z]+?)\"\sid=\"([a-f0-9-]+?)\"><title>(.+?)<\/title>/m)
release_groups_mb_data.each do |mb_album| # Get artist info
artist_mb_data = ArtistController.musicBrainzExactSearch(name).first
# Get album releases from MB begin
releases_mb_xml = open( artist_lastfm = lastfm.artist.get_info(name)
'http://musicbrainz.org/ws/2/release/?release-group='+ mb_album[1], artist_mb = brainz.artist(artist_lastfm['mbid'])
'User-Agent' => @@user_agent rescue
).read return
releases_mb_data = releases_mb_xml.scan(/<release\sid=\"([a-f0-9-]+?)\">.*?<status>([a-zA-Z]+?)<\/status>.*?<date>([0-9-]+?)<\/date>.*?<country>([A-Z]+?)<\/country>/m)
releases_mb_data.each do |item|
if item[2].length == 4
item[2] += '-12-28'
elsif item[2].length == 7
item[2] += '-28'
end
end end
unless releases_mb_data.empty? # Save artist
artist.desc = artist_lastfm['bio']['summary']
artist.pic_url = artist_lastfm['image'][3]['content']
artist.artist_type = artist_mb['artist']['type']
artist.mbid = artist_lastfm['mbid']
# Sorting releases by date # Get albums from MB
releases_mb_data.sort! { |a, b| a[2].to_date <=> b[2].to_date } release_groups_mb = brainz.release_group(nil, :artist => artist_lastfm['mbid'])['release_group_list']['release_group']
main_release = releases_mb_data.shift release_groups_mb.each do |mb_album|
# Get tracks from the first release and then exclude release # Get album releases from MB
main_tracks_mb_xml = open( releases_mb = brainz.release(nil, {'release-group' => mb_album['id']})['release_list']['release']
'http://musicbrainz.org/ws/2/release/'+ main_release[0] +'/?inc=recordings', if releases_mb.is_a? Hash
'User-Agent' => @@user_agent releases_mb = [releases_mb]
).read end
mb_tracks = main_tracks_mb_xml.scan(/<track>.*?<position>(\d+?)<\/position>.*?<recording\sid=\"([a-f0-9-]+?)\"><title>(.*?)<\/title>(<length>(.*?)<\/length>)?/m) releases_mb.delete_if { |a| a['date'].nil? }
mb_tracks.map! do |item| releases_mb.each do |release|
item << main_release[3] if release['date'].length == 4
release['date'] += '-12-28'
elsif release['date'].length == 7
release['date'] += '-28'
end
end end
unless mb_tracks.empty? unless releases_mb.empty? or true
# Saving album # Sorting releases by date
begin releases_mb.sort! { |a, b| a['date'].to_date <=> b['date'].to_date }
album_lastm_xml = open( main_release = releases_mb.shift
'http://ws.audioscrobbler.com/2.0/'+
'?method=album.getinfo&api_key='+ @@lastfm_api_key +
'&artist='+ URI.escape(name).gsub(/amp;/, '').gsub(/\&/, '%26').gsub(/\?/, '%3F')+
'&album='+ URI.escape(mb_album[2]).gsub(/amp;/, '').gsub(/\&/, '%26').gsub(/\?/, '%3F')
).read
album_image = album_lastm_xml.scan(/<image\ssize=\"extralarge\">?(.*)<\/image>/)[0][0]
rescue
album_image = ''
end
album = Album.new
album.name = mb_album[2]
album.year = main_release[2].to_date.year
album.artist_id = artist.id
album.mbid = mb_album[1]
album.album_type = mb_album[0]
album.pic_url = album_image
album.save
# Creating hashed array for matching # Get tracks from the first release and then exclude release
mb_main_tracks_hashed = [] tracks_mb = []
mb_tracks.each do |mb_track| tracks_mb_r = brainz.release(main_release['id'], {'inc' => 'recordings'})['release']['medium_list']['medium']
unless mb_main_tracks_hashed.include? mb_track[2].downcase.scan(/[a-z0-9]*/).join if tracks_mb_r.is_a? Hash
mb_main_tracks_hashed << mb_track[2].downcase.scan(/[a-z0-9]*/).join tracks_mb = tracks_mb_r['track_list']['track']
mb_track << main_release[3] # Release country elsif tracks_mb_r.is_a? Array
mb_track << 0 # Bonus flag tracks_mb = tracks_mb_r.shift['track_list']['track']
tracks_mb_r.each do |cd|
cd['track_list']['track'].map! { |mb_track| mb_track['bonus'] = 1 }
tracks_mb |= cd['track_list']['track']
end end
end end
# Searching for tracks in other releases unless tracks_mb.empty?
releases_mb_data.each do |mb_release|
other_tracks_mb_xml = open( # Saving album
'http://musicbrainz.org/ws/2/release/'+ mb_release[0] +'/?inc=recordings', begin
'User-Agent' => @@user_agent album_lastfm = lastfm.album.get_info(artist_lastfm['name'], mb_album['title'])
).read album_image = album_lastfm['image'][3]['content']
mb_other_tracks = other_tracks_mb_xml.scan(/<track>.*?<position>(\d+?)<\/position>.*?<recording\sid=\"([a-f0-9-]+?)\"><title>(.*?)<\/title><length>(.*?)<\/length>/m) rescue
mb_other_tracks.each do |item| album_image = ''
unless mb_main_tracks_hashed.include? item[2].downcase.scan(/[a-z0-9]*/).join end
item << mb_release[3] # Release country
item << 1 # Bonus flag album = Album.new
mb_tracks << item album.name = mb_album['title']
album.year = main_release['date'].to_date.year
album.artist_id = artist.id
album.mbid = mb_album['id']
album.album_type = mb_album['type']
album.pic_url = album_image
album.save
# Creating hashed array for matching
mb_main_tracks_hashed = []
tracks_mb.each do |mb_track|
unless mb_main_tracks_hashed.include? mb_track['recording']['title'].downcase.scan(/[a-z0-9]*/).join
mb_main_tracks_hashed << mb_track['recording']['title'].downcase.scan(/[a-z0-9]*/).join
mb_track['country'] = main_release['country'] unless main_release['country'].nil?
mb_track['bonus'] = 0 if mb_track['bonus'].nil?
end end
end end
end
# Saving tracks # Searching for tracks in other releases
mb_tracks.each do |mb_track| releases_mb.each do |mb_release|
unless mb_track[2] == '[silence]' or mb_track[2] == '[untitled]'
track = Track.new other_tracks_mb = []
track.name = mb_track[2] other_tracks_mb_r = brainz.release(mb_release['id'], {'inc' => 'recordings'})['release']['medium_list']['medium']
track.album_id = album.id if other_tracks_mb_r.is_a? Hash
track.position = mb_track[0] other_tracks_mb = other_tracks_mb_r['track_list']['track']
track.length = mb_track[4] elsif other_tracks_mb_r.is_a? Array
track.country = mb_track[5] other_tracks_mb_r.each do |cd|
track.bonus = mb_track[6] other_tracks_mb |= cd['track_list']['track']
track.live = mb_track[2].downcase.include? 'live' end
track.acoustic = mb_track[2].downcase.include? 'acoustic' end
track.save
other_tracks_mb.each do |mb_track|
unless mb_main_tracks_hashed.include? mb_track['recording']['title'].downcase.scan(/[a-z0-9]*/).join
mb_track['country'] = mb_release['country'] unless mb_release['country'].nil?
mb_track['bonus'] = 1
tracks_mb << mb_track
end
end
end end
end
end # unless mb_main_tracks.empty? # Saving tracks
tracks_mb.each do |mb_track|
unless ['[silence]', '[untitled]'].include? mb_track['recording']['title']
track = Track.new
track.name = mb_track['recording']['title']
track.album_id = album.id
track.position = mb_track['position']
track.length = mb_track['length'] unless mb_track['length'].nil?
track.length = mb_track['recording']['length'] unless mb_track['recording']['length'].nil?
track.country = mb_track['country']
track.bonus = mb_track['bonus']
track.live = mb_track['recording']['title'].downcase.include? 'live'
track.acoustic = mb_track['recording']['title'].downcase.include? 'acoustic'
track.mbid = mb_track['recording']['id']
track.save
end
end
end # unless mb_releases.empty? end # unless mb_main_tracks.empty?
end # mb_albums.each do |mb_album| end # unless mb_releases.empty?
end # mb_albums.each do |mb_album|
artist.status = 1
rescue
artist.status = 2
end
artist.status = 1
artist.save artist.save
end # def self.importArtist name end # def self.importArtist name

View File

@ -0,0 +1,9 @@
class AddMbidToTrack < ActiveRecord::Migration
def self.up
add_column :tracks, :mbid, :string
end
def self.down
remove_column :tracks, :mbid
end
end

View File

@ -10,7 +10,7 @@
# #
# It's strongly recommended to check this file into your version control system. # It's strongly recommended to check this file into your version control system.
ActiveRecord::Schema.define(:version => 20110622204123) do ActiveRecord::Schema.define(:version => 20110624073136) do
create_table "albums", :force => true do |t| create_table "albums", :force => true do |t|
t.string "name" t.string "name"
@ -90,6 +90,7 @@ ActiveRecord::Schema.define(:version => 20110622204123) do
t.datetime "updated_at" t.datetime "updated_at"
t.integer "length" t.integer "length"
t.string "country" t.string "country"
t.string "mbid"
end end
create_table "users", :force => true do |t| create_table "users", :force => true do |t|

View File

@ -3,7 +3,8 @@ require 'open-uri'
namespace :db do namespace :db do
desc 'Imports test data from MusicBrainz database' desc 'Imports test data from MusicBrainz database'
task :import => :environment do task :import => :environment do
ati = ['The Killers']
ati = ['Marilyn Manson']
ati.each do |name| ati.each do |name|
ImportController.importArtist(name) ImportController.importArtist(name)
#ap res = ArtistController.musicBrainzExactSearch(name) #ap res = ArtistController.musicBrainzExactSearch(name)

View File

@ -17,8 +17,9 @@ var Ajax = {
} else if (data.status == 'suggestions') { } else if (data.status == 'suggestions') {
Search.hideSpinner(); Search.hideSpinner();
Search.showSuggestions(data.values); Search.showSuggestions(data.values);
} else if (data.status == 'error') { } else if (data.status == 'loading_failed') {
Ajax.load404Page(); Search.hideSpinner();
Search.showError();
} }
return false; return false;
} else { } else {

View File

@ -6,6 +6,7 @@ var Search = {
$('#search-container input').attr('disabled', 'disabled').blur(); $('#search-container input').attr('disabled', 'disabled').blur();
$('#search-container img').show(); $('#search-container img').show();
$('.autocomplete-container').hide(); $('.autocomplete-container').hide();
$('.artist_loading').hide();
Search.hideSuggestions(); Search.hideSuggestions();
}, },
@ -33,7 +34,7 @@ var Search = {
}, },
showArtistPics: function(pics) { showArtistPics: function(pics) {
$('.artist_loading, .artist_pics').show(); $('.artist_loading.ok, .artist_pics').show();
for (var i = 0; i < pics.length; i++) { for (var i = 0; i < pics.length; i++) {
if (Search.pics.indexOf(pics[i]) === -1) { if (Search.pics.indexOf(pics[i]) === -1) {
Search.pics.push(pics[i]); Search.pics.push(pics[i]);
@ -44,6 +45,10 @@ var Search = {
'); ');
} }
} }
},
showError: function() {
$('.artist_loading.failed').show();
} }
} }

View File

@ -4,7 +4,8 @@
<input type="submit" value="Search" id="search_button"/> <input type="submit" value="Search" id="search_button"/>
</form> </form>
<img class="spinner" src="/images/loader.gif" alt=""/> <img class="spinner" src="/images/loader.gif" alt=""/>
<div class="artist_loading">Artist info is loading for the first time now. Usually it takes less than a minute, please wait a bit.</div> <div class="artist_loading failed">Something very bad happened while we tried out to load some info about this artist. How about some other one?</div>
<div class="artist_loading ok">Artist info is loading for the first time now. Usually it takes less than a minute, please wait a bit.</div>
<div class="artist_pics"></div> <div class="artist_pics"></div>
<div class="suggestions"> <div class="suggestions">
<div>Misspelled?</div> <div>Misspelled?</div>