1
0
Fork 0

Refactored artist import to use the musicbrainz-ruby and lastfm gems. Improved stability.

This commit is contained in:
magnolia-fan 2011-06-24 13:36:44 +04:00
parent ecb47b27e7
commit 89776db403
10 changed files with 177 additions and 128 deletions

View File

@ -9,3 +9,4 @@ gem 'awesome_print', :require => 'ap'
gem 'delayed_job'
gem 'lastfm', :git => 'git://github.com/magnolia-fan/ruby-lastfm.git'
gem 'musicbrainz-ruby', :git => 'git://github.com/magnolia-fan/musicbrainz-ruby.git'

View File

@ -1,3 +1,11 @@
GIT
remote: git://github.com/magnolia-fan/musicbrainz-ruby.git
revision: 17d743a270b02b77251e10357af3d12cdab5f3a9
specs:
musicbrainz-ruby (0.4.0)
hashie (~> 1.0)
httparty (>= 0.7.3)
GIT
remote: git://github.com/magnolia-fan/ruby-lastfm.git
revision: 1c82d5616c99689162afa8df854d16ebaa604e23
@ -47,6 +55,7 @@ GEM
daemons
erubis (2.6.6)
abstract (>= 1.0.0)
hashie (1.0.0)
httparty (0.7.8)
crack (= 0.1.8)
i18n (0.5.0)
@ -81,7 +90,7 @@ GEM
thor (0.14.6)
treetop (1.4.9)
polyglot (>= 0.3.1)
tzinfo (0.3.27)
tzinfo (0.3.28)
xml-simple (1.1.0)
PLATFORMS
@ -91,6 +100,7 @@ DEPENDENCIES
awesome_print
delayed_job
lastfm!
musicbrainz-ruby!
pg
rails (= 3.0.8)
sqlite3

View File

@ -1,6 +1,7 @@
require 'open-uri'
class ArtistController < ApplicationController
@@default_album_types = ['Album', 'Soundtrack']
def data
data = {}
name = params[:name].gsub('%20', ' ').gsub('+', ' ')
@ -15,6 +16,9 @@ class ArtistController < ApplicationController
end
render :json => {status: 'loading', pics: pics}
return
elsif artist and artist.status == 2
render :json => {status: 'loading_failed', pics: []}
return
end
unless artist
results = ArtistController.musicBrainzExactSearch(name)
@ -42,7 +46,7 @@ class ArtistController < ApplicationController
data['albums'] = []
albums = artist.albums
albums.each do |album|
if album.album_type == 'Album'
if @@default_album_types.include? album.album_type
tmp_album = {name: album.name, year: album.year, pic: album.pic_url}
album_tracks = []
bonus_tracks = []

View File

@ -1,143 +1,159 @@
class ImportController < ApplicationController
@@user_agent = 'BeatHaven.org'
@@lastfm_api_key = '04fda005dbf61a50af5abc3e90f111f2'
def self.importArtist name
require 'lastfm'
require 'musicbrainz'
# Get artist info
artist_mb_data = ArtistController.musicBrainzExactSearch(name).first
begin
artist_lastfm_xml = open(
'http://ws.audioscrobbler.com/2.0/'+
'?method=artist.getinfo&api_key='+ @@lastfm_api_key +
'&artist='+ URI.escape(name).gsub(/amp;/, '').gsub(/\&/, '%26').gsub(/\?/, '%3F')
).read
artist_desc = artist_lastfm_xml.scan(/<summary><\!\[CDATA\[?(.*)\]\]><\/summary>/m)[0][0]
artist_pic = artist_lastfm_xml.scan(/<image\ssize=\"extralarge\">?(.*)<\/image>/)[0][0]
rescue
artist_desc = ''
artist_pic = ''
end
class ImportController < ApplicationController
@@user_agent = 'BeatHaven.org'
@@lastfm_api_key = '04fda005dbf61a50af5abc3e90f111f2'
@@lastfm_secret = '19e70e98b291e9f15d0516925945eb1b'
@@brainz_login = 'magnolia_fan'
@@brainz_password = '111111'
def self.importArtist name
# Initializing gems
brainz = MusicBrainz::Client.new(@@brainz_login, @@brainz_password, @@user_agent)
lastfm = Lastfm.new(@@lastfm_api_key, @@lastfm_secret)
# Save artist
artist = Artist.find_by_name(name)
#artist.name = artist_mb_data[:name]
artist.desc = artist_desc
artist.pic_url = artist_pic
artist.artist_type = artist_mb_data[:type]
artist.mbid = artist_mb_data[:mbid]
artist.save
# Get albums from MB
release_groups_mb_xml = open(
'http://musicbrainz.org/ws/2/release-group/?artist='+ artist_mb_data[:mbid],
'User-Agent' => @@user_agent
).read
release_groups_mb_data = release_groups_mb_xml.scan(/<release-group\stype=\"([a-zA-Z]+?)\"\sid=\"([a-f0-9-]+?)\"><title>(.+?)<\/title>/m)
begin
release_groups_mb_data.each do |mb_album|
# Get album releases from MB
releases_mb_xml = open(
'http://musicbrainz.org/ws/2/release/?release-group='+ mb_album[1],
'User-Agent' => @@user_agent
).read
releases_mb_data = releases_mb_xml.scan(/<release\sid=\"([a-f0-9-]+?)\">.*?<status>([a-zA-Z]+?)<\/status>.*?<date>([0-9-]+?)<\/date>.*?<country>([A-Z]+?)<\/country>/m)
releases_mb_data.each do |item|
if item[2].length == 4
item[2] += '-12-28'
elsif item[2].length == 7
item[2] += '-28'
end
# Get artist info
artist_mb_data = ArtistController.musicBrainzExactSearch(name).first
begin
artist_lastfm = lastfm.artist.get_info(name)
artist_mb = brainz.artist(artist_lastfm['mbid'])
rescue
return
end
# Save artist
artist.desc = artist_lastfm['bio']['summary']
artist.pic_url = artist_lastfm['image'][3]['content']
artist.artist_type = artist_mb['artist']['type']
artist.mbid = artist_lastfm['mbid']
# Get albums from MB
release_groups_mb = brainz.release_group(nil, :artist => artist_lastfm['mbid'])['release_group_list']['release_group']
release_groups_mb.each do |mb_album|
unless releases_mb_data.empty?
# Sorting releases by date
releases_mb_data.sort! { |a, b| a[2].to_date <=> b[2].to_date }
main_release = releases_mb_data.shift
# Get tracks from the first release and then exclude release
main_tracks_mb_xml = open(
'http://musicbrainz.org/ws/2/release/'+ main_release[0] +'/?inc=recordings',
'User-Agent' => @@user_agent
).read
mb_tracks = main_tracks_mb_xml.scan(/<track>.*?<position>(\d+?)<\/position>.*?<recording\sid=\"([a-f0-9-]+?)\"><title>(.*?)<\/title>(<length>(.*?)<\/length>)?/m)
mb_tracks.map! do |item|
item << main_release[3]
# Get album releases from MB
releases_mb = brainz.release(nil, {'release-group' => mb_album['id']})['release_list']['release']
if releases_mb.is_a? Hash
releases_mb = [releases_mb]
end
unless mb_tracks.empty?
# Saving album
begin
album_lastm_xml = open(
'http://ws.audioscrobbler.com/2.0/'+
'?method=album.getinfo&api_key='+ @@lastfm_api_key +
'&artist='+ URI.escape(name).gsub(/amp;/, '').gsub(/\&/, '%26').gsub(/\?/, '%3F')+
'&album='+ URI.escape(mb_album[2]).gsub(/amp;/, '').gsub(/\&/, '%26').gsub(/\?/, '%3F')
).read
album_image = album_lastm_xml.scan(/<image\ssize=\"extralarge\">?(.*)<\/image>/)[0][0]
rescue
album_image = ''
releases_mb.delete_if { |a| a['date'].nil? }
releases_mb.each do |release|
if release['date'].length == 4
release['date'] += '-12-28'
elsif release['date'].length == 7
release['date'] += '-28'
end
album = Album.new
album.name = mb_album[2]
album.year = main_release[2].to_date.year
album.artist_id = artist.id
album.mbid = mb_album[1]
album.album_type = mb_album[0]
album.pic_url = album_image
album.save
# Creating hashed array for matching
mb_main_tracks_hashed = []
mb_tracks.each do |mb_track|
unless mb_main_tracks_hashed.include? mb_track[2].downcase.scan(/[a-z0-9]*/).join
mb_main_tracks_hashed << mb_track[2].downcase.scan(/[a-z0-9]*/).join
mb_track << main_release[3] # Release country
mb_track << 0 # Bonus flag
end
unless releases_mb.empty? or true
# Sorting releases by date
releases_mb.sort! { |a, b| a['date'].to_date <=> b['date'].to_date }
main_release = releases_mb.shift
# Get tracks from the first release and then exclude release
tracks_mb = []
tracks_mb_r = brainz.release(main_release['id'], {'inc' => 'recordings'})['release']['medium_list']['medium']
if tracks_mb_r.is_a? Hash
tracks_mb = tracks_mb_r['track_list']['track']
elsif tracks_mb_r.is_a? Array
tracks_mb = tracks_mb_r.shift['track_list']['track']
tracks_mb_r.each do |cd|
cd['track_list']['track'].map! { |mb_track| mb_track['bonus'] = 1 }
tracks_mb |= cd['track_list']['track']
end
end
unless tracks_mb.empty?
# Saving album
begin
album_lastfm = lastfm.album.get_info(artist_lastfm['name'], mb_album['title'])
album_image = album_lastfm['image'][3]['content']
rescue
album_image = ''
end
# Searching for tracks in other releases
releases_mb_data.each do |mb_release|
other_tracks_mb_xml = open(
'http://musicbrainz.org/ws/2/release/'+ mb_release[0] +'/?inc=recordings',
'User-Agent' => @@user_agent
).read
mb_other_tracks = other_tracks_mb_xml.scan(/<track>.*?<position>(\d+?)<\/position>.*?<recording\sid=\"([a-f0-9-]+?)\"><title>(.*?)<\/title><length>(.*?)<\/length>/m)
mb_other_tracks.each do |item|
unless mb_main_tracks_hashed.include? item[2].downcase.scan(/[a-z0-9]*/).join
item << mb_release[3] # Release country
item << 1 # Bonus flag
mb_tracks << item
album = Album.new
album.name = mb_album['title']
album.year = main_release['date'].to_date.year
album.artist_id = artist.id
album.mbid = mb_album['id']
album.album_type = mb_album['type']
album.pic_url = album_image
album.save
# Creating hashed array for matching
mb_main_tracks_hashed = []
tracks_mb.each do |mb_track|
unless mb_main_tracks_hashed.include? mb_track['recording']['title'].downcase.scan(/[a-z0-9]*/).join
mb_main_tracks_hashed << mb_track['recording']['title'].downcase.scan(/[a-z0-9]*/).join
mb_track['country'] = main_release['country'] unless main_release['country'].nil?
mb_track['bonus'] = 0 if mb_track['bonus'].nil?
end
end
end
# Saving tracks
mb_tracks.each do |mb_track|
unless mb_track[2] == '[silence]' or mb_track[2] == '[untitled]'
track = Track.new
track.name = mb_track[2]
track.album_id = album.id
track.position = mb_track[0]
track.length = mb_track[4]
track.country = mb_track[5]
track.bonus = mb_track[6]
track.live = mb_track[2].downcase.include? 'live'
track.acoustic = mb_track[2].downcase.include? 'acoustic'
track.save
# Searching for tracks in other releases
releases_mb.each do |mb_release|
other_tracks_mb = []
other_tracks_mb_r = brainz.release(mb_release['id'], {'inc' => 'recordings'})['release']['medium_list']['medium']
if other_tracks_mb_r.is_a? Hash
other_tracks_mb = other_tracks_mb_r['track_list']['track']
elsif other_tracks_mb_r.is_a? Array
other_tracks_mb_r.each do |cd|
other_tracks_mb |= cd['track_list']['track']
end
end
other_tracks_mb.each do |mb_track|
unless mb_main_tracks_hashed.include? mb_track['recording']['title'].downcase.scan(/[a-z0-9]*/).join
mb_track['country'] = mb_release['country'] unless mb_release['country'].nil?
mb_track['bonus'] = 1
tracks_mb << mb_track
end
end
end
end
end # unless mb_main_tracks.empty?
# Saving tracks
tracks_mb.each do |mb_track|
unless ['[silence]', '[untitled]'].include? mb_track['recording']['title']
track = Track.new
track.name = mb_track['recording']['title']
track.album_id = album.id
track.position = mb_track['position']
track.length = mb_track['length'] unless mb_track['length'].nil?
track.length = mb_track['recording']['length'] unless mb_track['recording']['length'].nil?
track.country = mb_track['country']
track.bonus = mb_track['bonus']
track.live = mb_track['recording']['title'].downcase.include? 'live'
track.acoustic = mb_track['recording']['title'].downcase.include? 'acoustic'
track.mbid = mb_track['recording']['id']
track.save
end
end
end # unless mb_main_tracks.empty?
end # unless mb_releases.empty?
end # unless mb_releases.empty?
end # mb_albums.each do |mb_album|
end # mb_albums.each do |mb_album|
artist.status = 1
rescue
artist.status = 2
end
artist.status = 1
artist.save
end # def self.importArtist name

View File

@ -0,0 +1,9 @@
# Migration that adds an `mbid` string column to the `tracks` table.
# NOTE(review): `mbid` presumably stands for the MusicBrainz identifier of the
# track's recording -- confirm against the code that populates it.
class AddMbidToTrack < ActiveRecord::Migration
  class << self
    # Forward step: create the `mbid` column (type string) on `tracks`.
    def up
      add_column(:tracks, :mbid, :string)
    end

    # Reverse step: drop the `mbid` column from `tracks`.
    def down
      remove_column(:tracks, :mbid)
    end
  end
end

View File

@ -10,7 +10,7 @@
#
# It's strongly recommended to check this file into your version control system.
ActiveRecord::Schema.define(:version => 20110622204123) do
ActiveRecord::Schema.define(:version => 20110624073136) do
create_table "albums", :force => true do |t|
t.string "name"
@ -90,6 +90,7 @@ ActiveRecord::Schema.define(:version => 20110622204123) do
t.datetime "updated_at"
t.integer "length"
t.string "country"
t.string "mbid"
end
create_table "users", :force => true do |t|

View File

@ -3,7 +3,8 @@ require 'open-uri'
namespace :db do
desc 'Imports test data from MusicBrainz database'
task :import => :environment do
ati = ['The Killers']
ati = ['Marilyn Manson']
ati.each do |name|
ImportController.importArtist(name)
#ap res = ArtistController.musicBrainzExactSearch(name)

View File

@ -17,8 +17,9 @@ var Ajax = {
} else if (data.status == 'suggestions') {
Search.hideSpinner();
Search.showSuggestions(data.values);
} else if (data.status == 'error') {
Ajax.load404Page();
} else if (data.status == 'loading_failed') {
Search.hideSpinner();
Search.showError();
}
return false;
} else {

View File

@ -6,6 +6,7 @@ var Search = {
$('#search-container input').attr('disabled', 'disabled').blur();
$('#search-container img').show();
$('.autocomplete-container').hide();
$('.artist_loading').hide();
Search.hideSuggestions();
},
@ -33,7 +34,7 @@ var Search = {
},
showArtistPics: function(pics) {
$('.artist_loading, .artist_pics').show();
$('.artist_loading.ok, .artist_pics').show();
for (var i = 0; i < pics.length; i++) {
if (Search.pics.indexOf(pics[i]) === -1) {
Search.pics.push(pics[i]);
@ -44,6 +45,10 @@ var Search = {
');
}
}
},
showError: function() {
$('.artist_loading.failed').show();
}
}

View File

@ -4,7 +4,8 @@
<input type="submit" value="Search" id="search_button"/>
</form>
<img class="spinner" src="/images/loader.gif" alt=""/>
<div class="artist_loading">Artist info is loading for the first time now. Usually it takes less than a minute, please wait a bit.</div>
<div class="artist_loading failed">Something very bad happened while we tried out to load some info about this artist. How about some other one?</div>
<div class="artist_loading ok">Artist info is loading for the first time now. Usually it takes less than a minute, please wait a bit.</div>
<div class="artist_pics"></div>
<div class="suggestions">
<div>Misspelled?</div>