From 67b6899f9fd7a43a84d3257c0b7f3590dabad2d1 Mon Sep 17 00:00:00 2001 From: Gregory Eremin Date: Wed, 14 Sep 2011 20:03:21 +0400 Subject: [PATCH] Import refactoring --- Gemfile.lock | 4 +- app/controllers/import_controller.rb | 241 ++++++--------------------- lib/tasks/db_import.rake | 15 +- 3 files changed, 62 insertions(+), 198 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index ba4c0fa..71c3ca3 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -70,7 +70,7 @@ GEM treetop (~> 1.4.8) mime-types (1.16) multi_json (1.0.3) - musicbrainz (0.4.1) + musicbrainz (0.4.3) nokogiri nokogiri (1.5.0) pg (0.11.0) @@ -131,7 +131,7 @@ DEPENDENCIES haml json lastfm! - musicbrainz (~> 0.4.1) + musicbrainz (~> 0.4.3) pg rails (= 3.1.0) sass diff --git a/app/controllers/import_controller.rb b/app/controllers/import_controller.rb index c7a1cb3..43114a4 100644 --- a/app/controllers/import_controller.rb +++ b/app/controllers/import_controller.rb @@ -10,205 +10,71 @@ class ImportController < ApplicationController # Initializing gems lastfm = Lastfm.new(@@lastfm_api_key, @@lastfm_secret) - artist = Artist.find_or_create_by_name(name) + artist = Artist.find_by_name(name) begin lastfm_artist = lastfm.artist.get_info(name) brainz_artist = MusicBrainz::Artist.find_by_name(name) rescue => e + lastfm_artist = { + 'bio' => { 'summary' => '' }, + 'image' => [ nil, nil, nil, { 'content' => '' } ] + } ap e.message ap e.backtrace end - # Save artist - artist.desc = lastfm_artist['bio']['summary'] - artist.pic_url = lastfm_artist['image'][3]['content'] - artist.artist_type = brainz_artist.type - artist.mbid = brainz_artist.id - artist.save! unless dry_run - - brainz_artist.release_groups.each do |release_group| - # Saving album - begin - album_lastfm = lastfm.album.get_info(lastfm_artist['name'], release_group.title) - album_image = album_lastfm['image'][3]['content'] - rescue - album_image = '' - end - album = Album.new - album.name = release_group.title - album.year = release_group.first_release_date.year - album.artist_id = artist.id - album.mbid = release_group.id - album.album_type = release_group.type - album.pic_url = album_image - album.has_pic = (album_image != '' and not album_image.nil?) - dry_run ? ap(album) : album.save - # Preparing releases - release_group.releases.each do |release| - - end - end - begin - # Get albums from MB - release_groups_mb = brainz.release_group(nil, :artist => artist_mb_data[:mbid], :limit => 500) - - unless release_groups_mb['release_group_list']['release_group'].nil? - - if release_groups_mb['release_group_list']['release_group'].is_a? Hash - release_groups_mb['release_group_list']['release_group'] = [release_groups_mb['release_group_list']['release_group']] + # Save artist + artist.desc = lastfm_artist['bio']['summary'] + artist.pic_url = lastfm_artist['image'][3]['content'] + artist.artist_type = brainz_artist.type + artist.mbid = brainz_artist.id + dry_run ? ap(artist) : artist.save + + brainz_artist.release_groups.each do |brainz_release_group| + # Saving album + begin + album_lastfm = lastfm.album.get_info(lastfm_artist['name'], brainz_release_group.title) + album_image = album_lastfm['image'][3]['content'] + rescue + album_image = '' end - - release_groups_mb['release_group_list']['release_group'].each do |mb_album| - - album_name = mb_album['title'] + (mb_album['disambiguation'].nil? ? '' : ' ('+ mb_album['disambiguation'] +')') - - # Checking if there is already an album with the same name - albums_db = Album.where(:artist_id => artist.id, :name => album_name, :album_type => mb_album['type']) - - # Get album releases from MB - releases_mb = brainz.release(nil, {'release-group' => mb_album['id']}) - - unless releases_mb['release_list']['release'].nil? or not albums_db.empty? - - releases_mb = releases_mb['release_list']['release'] - - if releases_mb.is_a? Hash - releases_mb = [releases_mb] + album = Album.new + album.name = brainz_release_group.title + album.year = brainz_release_group.first_release_date.year + album.artist_id = artist.id + album.mbid = brainz_release_group.id + album.album_type = brainz_release_group.type + album.pic_url = album_image + album.has_pic = (album_image != '' and not album_image.nil?) + dry_run ? ap(album) : album.save + # Tracks from the first release + tracks_hashed = [] + brainz_release_group.releases.each_with_index do |brainz_release, i| + # Processing tracks + brainz_release.tracks.each do |brainz_track| + track_title = brainz_track.title.gsub(/\s\/\s\[.*?\]/, '') + if tracks_hashed.include? track_title.downcase.scan(/[a-z0-9]*/).join + next end - - releases_mb.delete_if { |a| a['date'].nil? } - - unless releases_mb.empty? - - releases_mb.each do |release| - if release['date'].length == 4 - release['date'] += '-12-28' - elsif release['date'].length == 7 - release['date'] += '-28' - end - end - - # Sorting releases by date - releases_mb.sort! { |a, b| a['date'].to_date <=> b['date'].to_date } - main_release = releases_mb.shift - - # Get tracks from the first release and then exclude release - tracks_mb = [] - tracks_mb_r = brainz.release(main_release['id'], {'inc' => 'recordings'}) - - unless tracks_mb_r['release']['medium_list']['medium'].nil? - - tracks_mb_r = tracks_mb_r['release']['medium_list']['medium'] - - if tracks_mb_r.is_a? Hash - if tracks_mb_r['track_list']['track'].is_a? Hash - tracks_mb_r['track_list']['track'] = [tracks_mb_r['track_list']['track']] - end - tracks_mb = tracks_mb_r['track_list']['track'] - elsif tracks_mb_r.is_a? Array - tracks_mb = tracks_mb_r.shift['track_list']['track'] - tracks_mb_r.each do |cd| - if cd['track_list']['track'].is_a? Hash - cd['track_list']['track'] = [cd['track_list']['track']] - end - cd['track_list']['track'].each { |mb_track| mb_track['bonus'] = 1 } - tracks_mb |= cd['track_list']['track'] - end - end - - unless tracks_mb.empty? - - # Saving album - begin - album_lastfm = lastfm.album.get_info(artist_lastfm['name'], album_name) - album_image = album_lastfm['image'][3]['content'] - rescue - album_image = '' - end - - album = Album.new - album.name = album_name - album.year = main_release['date'].to_date.year - album.artist_id = artist.id - album.mbid = mb_album['id'] - album.album_type = mb_album['type'] - album.pic_url = album_image - album.has_pic = (album_image != '' and not album_image.nil?) - album.save unless dry_run - ap album if dry_run - - # Creating hashed array for matching - mb_main_tracks_hashed = [] - tracks_mb.each do |mb_track| - unless mb_main_tracks_hashed.include? mb_track['recording']['title'].downcase.scan(/[a-z0-9]*/).join - mb_main_tracks_hashed << mb_track['recording']['title'].downcase.scan(/[a-z0-9]*/).join - mb_track['country'] = main_release['country'] unless main_release['country'].nil? - mb_track['bonus'] = 0 if mb_track['bonus'].nil? - end - end - - # Searching for tracks in other releases - releases_mb.each do |mb_release| - - other_tracks_mb = [] - other_tracks_mb_r = brainz.release(mb_release['id'], {'inc' => 'recordings'})['release']['medium_list']['medium'] - if other_tracks_mb_r.is_a? Hash - if other_tracks_mb_r['track_list']['track'].is_a? Hash - other_tracks_mb_r['track_list']['track'] = [other_tracks_mb_r['track_list']['track']] - end - other_tracks_mb = other_tracks_mb_r['track_list']['track'] - elsif other_tracks_mb_r.is_a? Array - other_tracks_mb_r.each do |cd| - if cd['track_list']['track'].is_a? Hash - cd['track_list']['track'] = [cd['track_list']['track']] - end - other_tracks_mb |= cd['track_list']['track'] - end - end - - other_tracks_mb.each do |mb_track| - unless mb_main_tracks_hashed.include? mb_track['recording']['title'].downcase.scan(/[a-z0-9]*/).join - mb_track['country'] = mb_release['country'] unless mb_release['country'].nil? - mb_track['bonus'] = 1 - tracks_mb << mb_track - end - end - end - - # Saving tracks - tracks_mb.each do |mb_track| - unless ['[silence]', '[untitled]'].include? mb_track['recording']['title'] - track = Track.new - track.name = mb_track['recording']['title'].gsub(/\s\/\s\[.*?\]/, '') - track.album_id = album.id - track.position = mb_track['position'] - track.length = mb_track['length'] unless mb_track['length'].nil? - track.length = mb_track['recording']['length'] unless mb_track['recording']['length'].nil? - track.country = mb_track['country'] - track.bonus = mb_track['bonus'] - track.live = mb_track['recording']['title'].downcase.include? 'live' - track.acoustic = mb_track['recording']['title'].downcase.include? 'acoustic' - track.mbid = mb_track['recording']['id'] - track.save unless dry_run - end - end - - end # unless tracks_mb_r['release']['medium_list']['medium'].nil? - - end # releases_mb.empty? - - end # unless mb_main_tracks.empty? - - end # unless releases_mb.nil? - - end # release_groups_mb.each do |mb_album| - - end # unless release_groups_mb['release_group_list']['release_group'].nil? + tracks_hashed << track_title.downcase.scan(/[a-z0-9]*/).join + track = Track.new + track.name = track_title + track.album_id = album.id + track.position = brainz_track.position + track.length = brainz_track.length + track.country = brainz_release.country + track.bonus = (i == 0 ? 0 : 1) + track.live = track_title.downcase.include? 'live' + track.acoustic = track_title.downcase.include? 'acoustic' + track.mbid = brainz_track.recording_id + dry_run ? ap(track) : track.save + end + end + end artist.status = 1 - rescue => e ap e.message ap e.backtrace @@ -216,8 +82,5 @@ class ImportController < ApplicationController end artist.save unless dry_run - ap artist if dry_run - - end # def self.importArtist name - + end end \ No newline at end of file diff --git a/lib/tasks/db_import.rake b/lib/tasks/db_import.rake index 0095d71..bd38d94 100644 --- a/lib/tasks/db_import.rake +++ b/lib/tasks/db_import.rake @@ -1,12 +1,13 @@ -require 'uri' -require 'open-uri' namespace :db do desc 'Imports test data from MusicBrainz database' - task :import => :environment do - - ati = ['Vampire Weekend'] - ati.each do |name| - ImportController.importArtist(name, true) + task :import, [:name, :dry_run] => :environment do |t, argv| + if argv[:dry_run].nil? or argv[:dry_run] == "true" + argv[:dry_run] = true + else + argv[:dry_run] = false + end + unless argv[:name].nil? + ImportController.importArtist(argv[:name], argv[:dry_run]) end end end \ No newline at end of file