From 17f5edf8416f18f2fd31c5b3f1d670eb966a2b9b Mon Sep 17 00:00:00 2001 From: magnolia-fan Date: Sat, 18 Jun 2011 01:49:32 +0400 Subject: [PATCH] Loading data from MusicBrainz Web Service, stability fixes, database reorganized. Closes #39 --- Gemfile | 2 +- Gemfile.lock | 2 + app/controllers/artist_controller.rb | 32 ++-- app/controllers/import_controller.rb | 163 ++++++++++++------ app/models/artist.rb | 2 +- app/models/music_brainz_album.rb | 61 ------- app/models/music_brainz_artist.rb | 20 --- app/models/music_brainz_release.rb | 7 - app/models/music_brainz_track.rb | 6 - ...0617165021_add_type_and_mbid_to_artists.rb | 11 ++ ...10617174535_add_type_and_mbid_to_albums.rb | 13 ++ .../20110617211824_add_country_to_track.rb | 9 + db/migrate/20110617213912_fix_column_names.rb | 11 ++ db/schema.rb | 8 +- lib/tasks/db_import.rake | 2 +- 15 files changed, 179 insertions(+), 170 deletions(-) delete mode 100644 app/models/music_brainz_album.rb delete mode 100644 app/models/music_brainz_artist.rb delete mode 100644 app/models/music_brainz_release.rb delete mode 100644 app/models/music_brainz_track.rb create mode 100644 db/migrate/20110617165021_add_type_and_mbid_to_artists.rb create mode 100644 db/migrate/20110617174535_add_type_and_mbid_to_albums.rb create mode 100644 db/migrate/20110617211824_add_country_to_track.rb create mode 100644 db/migrate/20110617213912_fix_column_names.rb diff --git a/Gemfile b/Gemfile index f02d5ed..49596e0 100644 --- a/Gemfile +++ b/Gemfile @@ -21,7 +21,7 @@ gem 'awesome_print', :require => 'ap' # Bundle the extra gems: # gem 'bj' -# gem 'nokogiri' +gem 'nokogiri' # gem 'sqlite3-ruby', :require => 'sqlite3' # gem 'aws-s3', :require => 'aws/s3' diff --git a/Gemfile.lock b/Gemfile.lock index 7e39d09..f1596ed 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -40,6 +40,7 @@ GEM mime-types (~> 1.16) treetop (~> 1.4.8) mime-types (1.16) + nokogiri (1.4.5) pg (0.11.0) polyglot (0.3.1) rack (1.2.3) @@ -72,6 +73,7 @@ PLATFORMS DEPENDENCIES awesome_print + nokogiri pg rails (= 3.0.8) sqlite3 diff --git a/app/controllers/artist_controller.rb b/app/controllers/artist_controller.rb index cc29c28..762c1d9 100644 --- a/app/controllers/artist_controller.rb +++ b/app/controllers/artist_controller.rb @@ -21,23 +21,25 @@ class ArtistController < ApplicationController data['albums'] = [] albums = artist.albums albums.each do |album| - tmp_album = {name: album.name, year: album.year, pic: album.pic_url} - album_tracks = [] - bonus_tracks = [] - album.tracks.each do |track| - tmp_track = {name: track.name, live: track.live, acoustic: track.acoustic} - if track.length - time = (track.length / 1000).round - time_m = (time / 60).floor - time_s = time - time_m * 60 - tmp_track['duration'] = time_m.to_s + ':' + (time_s < 10 ? '0' : '') + time_s.to_s - else - tmp_track['duration'] = '0:00' + if album.album_type == 'Album' + tmp_album = {name: album.name, year: album.year, pic: album.pic_url} + album_tracks = [] + bonus_tracks = [] + album.tracks.each do |track| + tmp_track = {name: track.name, live: track.live, acoustic: track.acoustic} + if track.length + time = (track.length / 1000).round + time_m = (time / 60).floor + time_s = time - time_m * 60 + tmp_track['duration'] = time_m.to_s + ':' + (time_s < 10 ? '0' : '') + time_s.to_s + else + tmp_track['duration'] = '0:00' + end + (track.bonus == 0 ? album_tracks : bonus_tracks) << tmp_track end - (track.bonus == 0 ? album_tracks : bonus_tracks) << tmp_track + tmp_album['tracks'] = {album: album_tracks, bonus: bonus_tracks} + data['albums'] << tmp_album end - tmp_album['tracks'] = {album: album_tracks, bonus: bonus_tracks} - data['albums'] << tmp_album end render :json => data end diff --git a/app/controllers/import_controller.rb b/app/controllers/import_controller.rb index 6756ab1..c2402e6 100644 --- a/app/controllers/import_controller.rb +++ b/app/controllers/import_controller.rb @@ -1,82 +1,133 @@ class ImportController < ApplicationController + @@user_agent = 'BeatHaven.org' + @@lastfm_api_key = '04fda005dbf61a50af5abc3e90f111f2' def self.importArtist name - # Get artist from MB - mb_artist = MusicBrainzArtist.getByName(name) - # # Save artist - artist_xml = open( + + # Get artist info + artist_mb_xml = open( + 'http://musicbrainz.org/ws/2/artist/?query='+ URI.escape(name) +'&limit=1', + 'User-Agent' => @@user_agent + ).read + artist_mb_data = artist_mb_xml.scan(/.*?(.+?)<\/name>/m) + artist_lastfm_xml = open( 'http://ws.audioscrobbler.com/2.0/'+ - '?method=artist.getinfo&api_key=04fda005dbf61a50af5abc3e90f111f2'+ - '&artist='+ URI.escape(name)).read + '?method=artist.getinfo&api_key='+ @@lastfm_api_key + + '&artist='+ URI.escape(name) + ).read + + # Save artist artist = Artist.new - artist.name = mb_artist.name - artist.desc = artist_xml.scan(/<\!\[CDATA\[?(.*)\]\]><\/summary>/m)[0][0] - artist.pic_url = artist_xml.scan(/?(.*)<\/image>/)[0][0] + artist.name = artist_mb_data[0][2] + artist.desc = artist_lastfm_xml.scan(/<\!\[CDATA\[?(.*)\]\]><\/summary>/m)[0][0] + artist.pic_url = artist_lastfm_xml.scan(/?(.*)<\/image>/)[0][0] + artist.artist_type = artist_mb_data[0][0] + artist.mbid = artist_mb_data[0][1] artist.save + # Get albums from MB - mb_albums = MusicBrainzAlbum.where('artist_id = ? AND release_type = 1', mb_artist.id).order('year ASC, id ASC') - mb_albums.each do |mb_album| + release_groups_mb_xml = open( + 'http://musicbrainz.org/ws/2/release-group/?artist='+ artist_mb_data[0][1], + 'User-Agent' => @@user_agent + ).read + release_groups_mb_data = release_groups_mb_xml.scan(/(.+?)<\/title>/m) + + release_groups_mb_data.each do |mb_album| + # Get album releases from MB - mb_releases = MusicBrainzRelease.where('album_id = ? AND release_type = 1', mb_album.id).order('date ASC, id ASC') - unless mb_releases.empty? + releases_mb_xml = open( + 'http://musicbrainz.org/ws/2/release/?release-group='+ mb_album[1], + 'User-Agent' => @@user_agent + ).read + releases_mb_data = releases_mb_xml.scan(/<release\sid=\"([a-f0-9-]+?)\">.*?<status>([a-zA-Z]+?)<\/status>.*?<date>([0-9-]+?)<\/date>.*?<country>([A-Z]+?)<\/country>/m) + releases_mb_data.each do |item| + if item[2].length == 4 + item[2] += '-01-01' + end + end + + unless releases_mb_data.empty? + + # Sorting releases by date + releases_mb_data.sort! { |a, b| a[2].to_date <=> b[2].to_date } + main_release = releases_mb_data.shift + # Get tracks from the first release and then exclude release - mb_main_tracks = MusicBrainzTrack.where('release_id IN (?)', mb_releases.shift.id).order('position ASC') - unless mb_main_tracks.empty? + main_tracks_mb_xml = open( + 'http://musicbrainz.org/ws/2/release/'+ main_release[0] +'/?inc=recordings', + 'User-Agent' => @@user_agent + ).read + mb_tracks = main_tracks_mb_xml.scan(/<track>.*?<position>(\d+?)<\/position><recording\sid=\"([a-f0-9-]+?)\"><title>(.*?)<\/title><length>(.*?)<\/length>/m) + mb_tracks.map! do |item| + item << main_release[3] + end + + unless mb_tracks.empty? + # Saving album - album_xml = open( - 'http://ws.audioscrobbler.com/2.0/'+ - '?method=album.getinfo&api_key=04fda005dbf61a50af5abc3e90f111f2'+ - '&artist='+ URI.escape(name).gsub(/\&/, '%26').gsub(/\?/, '%3F')+ - '&album='+ URI.escape(mb_album.name).gsub(/\&/, '%26').gsub(/\?/, '%3F')).read + begin + album_lastm_xml = open( + 'http://ws.audioscrobbler.com/2.0/'+ + '?method=album.getinfo&api_key='+ @@lastfm_api_key + + '&artist='+ URI.escape(name).gsub(/\&/, '%26').gsub(/\?/, '%3F')+ + '&album='+ URI.escape(mb_album[2]).gsub(/\&/, '%26').gsub(/\?/, '%3F') + ).read + album_image = album_lastm_xml.scan(/<image\ssize=\"extralarge\">?(.*)<\/image>/)[0][0] + rescue + album_image = '' + end album = Album.new - album.name = mb_album.name - album.year = mb_album.year + album.name = mb_album[2] + album.year = main_release[2].to_date.year album.artist_id = artist.id - album.status = 1 - album.pic_url = album_xml.scan(/<image\ssize=\"extralarge\">?(.*)<\/image>/)[0][0] + album.mbid = mb_album[1] + album.album_type = mb_album[0] + album.pic_url = album_image album.save + # Creating hashed array for matching mb_main_tracks_hashed = [] - mb_main_tracks.each do |mb_track| - mb_main_tracks_hashed << mb_track.name.downcase.scan(/[a-z0-9]*/).join - end - # Getting other releases ids - release_ids = [] - mb_releases.each do |mb_release| - release_ids << mb_release.id + mb_tracks.each do |mb_track| + mb_main_tracks_hashed << mb_track[2].downcase.scan(/[a-z0-9]*/).join + mb_track << main_release[3] # Release country + mb_track << 0 # Bonus flag end + # Searching for tracks in other releases - mb_other_tracks = MusicBrainzTrack.where('release_id IN (?)', release_ids).order('position ASC') - mb_bonus_tracks = [] - mb_other_tracks.each do |mb_track| - unless mb_main_tracks_hashed.include? mb_track.name.downcase.scan(/[a-z0-9]*/).join - mb_bonus_tracks << mb_track + releases_mb_data.each do |mb_release| + other_tracks_mb_xml = open( + 'http://musicbrainz.org/ws/2/release/'+ mb_release[0] +'/?inc=recordings', + 'User-Agent' => @@user_agent + ).read + mb_other_tracks = other_tracks_mb_xml.scan(/<track>.*?<position>(\d+?)<\/position><recording\sid=\"([a-f0-9-]+?)\"><title>(.*?)<\/title><length>(.*?)<\/length>/m) + mb_other_tracks.each do |item| + unless mb_main_tracks_hashed.include? item[2].downcase.scan(/[a-z0-9]*/).join + item << mb_release[3] # Release country + item << 1 # Bonus flag + mb_tracks << item + end end - end # mb_other_tracks.each do |mb_track| + end + # Saving tracks - mb_main_tracks.each do |mb_track| + mb_tracks.each do |mb_track| track = Track.new - track.name = mb_track.name + track.name = mb_track[2] track.album_id = album.id - track.position = mb_track.position - track.length = mb_track.length - track.bonus = 0 - track.live = mb_track.name.downcase.include? 'live' - track.acoustic = mb_track.name.downcase.include? 'acoustic' + track.position = mb_track[0] + track.length = mb_track[3] + track.country = mb_track[4] + track.bonus = mb_track[5] + track.live = mb_track[2].downcase.include? 'live' + track.acoustic = mb_track[2].downcase.include? 'acoustic' track.save - end #mb_main_tracks.each do |mb_track| - mb_bonus_tracks.each do |mb_track| - track = Track.new - track.name = mb_track.name - track.album_id = album.id - track.position = mb_track.position - track.length = mb_track.length - track.bonus = 1 - track.live = mb_track.name.downcase.include? 'live' - track.acoustic = mb_track.name.downcase.include? 'acoustic' - track.save - end # mb_bonus_tracks.each do |mb_track| + end + end # unless mb_main_tracks.empty? + end # unless mb_releases.empty? + end # mb_albums.each do |mb_album| + end # def self.importArtist name + end \ No newline at end of file diff --git a/app/models/artist.rb b/app/models/artist.rb index f8290cb..2332d73 100644 --- a/app/models/artist.rb +++ b/app/models/artist.rb @@ -1,3 +1,3 @@ class Artist < ActiveRecord::Base - has_many :albums, :conditions => "status = 1", :order => 'year ASC, id ASC' + has_many :albums, :order => 'year ASC, id ASC' end diff --git a/app/models/music_brainz_album.rb b/app/models/music_brainz_album.rb deleted file mode 100644 index bdeaad2..0000000 --- a/app/models/music_brainz_album.rb +++ /dev/null @@ -1,61 +0,0 @@ -class MusicBrainzAlbum < ActiveRecord::Base - establish_connection :musicbrainz - set_table_name 'musicbrainz.bh_release_group' - - belongs_to :music_brainz_artist - has_many :music_brainz_releases, :conditions => "release_type = 1", :order => 'date ASC, id ASC' - - require 'iconv' - - def cover artist - covers = AlbumPic.where(album_id: id).first - unless covers.nil? - covers.extralarge - else - q_artist = CGI::escape(artist) - q_album = CGI::escape(name) - path = open( - 'http://ws.audioscrobbler.com/2.0/' << - '?method=album.getinfo' << - '&api_key=04fda005dbf61a50af5abc3e90f111f2' << - '&artist=' << q_artist << - '&album=' << q_album - ).read - m = path.scan(/<image\ssize=\"(.*)\">(.*)<\/image>/i) - AlbumPic.where( - album_id: id, - small: m[0][1], - medium: m[1][1], - large: m[2][1], - extralarge: m[3][1], - mega: m[4][1] - ).create - m[3][1] - end - end - - def tracksInDb - tracks = [] - result = [] - tracks_in_db = [] - track_ids = [] - - self.releases.first.tracks.each do |track| - tracks << track - track_ids << track.id.to_s - end - in_db = TrackData.any_in(id: track_ids).only("id") - unless in_db.nil? - in_db.each do |track| - tracks_in_db << track["id"].to_i; - end - end - tracks.each do |track| - result << { - :object => track, - :in_db => tracks_in_db.include?(track.id) ? nil : true - } - end - result - end -end \ No newline at end of file diff --git a/app/models/music_brainz_artist.rb b/app/models/music_brainz_artist.rb deleted file mode 100644 index c3ca09c..0000000 --- a/app/models/music_brainz_artist.rb +++ /dev/null @@ -1,20 +0,0 @@ -class MusicBrainzArtist < ActiveRecord::Base - establish_connection :musicbrainz - set_table_name 'musicbrainz.bh_artist' - - has_many :albums, :conditions => "release_type = 1", :order => 'year ASC, id ASC' - - def self.getByName(name) - MusicBrainzArtist.first(:conditions => ['name = ? AND id=credit_id', name], :order => 'rating DESC') - end - - def self.getLastFmAutocomplete(query) - return nil if query.nil? or query.strip.empty? - - json = ActiveSupport::JSON.decode(open( - 'http://www.last.fm/search/autocomplete' << - '?q=' << URI.escape(query) - ).read) - return json.empty? ? nil : json - end -end \ No newline at end of file diff --git a/app/models/music_brainz_release.rb b/app/models/music_brainz_release.rb deleted file mode 100644 index f9d014e..0000000 --- a/app/models/music_brainz_release.rb +++ /dev/null @@ -1,7 +0,0 @@ -class MusicBrainzRelease < ActiveRecord::Base - establish_connection :musicbrainz - set_table_name 'musicbrainz.bh_release' - - belongs_to :music_brainz_album - has_many :music_brainz_tracks, :order => 'position ASC' -end \ No newline at end of file diff --git a/app/models/music_brainz_track.rb b/app/models/music_brainz_track.rb deleted file mode 100644 index 178438b..0000000 --- a/app/models/music_brainz_track.rb +++ /dev/null @@ -1,6 +0,0 @@ -class MusicBrainzTrack < ActiveRecord::Base - establish_connection :musicbrainz - set_table_name 'musicbrainz.bh_track' - - belongs_to :music_brainz_release -end \ No newline at end of file diff --git a/db/migrate/20110617165021_add_type_and_mbid_to_artists.rb b/db/migrate/20110617165021_add_type_and_mbid_to_artists.rb new file mode 100644 index 0000000..ae6c01f --- /dev/null +++ b/db/migrate/20110617165021_add_type_and_mbid_to_artists.rb @@ -0,0 +1,11 @@ +class AddTypeAndMbidToArtists < ActiveRecord::Migration + def self.up + add_column :artists, :type, :string + add_column :artists, :mbid, :string + end + + def self.down + remove_column :artists, :mbid + remove_column :artists, :type + end +end diff --git a/db/migrate/20110617174535_add_type_and_mbid_to_albums.rb b/db/migrate/20110617174535_add_type_and_mbid_to_albums.rb new file mode 100644 index 0000000..c6a70d2 --- /dev/null +++ b/db/migrate/20110617174535_add_type_and_mbid_to_albums.rb @@ -0,0 +1,13 @@ +class AddTypeAndMbidToAlbums < ActiveRecord::Migration + def self.up + remove_column :albums, :status + add_column :albums, :type, :string + add_column :albums, :mbid, :string + end + + def self.down + remove_column :albums, :mbid + remove_column :albums, :type + add_column :albums, :type, :integer + end +end diff --git a/db/migrate/20110617211824_add_country_to_track.rb b/db/migrate/20110617211824_add_country_to_track.rb new file mode 100644 index 0000000..b7709c0 --- /dev/null +++ b/db/migrate/20110617211824_add_country_to_track.rb @@ -0,0 +1,9 @@ +class AddCountryToTrack < ActiveRecord::Migration + def self.up + add_column :tracks, :country, :string + end + + def self.down + remove_column :tracks, :country + end +end diff --git a/db/migrate/20110617213912_fix_column_names.rb b/db/migrate/20110617213912_fix_column_names.rb new file mode 100644 index 0000000..6eb06b9 --- /dev/null +++ b/db/migrate/20110617213912_fix_column_names.rb @@ -0,0 +1,11 @@ +class FixColumnNames < ActiveRecord::Migration + def self.up + rename_column :artists, :type, :artist_type + rename_column :albums, :type, :album_type + end + + def self.down + rename_column :artists, :artist_type, :type + rename_column :albums, :album_type, :type + end +end diff --git a/db/schema.rb b/db/schema.rb index 755da86..92dff59 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,16 +10,17 @@ # # It's strongly recommended to check this file into your version control system. -ActiveRecord::Schema.define(:version => 20110614175827) do +ActiveRecord::Schema.define(:version => 20110617213912) do create_table "albums", :force => true do |t| t.string "name" t.integer "artist_id" t.integer "year" - t.integer "status" t.string "pic_url" t.datetime "created_at" t.datetime "updated_at" + t.string "album_type" + t.string "mbid" end create_table "artists", :force => true do |t| @@ -28,6 +29,8 @@ ActiveRecord::Schema.define(:version => 20110614175827) do t.datetime "created_at" t.datetime "updated_at" t.string "pic_url" + t.string "artist_type" + t.string "mbid" end create_table "tracks", :force => true do |t| @@ -40,6 +43,7 @@ ActiveRecord::Schema.define(:version => 20110614175827) do t.datetime "created_at" t.datetime "updated_at" t.integer "length" + t.string "country" end end diff --git a/lib/tasks/db_import.rake b/lib/tasks/db_import.rake index 4364d04..09f0170 100644 --- a/lib/tasks/db_import.rake +++ b/lib/tasks/db_import.rake @@ -1,7 +1,7 @@ require 'uri' require 'open-uri' namespace :db do - desc 'Imports data from MusicBrainz database' + desc 'Imports test data from MusicBrainz database' task :import => :environment do ati = ['Jet'] ati.each do |name|