Fork 0

Loading data from MusicBrainz Web Service, stability fixes, database reorganized. Closes #39

This commit is contained in:
magnolia-fan 2011-06-18 01:49:32 +04:00
parent a66300c8e4
commit 17f5edf841
15 changed files with 179 additions and 170 deletions

View File

@ -21,7 +21,7 @@ gem 'awesome_print', :require => 'ap'
# Bundle the extra gems: # Bundle the extra gems:
# gem 'bj' # gem 'bj'
# gem 'nokogiri' gem 'nokogiri'
# gem 'sqlite3-ruby', :require => 'sqlite3' # gem 'sqlite3-ruby', :require => 'sqlite3'
# gem 'aws-s3', :require => 'aws/s3' # gem 'aws-s3', :require => 'aws/s3'

View File

@ -40,6 +40,7 @@ GEM
mime-types (~> 1.16) mime-types (~> 1.16)
treetop (~> 1.4.8) treetop (~> 1.4.8)
mime-types (1.16) mime-types (1.16)
nokogiri (1.4.5)
pg (0.11.0) pg (0.11.0)
polyglot (0.3.1) polyglot (0.3.1)
rack (1.2.3) rack (1.2.3)
@ -72,6 +73,7 @@ PLATFORMS
awesome_print awesome_print
pg pg
rails (= 3.0.8) rails (= 3.0.8)
sqlite3 sqlite3

View File

@ -21,6 +21,7 @@ class ArtistController < ApplicationController
data['albums'] = [] data['albums'] = []
albums = artist.albums albums = artist.albums
albums.each do |album| albums.each do |album|
if album.album_type == 'Album'
tmp_album = {name: album.name, year: album.year, pic: album.pic_url} tmp_album = {name: album.name, year: album.year, pic: album.pic_url}
album_tracks = [] album_tracks = []
bonus_tracks = [] bonus_tracks = []
@ -39,6 +40,7 @@ class ArtistController < ApplicationController
tmp_album['tracks'] = {album: album_tracks, bonus: bonus_tracks} tmp_album['tracks'] = {album: album_tracks, bonus: bonus_tracks}
data['albums'] << tmp_album data['albums'] << tmp_album
end end
render :json => data render :json => data
end end

View File

@ -1,82 +1,133 @@
class ImportController < ApplicationController class ImportController < ApplicationController
@@user_agent = 'BeatHaven.org'
@@lastfm_api_key = '04fda005dbf61a50af5abc3e90f111f2'
def self.importArtist name def self.importArtist name
# Get artist from MB
mb_artist = MusicBrainzArtist.getByName(name) # Get artist info
# # Save artist artist_mb_xml = open(
artist_xml = open( 'http://musicbrainz.org/ws/2/artist/?query='+ URI.escape(name) +'&limit=1',
'User-Agent' => @@user_agent
artist_mb_data = artist_mb_xml.scan(/<artist.*?type=\"(.+?)\"\sid=\"([a-f0-9-]+?)\">.*?<name>(.+?)<\/name>/m)
artist_lastfm_xml = open(
'http://ws.audioscrobbler.com/2.0/'+ 'http://ws.audioscrobbler.com/2.0/'+
'?method=artist.getinfo&api_key=04fda005dbf61a50af5abc3e90f111f2'+ '?method=artist.getinfo&api_key='+ @@lastfm_api_key +
'&artist='+ URI.escape(name)).read '&artist='+ URI.escape(name)
# Save artist
artist = Artist.new artist = Artist.new
artist.name = mb_artist.name artist.name = artist_mb_data[0][2]
artist.desc = artist_xml.scan(/<summary><\!\[CDATA\[?(.*)\]\]><\/summary>/m)[0][0] artist.desc = artist_lastfm_xml.scan(/<summary><\!\[CDATA\[?(.*)\]\]><\/summary>/m)[0][0]
artist.pic_url = artist_xml.scan(/<image\ssize=\"extralarge\">?(.*)<\/image>/)[0][0] artist.pic_url = artist_lastfm_xml.scan(/<image\ssize=\"extralarge\">?(.*)<\/image>/)[0][0]
artist.artist_type = artist_mb_data[0][0]
artist.mbid = artist_mb_data[0][1]
artist.save artist.save
# Get albums from MB # Get albums from MB
mb_albums = MusicBrainzAlbum.where('artist_id = ? AND release_type = 1', mb_artist.id).order('year ASC, id ASC') release_groups_mb_xml = open(
mb_albums.each do |mb_album| 'http://musicbrainz.org/ws/2/release-group/?artist='+ artist_mb_data[0][1],
'User-Agent' => @@user_agent
release_groups_mb_data = release_groups_mb_xml.scan(/<release-group\stype=\"([a-zA-Z]+?)\"\sid=\"([a-f0-9-]+?)\"><title>(.+?)<\/title>/m)
release_groups_mb_data.each do |mb_album|
# Get album releases from MB # Get album releases from MB
mb_releases = MusicBrainzRelease.where('album_id = ? AND release_type = 1', mb_album.id).order('date ASC, id ASC') releases_mb_xml = open(
unless mb_releases.empty? 'http://musicbrainz.org/ws/2/release/?release-group='+ mb_album[1],
'User-Agent' => @@user_agent
releases_mb_data = releases_mb_xml.scan(/<release\sid=\"([a-f0-9-]+?)\">.*?<status>([a-zA-Z]+?)<\/status>.*?<date>([0-9-]+?)<\/date>.*?<country>([A-Z]+?)<\/country>/m)
releases_mb_data.each do |item|
if item[2].length == 4
item[2] += '-01-01'
unless releases_mb_data.empty?
# Sorting releases by date
releases_mb_data.sort! { |a, b| a[2].to_date <=> b[2].to_date }
main_release = releases_mb_data.shift
# Get tracks from the first release and then exclude release # Get tracks from the first release and then exclude release
mb_main_tracks = MusicBrainzTrack.where('release_id IN (?)', mb_releases.shift.id).order('position ASC') main_tracks_mb_xml = open(
unless mb_main_tracks.empty? 'http://musicbrainz.org/ws/2/release/'+ main_release[0] +'/?inc=recordings',
'User-Agent' => @@user_agent
mb_tracks = main_tracks_mb_xml.scan(/<track>.*?<position>(\d+?)<\/position><recording\sid=\"([a-f0-9-]+?)\"><title>(.*?)<\/title><length>(.*?)<\/length>/m)
mb_tracks.map! do |item|
item << main_release[3]
unless mb_tracks.empty?
# Saving album # Saving album
album_xml = open( begin
album_lastm_xml = open(
'http://ws.audioscrobbler.com/2.0/'+ 'http://ws.audioscrobbler.com/2.0/'+
'?method=album.getinfo&api_key=04fda005dbf61a50af5abc3e90f111f2'+ '?method=album.getinfo&api_key='+ @@lastfm_api_key +
'&artist='+ URI.escape(name).gsub(/\&/, '%26').gsub(/\?/, '%3F')+ '&artist='+ URI.escape(name).gsub(/\&/, '%26').gsub(/\?/, '%3F')+
'&album='+ URI.escape(mb_album.name).gsub(/\&/, '%26').gsub(/\?/, '%3F')).read '&album='+ URI.escape(mb_album[2]).gsub(/\&/, '%26').gsub(/\?/, '%3F')
album_image = album_lastm_xml.scan(/<image\ssize=\"extralarge\">?(.*)<\/image>/)[0][0]
album_image = ''
album = Album.new album = Album.new
album.name = mb_album.name album.name = mb_album[2]
album.year = mb_album.year album.year = main_release[2].to_date.year
album.artist_id = artist.id album.artist_id = artist.id
album.status = 1 album.mbid = mb_album[1]
album.pic_url = album_xml.scan(/<image\ssize=\"extralarge\">?(.*)<\/image>/)[0][0] album.album_type = mb_album[0]
album.pic_url = album_image
album.save album.save
# Creating hashed array for matching # Creating hashed array for matching
mb_main_tracks_hashed = [] mb_main_tracks_hashed = []
mb_main_tracks.each do |mb_track| mb_tracks.each do |mb_track|
mb_main_tracks_hashed << mb_track.name.downcase.scan(/[a-z0-9]*/).join mb_main_tracks_hashed << mb_track[2].downcase.scan(/[a-z0-9]*/).join
end mb_track << main_release[3] # Release country
# Getting other releases ids mb_track << 0 # Bonus flag
release_ids = []
mb_releases.each do |mb_release|
release_ids << mb_release.id
end end
# Searching for tracks in other releases # Searching for tracks in other releases
mb_other_tracks = MusicBrainzTrack.where('release_id IN (?)', release_ids).order('position ASC') releases_mb_data.each do |mb_release|
mb_bonus_tracks = [] other_tracks_mb_xml = open(
mb_other_tracks.each do |mb_track| 'http://musicbrainz.org/ws/2/release/'+ mb_release[0] +'/?inc=recordings',
unless mb_main_tracks_hashed.include? mb_track.name.downcase.scan(/[a-z0-9]*/).join 'User-Agent' => @@user_agent
mb_bonus_tracks << mb_track ).read
mb_other_tracks = other_tracks_mb_xml.scan(/<track>.*?<position>(\d+?)<\/position><recording\sid=\"([a-f0-9-]+?)\"><title>(.*?)<\/title><length>(.*?)<\/length>/m)
mb_other_tracks.each do |item|
unless mb_main_tracks_hashed.include? item[2].downcase.scan(/[a-z0-9]*/).join
item << mb_release[3] # Release country
item << 1 # Bonus flag
mb_tracks << item
end end
end # mb_other_tracks.each do |mb_track| end
# Saving tracks # Saving tracks
mb_main_tracks.each do |mb_track| mb_tracks.each do |mb_track|
track = Track.new track = Track.new
track.name = mb_track.name track.name = mb_track[2]
track.album_id = album.id track.album_id = album.id
track.position = mb_track.position track.position = mb_track[0]
track.length = mb_track.length track.length = mb_track[3]
track.bonus = 0 track.country = mb_track[4]
track.live = mb_track.name.downcase.include? 'live' track.bonus = mb_track[5]
track.acoustic = mb_track.name.downcase.include? 'acoustic' track.live = mb_track[2].downcase.include? 'live'
track.acoustic = mb_track[2].downcase.include? 'acoustic'
track.save track.save
end #mb_main_tracks.each do |mb_track| end
mb_bonus_tracks.each do |mb_track|
track = Track.new end # unless mb_main_tracks.empty?
track.name = mb_track.name
track.album_id = album.id end # unless mb_releases.empty?
track.position = mb_track.position
track.length = mb_track.length end # mb_albums.each do |mb_album|
track.bonus = 1
track.live = mb_track.name.downcase.include? 'live' end # def self.importArtist name
track.acoustic = mb_track.name.downcase.include? 'acoustic'
end # mb_bonus_tracks.each do |mb_track|
end # unless mb_main_tracks.empty?
end # unless mb_releases.empty?
end # mb_albums.each do |mb_album|
end # def self.importArtist name
end end

View File

@ -1,3 +1,3 @@
class Artist < ActiveRecord::Base class Artist < ActiveRecord::Base
has_many :albums, :conditions => "status = 1", :order => 'year ASC, id ASC' has_many :albums, :order => 'year ASC, id ASC'
end end

View File

@ -1,61 +0,0 @@
class MusicBrainzAlbum < ActiveRecord::Base
establish_connection :musicbrainz
set_table_name 'musicbrainz.bh_release_group'
belongs_to :music_brainz_artist
has_many :music_brainz_releases, :conditions => "release_type = 1", :order => 'date ASC, id ASC'
require 'iconv'
def cover artist
covers = AlbumPic.where(album_id: id).first
unless covers.nil?
q_artist = CGI::escape(artist)
q_album = CGI::escape(name)
path = open(
'http://ws.audioscrobbler.com/2.0/' <<
'?method=album.getinfo' <<
'&api_key=04fda005dbf61a50af5abc3e90f111f2' <<
'&artist=' << q_artist <<
'&album=' << q_album
m = path.scan(/<image\ssize=\"(.*)\">(.*)<\/image>/i)
album_id: id,
small: m[0][1],
medium: m[1][1],
large: m[2][1],
extralarge: m[3][1],
mega: m[4][1]
def tracksInDb
tracks = []
result = []
tracks_in_db = []
track_ids = []
self.releases.first.tracks.each do |track|
tracks << track
track_ids << track.id.to_s
in_db = TrackData.any_in(id: track_ids).only("id")
unless in_db.nil?
in_db.each do |track|
tracks_in_db << track["id"].to_i;
tracks.each do |track|
result << {
:object => track,
:in_db => tracks_in_db.include?(track.id) ? nil : true

View File

@ -1,20 +0,0 @@
class MusicBrainzArtist < ActiveRecord::Base
establish_connection :musicbrainz
set_table_name 'musicbrainz.bh_artist'
has_many :albums, :conditions => "release_type = 1", :order => 'year ASC, id ASC'
def self.getByName(name)
MusicBrainzArtist.first(:conditions => ['name = ? AND id=credit_id', name], :order => 'rating DESC')
def self.getLastFmAutocomplete(query)
return nil if query.nil? or query.strip.empty?
json = ActiveSupport::JSON.decode(open(
'http://www.last.fm/search/autocomplete' <<
'?q=' << URI.escape(query)
return json.empty? ? nil : json

View File

@ -1,7 +0,0 @@
class MusicBrainzRelease < ActiveRecord::Base
establish_connection :musicbrainz
set_table_name 'musicbrainz.bh_release'
belongs_to :music_brainz_album
has_many :music_brainz_tracks, :order => 'position ASC'

View File

@ -1,6 +0,0 @@
class MusicBrainzTrack < ActiveRecord::Base
establish_connection :musicbrainz
set_table_name 'musicbrainz.bh_track'
belongs_to :music_brainz_release

View File

@ -0,0 +1,11 @@
# Migration adding two string columns, `type` and `mbid`, to the artists table.
# Note: the `type` column is renamed to `artist_type` by the FixColumnNames migration.
class AddTypeAndMbidToArtists < ActiveRecord::Migration
# Adds the `type` and `mbid` string columns to artists.
def self.up
add_column :artists, :type, :string
add_column :artists, :mbid, :string
# Reverts `up` by dropping both columns, in reverse order of creation.
def self.down
remove_column :artists, :mbid
remove_column :artists, :type

View File

@ -0,0 +1,13 @@
# Migration that replaces the integer `status` column on albums with two
# string columns: `type` and `mbid`.
class AddTypeAndMbidToAlbums < ActiveRecord::Migration
  # Drops `status` and adds the `type` and `mbid` string columns.
  def self.up
    remove_column :albums, :status
    add_column :albums, :type, :string
    add_column :albums, :mbid, :string
  end

  # Reverts `up`: drops the two added columns and restores `status`.
  def self.down
    remove_column :albums, :mbid
    remove_column :albums, :type
    # `up` removed the integer `status` column, so rollback must re-create
    # that column (not a second `type` column) to return to the old schema.
    add_column :albums, :status, :integer
  end
end

View File

@ -0,0 +1,9 @@
# Migration adding a string `country` column to the tracks table.
# The importer fills it with the country of the release a track was read from.
class AddCountryToTrack < ActiveRecord::Migration
# Adds the `country` string column to tracks.
def self.up
add_column :tracks, :country, :string
# Reverts `up` by dropping the `country` column.
def self.down
remove_column :tracks, :country

View File

@ -0,0 +1,11 @@
# Migration renaming the `type` columns on artists and albums to
# `artist_type` and `album_type`.
# NOTE(review): presumably done because ActiveRecord treats a column named
# `type` as its inheritance column by default — confirm.
class FixColumnNames < ActiveRecord::Migration
# Renames artists.type -> artist_type and albums.type -> album_type.
def self.up
rename_column :artists, :type, :artist_type
rename_column :albums, :type, :album_type
# Reverts `up` by restoring the original `type` column names.
def self.down
rename_column :artists, :artist_type, :type
rename_column :albums, :album_type, :type

View File

@ -10,16 +10,17 @@
# #
# It's strongly recommended to check this file into your version control system. # It's strongly recommended to check this file into your version control system.
ActiveRecord::Schema.define(:version => 20110614175827) do ActiveRecord::Schema.define(:version => 20110617213912) do
create_table "albums", :force => true do |t| create_table "albums", :force => true do |t|
t.string "name" t.string "name"
t.integer "artist_id" t.integer "artist_id"
t.integer "year" t.integer "year"
t.integer "status"
t.string "pic_url" t.string "pic_url"
t.datetime "created_at" t.datetime "created_at"
t.datetime "updated_at" t.datetime "updated_at"
t.string "album_type"
t.string "mbid"
end end
create_table "artists", :force => true do |t| create_table "artists", :force => true do |t|
@ -28,6 +29,8 @@ ActiveRecord::Schema.define(:version => 20110614175827) do
t.datetime "created_at" t.datetime "created_at"
t.datetime "updated_at" t.datetime "updated_at"
t.string "pic_url" t.string "pic_url"
t.string "artist_type"
t.string "mbid"
end end
create_table "tracks", :force => true do |t| create_table "tracks", :force => true do |t|
@ -40,6 +43,7 @@ ActiveRecord::Schema.define(:version => 20110614175827) do
t.datetime "created_at" t.datetime "created_at"
t.datetime "updated_at" t.datetime "updated_at"
t.integer "length" t.integer "length"
t.string "country"
end end
end end

View File

@ -1,7 +1,7 @@
require 'uri' require 'uri'
require 'open-uri' require 'open-uri'
namespace :db do namespace :db do
desc 'Imports data from MusicBrainz database' desc 'Imports test data from MusicBrainz database'
task :import => :environment do task :import => :environment do
ati = ['Jet'] ati = ['Jet']
ati.each do |name| ati.each do |name|