1
0
Fork 0

Loading data from MusicBrainz Web Service, stability fixes, database reorganized. Closes #39

This commit is contained in:
magnolia-fan 2011-06-18 01:49:32 +04:00
parent a66300c8e4
commit 17f5edf841
15 changed files with 179 additions and 170 deletions

View File

@ -21,7 +21,7 @@ gem 'awesome_print', :require => 'ap'
# Bundle the extra gems:
# gem 'bj'
# gem 'nokogiri'
gem 'nokogiri'
# gem 'sqlite3-ruby', :require => 'sqlite3'
# gem 'aws-s3', :require => 'aws/s3'

View File

@ -40,6 +40,7 @@ GEM
mime-types (~> 1.16)
treetop (~> 1.4.8)
mime-types (1.16)
nokogiri (1.4.5)
pg (0.11.0)
polyglot (0.3.1)
rack (1.2.3)
@ -72,6 +73,7 @@ PLATFORMS
DEPENDENCIES
awesome_print
nokogiri
pg
rails (= 3.0.8)
sqlite3

View File

@ -21,23 +21,25 @@ class ArtistController < ApplicationController
data['albums'] = []
albums = artist.albums
albums.each do |album|
tmp_album = {name: album.name, year: album.year, pic: album.pic_url}
album_tracks = []
bonus_tracks = []
album.tracks.each do |track|
tmp_track = {name: track.name, live: track.live, acoustic: track.acoustic}
if track.length
time = (track.length / 1000).round
time_m = (time / 60).floor
time_s = time - time_m * 60
tmp_track['duration'] = time_m.to_s + ':' + (time_s < 10 ? '0' : '') + time_s.to_s
else
tmp_track['duration'] = '0:00'
if album.album_type == 'Album'
tmp_album = {name: album.name, year: album.year, pic: album.pic_url}
album_tracks = []
bonus_tracks = []
album.tracks.each do |track|
tmp_track = {name: track.name, live: track.live, acoustic: track.acoustic}
if track.length
time = (track.length / 1000).round
time_m = (time / 60).floor
time_s = time - time_m * 60
tmp_track['duration'] = time_m.to_s + ':' + (time_s < 10 ? '0' : '') + time_s.to_s
else
tmp_track['duration'] = '0:00'
end
(track.bonus == 0 ? album_tracks : bonus_tracks) << tmp_track
end
(track.bonus == 0 ? album_tracks : bonus_tracks) << tmp_track
tmp_album['tracks'] = {album: album_tracks, bonus: bonus_tracks}
data['albums'] << tmp_album
end
tmp_album['tracks'] = {album: album_tracks, bonus: bonus_tracks}
data['albums'] << tmp_album
end
render :json => data
end

View File

@ -1,82 +1,133 @@
# Imports one artist, its release groups and their tracks into the local
# Artist / Album / Track tables, reading MusicBrainz and Last.fm over HTTP.
# NOTE(review): this listing appears to interleave the pre-commit and the
# post-commit lines of the class without +/- markers (e.g. both
# `artist_xml = open(` and `artist_mb_xml = open(` follow each other below),
# so it is not runnable as shown — confirm against the real file before editing.
class ImportController < ApplicationController
# Value sent as the User-Agent header on every MusicBrainz request below.
@@user_agent = 'BeatHaven.org'
# Last.fm API key interpolated into the artist.getinfo / album.getinfo URLs.
@@lastfm_api_key = '04fda005dbf61a50af5abc3e90f111f2'
# Looks the artist up by +name+, saves an Artist row, then saves one Album
# per release group and one Track per recording found in its releases.
def self.importArtist name
# Get artist from MB
mb_artist = MusicBrainzArtist.getByName(name)
# # Save artist
artist_xml = open(
# Get artist info
artist_mb_xml = open(
'http://musicbrainz.org/ws/2/artist/?query='+ URI.escape(name) +'&limit=1',
'User-Agent' => @@user_agent
).read
# Captures per match: [0] artist type, [1] MBID, [2] name.
artist_mb_data = artist_mb_xml.scan(/<artist.*?type=\"(.+?)\"\sid=\"([a-f0-9-]+?)\">.*?<name>(.+?)<\/name>/m)
artist_lastfm_xml = open(
'http://ws.audioscrobbler.com/2.0/'+
'?method=artist.getinfo&api_key=04fda005dbf61a50af5abc3e90f111f2'+
'&artist='+ URI.escape(name)).read
'?method=artist.getinfo&api_key='+ @@lastfm_api_key +
'&artist='+ URI.escape(name)
).read
# Save artist
# NOTE(review): the [0][...] lookups below raise if a scan returned no match.
artist = Artist.new
artist.name = mb_artist.name
artist.desc = artist_xml.scan(/<summary><\!\[CDATA\[?(.*)\]\]><\/summary>/m)[0][0]
artist.pic_url = artist_xml.scan(/<image\ssize=\"extralarge\">?(.*)<\/image>/)[0][0]
artist.name = artist_mb_data[0][2]
artist.desc = artist_lastfm_xml.scan(/<summary><\!\[CDATA\[?(.*)\]\]><\/summary>/m)[0][0]
artist.pic_url = artist_lastfm_xml.scan(/<image\ssize=\"extralarge\">?(.*)<\/image>/)[0][0]
artist.artist_type = artist_mb_data[0][0]
artist.mbid = artist_mb_data[0][1]
artist.save
# Get albums from MB
mb_albums = MusicBrainzAlbum.where('artist_id = ? AND release_type = 1', mb_artist.id).order('year ASC, id ASC')
mb_albums.each do |mb_album|
release_groups_mb_xml = open(
'http://musicbrainz.org/ws/2/release-group/?artist='+ artist_mb_data[0][1],
'User-Agent' => @@user_agent
).read
# Captures per release group: [0] type, [1] MBID, [2] title.
release_groups_mb_data = release_groups_mb_xml.scan(/<release-group\stype=\"([a-zA-Z]+?)\"\sid=\"([a-f0-9-]+?)\"><title>(.+?)<\/title>/m)
release_groups_mb_data.each do |mb_album|
# Get album releases from MB
mb_releases = MusicBrainzRelease.where('album_id = ? AND release_type = 1', mb_album.id).order('date ASC, id ASC')
unless mb_releases.empty?
releases_mb_xml = open(
'http://musicbrainz.org/ws/2/release/?release-group='+ mb_album[1],
'User-Agent' => @@user_agent
).read
# Captures per release: [0] release id, [1] status, [2] date, [3] country.
releases_mb_data = releases_mb_xml.scan(/<release\sid=\"([a-f0-9-]+?)\">.*?<status>([a-zA-Z]+?)<\/status>.*?<date>([0-9-]+?)<\/date>.*?<country>([A-Z]+?)<\/country>/m)
# Four-character dates get '-01-01' appended so the to_date calls used for
# sorting below receive a full year-month-day string.
releases_mb_data.each do |item|
if item[2].length == 4
item[2] += '-01-01'
end
end
unless releases_mb_data.empty?
# Sorting releases by date
releases_mb_data.sort! { |a, b| a[2].to_date <=> b[2].to_date }
# shift removes the earliest release, so releases_mb_data now holds only the others.
main_release = releases_mb_data.shift
# Get tracks from the first release and then exclude release
mb_main_tracks = MusicBrainzTrack.where('release_id IN (?)', mb_releases.shift.id).order('position ASC')
unless mb_main_tracks.empty?
main_tracks_mb_xml = open(
'http://musicbrainz.org/ws/2/release/'+ main_release[0] +'/?inc=recordings',
'User-Agent' => @@user_agent
).read
# Captures per track: [0] position, [1] recording id, [2] title, [3] length.
mb_tracks = main_tracks_mb_xml.scan(/<track>.*?<position>(\d+?)<\/position><recording\sid=\"([a-f0-9-]+?)\"><title>(.*?)<\/title><length>(.*?)<\/length>/m)
# NOTE(review): the release country is appended here and appended again in
# the hashing loop further down. If both statements belong to the same
# revision, main-release tracks end up with the country at index 5, where the
# save loop reads the bonus flag — confirm which append is intended.
mb_tracks.map! do |item|
item << main_release[3]
end
unless mb_tracks.empty?
# Saving album
album_xml = open(
'http://ws.audioscrobbler.com/2.0/'+
'?method=album.getinfo&api_key=04fda005dbf61a50af5abc3e90f111f2'+
'&artist='+ URI.escape(name).gsub(/\&/, '%26').gsub(/\?/, '%3F')+
'&album='+ URI.escape(mb_album.name).gsub(/\&/, '%26').gsub(/\?/, '%3F')).read
# Any error raised while fetching or scanning the cover is rescued and the
# album is saved with an empty pic_url instead.
begin
album_lastm_xml = open(
'http://ws.audioscrobbler.com/2.0/'+
'?method=album.getinfo&api_key='+ @@lastfm_api_key +
'&artist='+ URI.escape(name).gsub(/\&/, '%26').gsub(/\?/, '%3F')+
'&album='+ URI.escape(mb_album[2]).gsub(/\&/, '%26').gsub(/\?/, '%3F')
).read
album_image = album_lastm_xml.scan(/<image\ssize=\"extralarge\">?(.*)<\/image>/)[0][0]
rescue
album_image = ''
end
album = Album.new
album.name = mb_album.name
album.year = mb_album.year
album.name = mb_album[2]
album.year = main_release[2].to_date.year
album.artist_id = artist.id
album.status = 1
album.pic_url = album_xml.scan(/<image\ssize=\"extralarge\">?(.*)<\/image>/)[0][0]
album.mbid = mb_album[1]
album.album_type = mb_album[0]
album.pic_url = album_image
album.save
# Creating hashed array for matching
# Each title is lowercased and reduced to its [a-z0-9] characters so that
# titles differing only in case, spacing or punctuation compare equal.
mb_main_tracks_hashed = []
mb_main_tracks.each do |mb_track|
mb_main_tracks_hashed << mb_track.name.downcase.scan(/[a-z0-9]*/).join
end
# Getting other releases ids
release_ids = []
mb_releases.each do |mb_release|
release_ids << mb_release.id
mb_tracks.each do |mb_track|
mb_main_tracks_hashed << mb_track[2].downcase.scan(/[a-z0-9]*/).join
mb_track << main_release[3] # Release country
mb_track << 0 # Bonus flag
end
# Searching for tracks in other releases
mb_other_tracks = MusicBrainzTrack.where('release_id IN (?)', release_ids).order('position ASC')
mb_bonus_tracks = []
mb_other_tracks.each do |mb_track|
unless mb_main_tracks_hashed.include? mb_track.name.downcase.scan(/[a-z0-9]*/).join
mb_bonus_tracks << mb_track
releases_mb_data.each do |mb_release|
other_tracks_mb_xml = open(
'http://musicbrainz.org/ws/2/release/'+ mb_release[0] +'/?inc=recordings',
'User-Agent' => @@user_agent
).read
mb_other_tracks = other_tracks_mb_xml.scan(/<track>.*?<position>(\d+?)<\/position><recording\sid=\"([a-f0-9-]+?)\"><title>(.*?)<\/title><length>(.*?)<\/length>/m)
# A track whose normalised title is not among the main release's titles is
# kept as a bonus track (flag 1) and tagged with its own release's country.
mb_other_tracks.each do |item|
unless mb_main_tracks_hashed.include? item[2].downcase.scan(/[a-z0-9]*/).join
item << mb_release[3] # Release country
item << 1 # Bonus flag
mb_tracks << item
end
end
end # mb_other_tracks.each do |mb_track|
end
# Saving tracks
mb_main_tracks.each do |mb_track|
mb_tracks.each do |mb_track|
track = Track.new
track.name = mb_track.name
track.name = mb_track[2]
track.album_id = album.id
track.position = mb_track.position
track.length = mb_track.length
track.bonus = 0
track.live = mb_track.name.downcase.include? 'live'
track.acoustic = mb_track.name.downcase.include? 'acoustic'
track.position = mb_track[0]
track.length = mb_track[3]
track.country = mb_track[4]
track.bonus = mb_track[5]
# live / acoustic are plain substring checks on the lowercased title.
track.live = mb_track[2].downcase.include? 'live'
track.acoustic = mb_track[2].downcase.include? 'acoustic'
track.save
end #mb_main_tracks.each do |mb_track|
mb_bonus_tracks.each do |mb_track|
track = Track.new
track.name = mb_track.name
track.album_id = album.id
track.position = mb_track.position
track.length = mb_track.length
track.bonus = 1
track.live = mb_track.name.downcase.include? 'live'
track.acoustic = mb_track.name.downcase.include? 'acoustic'
track.save
end # mb_bonus_tracks.each do |mb_track|
end
end # unless mb_main_tracks.empty?
end # unless mb_releases.empty?
end # mb_albums.each do |mb_album|
end # def self.importArtist name
end

View File

@ -1,3 +1,3 @@
# Artist record; its albums are returned ordered by year, then id.
# NOTE(review): two declarations of the same :albums association are listed
# here. This looks like the removed and the added line of a diff shown
# together; only one of them (presumably the one without the status
# condition) should exist in the real file — confirm.
class Artist < ActiveRecord::Base
has_many :albums, :conditions => "status = 1", :order => 'year ASC, id ASC'
has_many :albums, :order => 'year ASC, id ASC'
end

View File

@ -1,61 +0,0 @@
# Release-group row read from the 'musicbrainz.bh_release_group' table over
# the :musicbrainz connection.
class MusicBrainzAlbum < ActiveRecord::Base
establish_connection :musicbrainz
set_table_name 'musicbrainz.bh_release_group'
belongs_to :music_brainz_artist
has_many :music_brainz_releases, :conditions => "release_type = 1", :order => 'date ASC, id ASC'
require 'iconv'
# Returns the "extralarge" cover URL for this album.
#
# If an AlbumPic row exists for this album id, its stored extralarge value is
# returned. Otherwise Last.fm album.getinfo is requested for +artist+ and this
# album's name, the <image> tags are scanned out of the response, the sizes
# are handed to AlbumPic, and the fourth matched URL is returned.
#
# artist - artist name String, CGI-escaped into the request URL.
def cover artist
covers = AlbumPic.where(album_id: id).first
unless covers.nil?
covers.extralarge
else
q_artist = CGI::escape(artist)
q_album = CGI::escape(name)
path = open(
'http://ws.audioscrobbler.com/2.0/' <<
'?method=album.getinfo' <<
'&api_key=04fda005dbf61a50af5abc3e90f111f2' <<
'&artist=' << q_artist <<
'&album=' << q_album
).read
# Each match is [size attribute, url].
# NOTE(review): the indexing below assumes at least five <image> matches in
# small..mega order; fewer matches make m[n] nil and m[n][1] raise — confirm
# the response shape.
m = path.scan(/<image\ssize=\"(.*)\">(.*)<\/image>/i)
# NOTE(review): presumably persists a new AlbumPic with these attributes via
# the where(...).create chain — verify against the AlbumPic model.
AlbumPic.where(
album_id: id,
small: m[0][1],
medium: m[1][1],
large: m[2][1],
extralarge: m[3][1],
mega: m[4][1]
).create
m[3][1]
end
end
# Returns an Array of { :object => track, :in_db => flag } hashes, one per
# track of the first release.
#
# NOTE(review): this calls self.releases / release.tracks, while the
# associations declared in this class are named music_brainz_releases (and
# music_brainz_tracks on the release model) — confirm those shorter names are
# defined somewhere.
# NOTE(review): :in_db is nil when the track id WAS found in TrackData and
# true when it was not, which reads inverted relative to the key name —
# confirm the intended meaning with callers.
def tracksInDb
tracks = []
result = []
tracks_in_db = []
track_ids = []
self.releases.first.tracks.each do |track|
tracks << track
# Ids are stringified for the TrackData lookup and converted back with to_i below.
track_ids << track.id.to_s
end
in_db = TrackData.any_in(id: track_ids).only("id")
unless in_db.nil?
in_db.each do |track|
tracks_in_db << track["id"].to_i;
end
end
tracks.each do |track|
result << {
:object => track,
:in_db => tracks_in_db.include?(track.id) ? nil : true
}
end
result
end
end

View File

@ -1,20 +0,0 @@
# Artist row read from the 'musicbrainz.bh_artist' table over the
# :musicbrainz connection, plus a Last.fm autocomplete helper.
class MusicBrainzArtist < ActiveRecord::Base
  require 'cgi'

  establish_connection :musicbrainz
  set_table_name 'musicbrainz.bh_artist'
  has_many :albums, :conditions => "release_type = 1", :order => 'year ASC, id ASC'

  # Returns the artist whose name equals +name+ exactly and whose id equals
  # its credit_id, preferring the highest rating; nil when nothing matches.
  def self.getByName(name)
    MusicBrainzArtist.first(:conditions => ['name = ? AND id=credit_id', name], :order => 'rating DESC')
  end

  # Queries the Last.fm autocomplete endpoint for +query+.
  #
  # Returns the decoded JSON, or nil when +query+ is nil/blank or the decoded
  # response is nil or empty. Network and parse errors propagate to the caller.
  def self.getLastFmAutocomplete(query)
    return nil if query.nil? || query.strip.empty?

    # CGI.escape is used instead of URI.escape because the latter leaves
    # reserved characters such as '&', '=', '?' and '+' untouched, which would
    # cut the q parameter short for names like "Simon & Garfunkel".
    url = 'http://www.last.fm/search/autocomplete' + '?q=' + CGI.escape(query)
    json = ActiveSupport::JSON.decode(open(url).read)

    # blank? also covers a nil decode result, which empty? would raise on.
    json.blank? ? nil : json
  end
end

View File

@ -1,7 +0,0 @@
# Release row read from the 'musicbrainz.bh_release' table over the
# :musicbrainz connection. Belongs to a release group (MusicBrainzAlbum) and
# lists its tracks ordered by position.
class MusicBrainzRelease < ActiveRecord::Base
  # Connection and table mapping.
  establish_connection :musicbrainz
  set_table_name 'musicbrainz.bh_release'

  # Associations.
  belongs_to :music_brainz_album
  has_many :music_brainz_tracks, order: 'position ASC'
end

View File

@ -1,6 +0,0 @@
# Track row read from the 'musicbrainz.bh_track' table over the :musicbrainz
# connection; each track belongs to one MusicBrainzRelease.
class MusicBrainzTrack < ActiveRecord::Base
establish_connection :musicbrainz
set_table_name 'musicbrainz.bh_track'
belongs_to :music_brainz_release
end

View File

@ -0,0 +1,11 @@
# Adds the string columns `type` and `mbid` to the artists table.
class AddTypeAndMbidToArtists < ActiveRecord::Migration
  class << self
    # Creates both columns, `type` first.
    def up
      [:type, :mbid].each { |column| add_column :artists, column, :string }
    end

    # Drops the columns in the reverse order of creation.
    def down
      [:mbid, :type].each { |column| remove_column :artists, column }
    end
  end
end

View File

@ -0,0 +1,13 @@
# Replaces the integer `status` column on albums with the string columns
# `type` and `mbid`.
class AddTypeAndMbidToAlbums < ActiveRecord::Migration
  # Drops `status` and adds `type` and `mbid`.
  def self.up
    remove_column :albums, :status
    add_column :albums, :type, :string
    add_column :albums, :mbid, :string
  end

  # Reverses `up`: drops `mbid` and `type`, then restores `status`.
  def self.down
    remove_column :albums, :mbid
    remove_column :albums, :type
    # Restore the column that `up` removed. Re-adding `type` as an integer
    # here would leave the table without `status` after a rollback.
    add_column :albums, :status, :integer
  end
end

View File

@ -0,0 +1,9 @@
# Adds a string `country` column to the tracks table.
class AddCountryToTrack < ActiveRecord::Migration
  class << self
    # Creates the column.
    def up
      add_column :tracks, :country, :string
    end

    # Drops the column again.
    def down
      remove_column :tracks, :country
    end
  end
end

View File

@ -0,0 +1,11 @@
# Renames the `type` column on artists and albums to `artist_type` and
# `album_type` respectively.
class FixColumnNames < ActiveRecord::Migration
  # Applies the renames, artists first and then albums.
  def self.up
    [[:artists, :artist_type], [:albums, :album_type]].each do |table, specific_name|
      rename_column table, :type, specific_name
    end
  end

  # Restores the original `type` name on both tables, in the same table order.
  def self.down
    [[:artists, :artist_type], [:albums, :album_type]].each do |table, specific_name|
      rename_column table, specific_name, :type
    end
  end
end

View File

@ -10,16 +10,17 @@
#
# It's strongly recommended to check this file into your version control system.
ActiveRecord::Schema.define(:version => 20110614175827) do
ActiveRecord::Schema.define(:version => 20110617213912) do
create_table "albums", :force => true do |t|
t.string "name"
t.integer "artist_id"
t.integer "year"
t.integer "status"
t.string "pic_url"
t.datetime "created_at"
t.datetime "updated_at"
t.string "album_type"
t.string "mbid"
end
create_table "artists", :force => true do |t|
@ -28,6 +29,8 @@ ActiveRecord::Schema.define(:version => 20110614175827) do
t.datetime "created_at"
t.datetime "updated_at"
t.string "pic_url"
t.string "artist_type"
t.string "mbid"
end
create_table "tracks", :force => true do |t|
@ -40,6 +43,7 @@ ActiveRecord::Schema.define(:version => 20110614175827) do
t.datetime "created_at"
t.datetime "updated_at"
t.integer "length"
t.string "country"
end
end

View File

@ -1,7 +1,7 @@
require 'uri'
require 'open-uri'
namespace :db do
desc 'Imports data from MusicBrainz database'
desc 'Imports test data from MusicBrainz database'
task :import => :environment do
ati = ['Jet']
ati.each do |name|